Re: [PATCH v2 7/9] mm/mshare: Add unlink and munmap support

From: Khalid Aziz
Date: Fri Jul 01 2022 - 11:59:25 EST


On 6/30/22 15:50, Darrick J. Wong wrote:
On Wed, Jun 29, 2022 at 04:53:58PM -0600, Khalid Aziz wrote:
Number of mappings of an mshare region should be tracked so it can
be removed when there are no more references to it and associated
file has been deleted. This add code to support the unlink operation
for associated file, remove the mshare region on file deletion if
refcount goes to zero, add munmap operation to maintain refcount
to mshare region and remove it on last munmap if file has been
deleted.

Signed-off-by: Khalid Aziz <khalid.aziz@xxxxxxxxxx>
---
mm/mshare.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 63 insertions(+), 1 deletion(-)

diff --git a/mm/mshare.c b/mm/mshare.c
index 088a6cab1e93..90ce0564a138 100644
--- a/mm/mshare.c
+++ b/mm/mshare.c
@@ -29,6 +29,7 @@ static struct super_block *msharefs_sb;
struct mshare_data {
struct mm_struct *mm;
refcount_t refcnt;
+ int deleted;
struct mshare_info *minfo;
};
@@ -48,6 +49,7 @@ msharefs_read(struct kiocb *iocb, struct iov_iter *iov)
size_t ret;
struct mshare_info m_info;
+ mmap_read_lock(info->mm);
if (info->minfo != NULL) {
m_info.start = info->minfo->start;
m_info.size = info->minfo->size;
@@ -55,18 +57,42 @@ msharefs_read(struct kiocb *iocb, struct iov_iter *iov)
m_info.start = 0;
m_info.size = 0;
}
+ mmap_read_unlock(info->mm);
ret = copy_to_iter(&m_info, sizeof(m_info), iov);
if (!ret)
return -EFAULT;
return ret;
}
+static void
+msharefs_close(struct vm_area_struct *vma)
+{
+ struct mshare_data *info = vma->vm_private_data;
+
+ if (refcount_dec_and_test(&info->refcnt)) {
+ mmap_read_lock(info->mm);
+ if (info->deleted) {
+ mmap_read_unlock(info->mm);
+ mmput(info->mm);
+ kfree(info->minfo);
+ kfree(info);

Aren't filesystems supposed to take care of disposing of the file data
in destroy_inode? IIRC struct inode doesn't go away until all fds are
closed, mappings are torn down, and there are no more references from
dentries. I could be misremembering since it's been a few months since
I went looking at the (VFS) inode lifecycle.

Documentation (vfs.rst) says - "this method is called by destroy_inode() to release resources allocated for struct inode. It is only required if ->alloc_inode was defined and simply undoes anything done by ->alloc_inode.". I am not defining alloc_inode, so I assumed I do not need to define destroy_inode and the standard destroy_inode will do the right thing since standard alloc_inode is being used.

Are you suggesting per-region mshare_data should be freed in destroy_inode instead of in close?


+ } else {
+ mmap_read_unlock(info->mm);
+ }
+ }
+}
+
+static const struct vm_operations_struct msharefs_vm_ops = {
+ .close = msharefs_close,
+};
+
static int
msharefs_mmap(struct file *file, struct vm_area_struct *vma)
{
struct mshare_data *info = file->private_data;
struct mm_struct *mm = info->mm;
+ mmap_write_lock(mm);
/*
* If this mshare region has been set up once already, bail out
*/
@@ -80,10 +106,14 @@ msharefs_mmap(struct file *file, struct vm_area_struct *vma)
mm->task_size = vma->vm_end - vma->vm_start;
if (!mm->task_size)
mm->task_size--;
+ mmap_write_unlock(mm);
info->minfo->start = mm->mmap_base;
info->minfo->size = mm->task_size;
+ info->deleted = 0;
+ refcount_inc(&info->refcnt);
vma->vm_flags |= VM_SHARED_PT;
vma->vm_private_data = info;
+ vma->vm_ops = &msharefs_vm_ops;
return 0;
}
@@ -240,6 +270,38 @@ msharefs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
return ret;
}
+static int
+msharefs_unlink(struct inode *dir, struct dentry *dentry)
+{
+ struct inode *inode = d_inode(dentry);
+ struct mshare_data *info = inode->i_private;
+
+ /*
+ * Unmap the mshare region if it is still mapped in
+ */
+ vm_munmap(info->minfo->start, info->minfo->size);
+
+ /*
+ * Mark msharefs file for deletion so it can not be opened
+ * and used for mshare mappings any more
+ */
+ simple_unlink(dir, dentry);
+ mmap_write_lock(info->mm);
+ info->deleted = 1;
+ mmap_write_unlock(info->mm);

What if the file is hardlinked?

It looks like that is a bug currently. I need to account for that. Thanks!

--
Khalid