Re: [PATCH RFC 1/3] mm: Add f_ops->populate()

From: Greg Kroah-Hartman
Date: Sun Mar 06 2022 - 05:01:52 EST


On Sun, Mar 06, 2022 at 07:32:05AM +0200, Jarkko Sakkinen wrote:
> Sometimes you might want to use MAP_POPULATE to ask a device driver to
> initialize the device memory in some specific manner. SGX driver can use
> this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
> page in the address range.
>
> Add f_ops->populate() with the same parameters as f_ops->mmap() and make
> it conditionally called inside call_mmap(). Update call sites
> accordingly.
> ---
> Signed-off-by: Jarkko Sakkinen <jarkko@xxxxxxxxxx>
> v3:
> - if (!ret && do_populate && file->f_op->populate)
> + if (!ret && do_populate && file->f_op->populate &&
> + !!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
> (reported by Matthew Wilcox)
> v2:
> - if (!ret && do_populate)
> + if (!ret && do_populate && file->f_op->populate)
> (reported by Jan Harkes)
> ---
> arch/mips/kernel/vdso.c | 2 +-
> drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 2 +-
> fs/coda/file.c | 2 +-
> fs/overlayfs/file.c | 2 +-
> include/linux/fs.h | 12 ++++++++++--
> include/linux/mm.h | 2 +-
> ipc/shm.c | 2 +-
> mm/mmap.c | 10 +++++-----
> mm/nommu.c | 4 ++--
> 9 files changed, 23 insertions(+), 15 deletions(-)
>
> diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
> index 3d0cf471f2fe..89f3f3da9abd 100644
> --- a/arch/mips/kernel/vdso.c
> +++ b/arch/mips/kernel/vdso.c
> @@ -102,7 +102,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
> base = mmap_region(NULL, STACK_TOP, PAGE_SIZE,
> VM_READ | VM_EXEC |
> VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
> - 0, NULL);
> + 0, NULL, false);
> if (IS_ERR_VALUE(base)) {
> ret = base;
> goto out;
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> index 1b526039a60d..4c71f64d6a79 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> @@ -107,7 +107,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *
> if (!obj->base.filp)
> return -ENODEV;
>
> - ret = call_mmap(obj->base.filp, vma);
> + ret = call_mmap(obj->base.filp, vma, false);
> if (ret)
> return ret;
>
> diff --git a/fs/coda/file.c b/fs/coda/file.c
> index 29dd87be2fb8..e14f312fdbf8 100644
> --- a/fs/coda/file.c
> +++ b/fs/coda/file.c
> @@ -173,7 +173,7 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma)
> spin_unlock(&cii->c_lock);
>
> vma->vm_file = get_file(host_file);
> - ret = call_mmap(vma->vm_file, vma);
> + ret = call_mmap(vma->vm_file, vma, false);
>
> if (ret) {
> /* if call_mmap fails, our caller will put host_file so we
> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> index fa125feed0ff..b963a9397e80 100644
> --- a/fs/overlayfs/file.c
> +++ b/fs/overlayfs/file.c
> @@ -503,7 +503,7 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
> vma_set_file(vma, realfile);
>
> old_cred = ovl_override_creds(file_inode(file)->i_sb);
> - ret = call_mmap(vma->vm_file, vma);
> + ret = call_mmap(vma->vm_file, vma, false);
> revert_creds(old_cred);
> ovl_file_accessed(file);
>
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index e2d892b201b0..2909e2d14af8 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -42,6 +42,7 @@
> #include <linux/mount.h>
> #include <linux/cred.h>
> #include <linux/mnt_idmapping.h>
> +#include <linux/mm.h>
>
> #include <asm/byteorder.h>
> #include <uapi/linux/fs.h>
> @@ -1993,6 +1994,7 @@ struct file_operations {
> long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
> long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
> int (*mmap) (struct file *, struct vm_area_struct *);
> + int (*populate)(struct file *, struct vm_area_struct *);
> unsigned long mmap_supported_flags;
> int (*open) (struct inode *, struct file *);
> int (*flush) (struct file *, fl_owner_t id);
> @@ -2074,9 +2076,15 @@ static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio,
> return file->f_op->write_iter(kio, iter);
> }
>
> -static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
> +static inline int call_mmap(struct file *file, struct vm_area_struct *vma, bool do_populate)
> {
> - return file->f_op->mmap(file, vma);
> + int ret = file->f_op->mmap(file, vma);
> +
> + if (!ret && do_populate && file->f_op->populate &&
> + !!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
> + ret = file->f_op->populate(file, vma);
> +
> + return ret;
> }
>
> extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 213cc569b192..6c8c036f423b 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2683,7 +2683,7 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo
>
> extern unsigned long mmap_region(struct file *file, unsigned long addr,
> unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
> - struct list_head *uf);
> + struct list_head *uf, bool do_populate);

As I have said many times before, don't add random boolean flags to
function arguments, as they provide no hint as to what they do at all.
When you read the code, you then have to go back and look up the
function definition here and see what exactly it means and the flow is
broken.

Make function names mean something obvious, for this, if it really is a
good idea to have this new flag (and I doubt it, but that's not my
call), then make this a mmap_region_populate() call to make it obvious
it is something different than the normal mmap_region() call.

But as is, this is pretty horrid, don't you agree?

thanks,

greg k-h