[RFC PATCH 1/2] mm: restrictedmem: Add flag as THP allocation hint for memfd_restricted() syscall

From: Ackerley Tng
Date: Fri Feb 17 2023 - 19:45:01 EST


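Add an RMFD_HUGEPAGE flag to memfd_restricted() that allows userspace
to hint, on a per-file basis, that the kernel should use Transparent
HugePages to back restricted memory. When the flag is set, the backing
shmem file is created with VM_HUGEPAGE in addition to VM_NORESERVE.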

Signed-off-by: Ackerley Tng <ackerleytng@xxxxxxxxxx>
---
include/uapi/linux/restrictedmem.h | 1 +
mm/restrictedmem.c | 27 +++++++++++++++++----------
2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/include/uapi/linux/restrictedmem.h b/include/uapi/linux/restrictedmem.h
index 9f108dd1ac4c..f671ccbb43bc 100644
--- a/include/uapi/linux/restrictedmem.h
+++ b/include/uapi/linux/restrictedmem.h
@@ -4,5 +4,6 @@

/* flags for memfd_restricted */
#define RMFD_TMPFILE 0x0001U
+#define RMFD_HUGEPAGE 0x0002U

#endif /* _UAPI_LINUX_RESTRICTEDMEM_H */
diff --git a/mm/restrictedmem.c b/mm/restrictedmem.c
index 97f3e2159e8b..87c829960b31 100644
--- a/mm/restrictedmem.c
+++ b/mm/restrictedmem.c
@@ -190,19 +190,25 @@ static struct file *restrictedmem_file_create(struct file *memfd)
return file;
}

-static int restrictedmem_create(struct vfsmount *mount)
+static int restrictedmem_create(unsigned int flags, struct vfsmount *mount)
{
struct file *file, *restricted_file;
int fd, err;
+ unsigned long shmem_setup_flags = VM_NORESERVE;

fd = get_unused_fd_flags(0);
if (fd < 0)
return fd;

- if (mount)
- file = shmem_file_setup_with_mnt(mount, "memfd:restrictedmem", 0, VM_NORESERVE);
- else
- file = shmem_file_setup("memfd:restrictedmem", 0, VM_NORESERVE);
+ if (flags & RMFD_HUGEPAGE)
+ shmem_setup_flags |= VM_HUGEPAGE;
+
+ if (mount) {
+ file = shmem_file_setup_with_mnt(mount, "memfd:restrictedmem",
+ 0, shmem_setup_flags);
+ } else {
+ file = shmem_file_setup("memfd:restrictedmem", 0, shmem_setup_flags);
+ }

if (IS_ERR(file)) {
err = PTR_ERR(file);
@@ -230,7 +236,8 @@ static bool is_shmem_mount(struct vfsmount *mnt)
return mnt->mnt_sb->s_magic == TMPFS_MAGIC;
}

-static int restrictedmem_create_from_path(const char __user *mount_path)
+static int restrictedmem_create_from_path(unsigned int flags,
+ const char __user *mount_path)
{
int ret;
struct path path;
@@ -250,7 +257,7 @@ static int restrictedmem_create_from_path(const char __user *mount_path)
if (unlikely(ret))
goto out;

- ret = restrictedmem_create(path.mnt);
+ ret = restrictedmem_create(flags, path.mnt);

mnt_drop_write(path.mnt);
out:
@@ -261,16 +268,22 @@ static int restrictedmem_create_from_path(const char __user *mount_path)

SYSCALL_DEFINE2(memfd_restricted, unsigned int, flags, const char __user *, mount_path)
{
- if (flags & ~RMFD_TMPFILE)
+ /* Reject any flag bits this syscall does not define. */
+ if (flags & ~(RMFD_TMPFILE | RMFD_HUGEPAGE))
return -EINVAL;

- if (flags == RMFD_TMPFILE) {
+ /*
+ * Test the RMFD_TMPFILE bit instead of comparing for equality:
+ * RMFD_HUGEPAGE may be set alongside it, and mount_path must
+ * still be honoured in that case rather than silently ignored.
+ */
+ if (flags & RMFD_TMPFILE) {
if (!mount_path)
return -EINVAL;

- return restrictedmem_create_from_path(mount_path);
+ return restrictedmem_create_from_path(flags, mount_path);
} else {
- return restrictedmem_create(NULL);
+ return restrictedmem_create(flags, NULL);
}
}

--
2.39.2.637.g21b0678d19-goog