Re: [PATCH V9 1/7] KVM: guest_memfd: Use guest mem inodes instead of anonymous inodes

From: David Hildenbrand
Date: Tue Jul 22 2025 - 11:19:16 EST


On 13.07.25 19:43, Shivank Garg wrote:
From: Ackerley Tng <ackerleytng@xxxxxxxxxx>

guest_memfd's inode represents memory the guest_memfd is
providing. guest_memfd's file represents a struct kvm's view of that
memory.

Using a custom inode allows customization of the inode teardown
process via callbacks. For example, ->evict_inode() allows
customization of the truncation process on file close, and
->destroy_inode() and ->free_inode() allow customization of the inode
freeing process.

Customizing the truncation process allows flexibility in management of
guest_memfd memory and customization of the inode freeing process
allows proper cleanup of memory metadata stored on the inode.

Memory metadata is more appropriately stored on the inode (as opposed
to the file), since the metadata is for the memory and is not unique
to a specific binding and struct kvm.

Co-developed-by: Fuad Tabba <tabba@xxxxxxxxxx>
Signed-off-by: Fuad Tabba <tabba@xxxxxxxxxx>
Signed-off-by: Ackerley Tng <ackerleytng@xxxxxxxxxx>
Signed-off-by: Shivank Garg <shivankg@xxxxxxx>
---

[...]

#include "kvm_mm.h"
+static struct vfsmount *kvm_gmem_mnt;
+
struct kvm_gmem {
struct kvm *kvm;
struct xarray bindings;
@@ -388,9 +392,51 @@ static struct file_operations kvm_gmem_fops = {
.fallocate = kvm_gmem_fallocate,
};
-void kvm_gmem_init(struct module *module)
+static const struct super_operations kvm_gmem_super_operations = {
+ .statfs = simple_statfs,
+};
+
+static int kvm_gmem_init_fs_context(struct fs_context *fc)
+{
+ struct pseudo_fs_context *ctx;
+
+ if (!init_pseudo(fc, GUEST_MEMFD_MAGIC))
+ return -ENOMEM;
+
+ ctx = fc->fs_private;
+ ctx->ops = &kvm_gmem_super_operations;

Curious, why is that required? (secretmem doesn't have it, so I wonder)

+
+ return 0;
+}
+
+static struct file_system_type kvm_gmem_fs = {
+ .name = "kvm_guest_memory",

It's GUEST_MEMFD_MAGIC but here "kvm_guest_memory".

For secretmem it's SECRETMEM_MAGIC vs. "secretmem".

So naturally, I wonder if that is to be made consistent :)

+ .init_fs_context = kvm_gmem_init_fs_context,
+ .kill_sb = kill_anon_super,
+};
+
+static int kvm_gmem_init_mount(void)
+{
+ kvm_gmem_mnt = kern_mount(&kvm_gmem_fs);
+
+ if (IS_ERR(kvm_gmem_mnt))
+ return PTR_ERR(kvm_gmem_mnt);
+
+ kvm_gmem_mnt->mnt_flags |= MNT_NOEXEC;
+ return 0;
+}
+
+int kvm_gmem_init(struct module *module)
{
kvm_gmem_fops.owner = module;
+
+ return kvm_gmem_init_mount();
+}
+
+void kvm_gmem_exit(void)
+{
+ kern_unmount(kvm_gmem_mnt);
+ kvm_gmem_mnt = NULL;
}
static int kvm_gmem_migrate_folio(struct address_space *mapping,
@@ -472,11 +518,71 @@ static const struct inode_operations kvm_gmem_iops = {
.setattr = kvm_gmem_setattr,
};
+static struct inode *kvm_gmem_inode_make_secure_inode(const char *name,
+ loff_t size, u64 flags)
+{
+ struct inode *inode;
+
+ inode = anon_inode_make_secure_inode(kvm_gmem_mnt->mnt_sb, name, NULL);
+ if (IS_ERR(inode))
+ return inode;
+
+ inode->i_private = (void *)(unsigned long)flags;
+ inode->i_op = &kvm_gmem_iops;
+ inode->i_mapping->a_ops = &kvm_gmem_aops;
+ inode->i_mode |= S_IFREG;
+ inode->i_size = size;
+ mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
+ mapping_set_inaccessible(inode->i_mapping);
+ /* Unmovable mappings are supposed to be marked unevictable as well. */
+ WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
+
+ return inode;
+}
+
+static struct file *kvm_gmem_inode_create_getfile(void *priv, loff_t size,
+ u64 flags)
+{
+ static const char *name = "[kvm-gmem]";
+ struct inode *inode;
+ struct file *file;
+ int err;
+
+ err = -ENOENT;
+ if (!try_module_get(kvm_gmem_fops.owner))
+ goto err;

Curious, shouldn't there be a module_put() somewhere after this function returned a file?

+
+ inode = kvm_gmem_inode_make_secure_inode(name, size, flags);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto err_put_module;
+ }
+
+ file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR,
+ &kvm_gmem_fops);
+ if (IS_ERR(file)) {
+ err = PTR_ERR(file);
+ goto err_put_inode;
+ }
+
+ file->f_flags |= O_LARGEFILE;
+ file->private_data = priv;
+


Nothing else jumped at me.

--
Cheers,

David / dhildenb