On 8/12/2025 2:53 AM, Ackerley Tng wrote: > David Hildenbrand <david@xxxxxxxxxx> writes: > >> On 11.08.25 11:06, Shivank Garg wrote: >>> From: Ackerley Tng <ackerleytng@xxxxxxxxxx> >>> >>> [...snip...] >>> >>> +static struct file *kvm_gmem_inode_create_getfile(void *priv, loff_t size, >>> + u64 flags) >>> +{ >>> + static const char *name = "[kvm-gmem]"; >>> + struct inode *inode; >>> + struct file *file; >>> + int err; >>> + >>> + err = -ENOENT; >> >> Maybe add a comment here when the module reference will get >> dropped. And maybe we should just switch to fops_get() + fops_put? >> >> /* __fput() will take care of fops_put(). */ >> if (!fops_get(&kvm_gmem_fops)) >> goto err; >> > > Sounds good! Please see attached patch. It's exactly what you suggested > except I renamed the goto target to err_fops_put: > >>> + >>> + inode = kvm_gmem_inode_make_secure_inode(name, size, flags); >>> + if (IS_ERR(inode)) { >>> + err = PTR_ERR(inode); >>> + goto err_put_module; >>> + } >>> + >>> + file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR, >>> + &kvm_gmem_fops); >>> + if (IS_ERR(file)) { >>> + err = PTR_ERR(file); >>> + goto err_put_inode; >>> + } >>> + >>> + file->f_flags |= O_LARGEFILE; >>> + file->private_data = priv; >>> + >>> +out: >>> + return file; >>> + >>> +err_put_inode: >>> + iput(inode); >>> +err_put_module: >>> + module_put(kvm_gmem_fops.owner); >> >> fops_put(&kvm_gmem_fops); >> >> ? >> >> >> Acked-by: David Hildenbrand <david@xxxxxxxxxx> >> >> -- >> Cheers, >> >> David / dhildenb > > From f2bd4499bce4db69bf34be75e009579db4329b7c Mon Sep 17 00:00:00 2001 > From: Ackerley Tng <ackerleytng@xxxxxxxxxx> > Date: Sun, 13 Jul 2025 17:43:35 +0000 > Subject: [PATCH] KVM: guest_memfd: Use guest mem inodes instead of anonymous > inodes > > guest_memfd's inode represents memory the guest_memfd is > providing. guest_memfd's file represents a struct kvm's view of that > memory. > > Using a custom inode allows customization of the inode teardown > process via callbacks. For example, ->evict_inode() allows > customization of the truncation process on file close, and > ->destroy_inode() and ->free_inode() allow customization of the inode > freeing process. > > Customizing the truncation process allows flexibility in management of > guest_memfd memory and customization of the inode freeing process > allows proper cleanup of memory metadata stored on the inode. > > Memory metadata is more appropriately stored on the inode (as opposed > to the file), since the metadata is for the memory and is not unique > to a specific binding and struct kvm. > > Co-developed-by: Fuad Tabba <tabba@xxxxxxxxxx> > Signed-off-by: Fuad Tabba <tabba@xxxxxxxxxx> > Signed-off-by: Shivank Garg <shivankg@xxxxxxx> > Signed-off-by: Ackerley Tng <ackerleytng@xxxxxxxxxx> > --- > include/uapi/linux/magic.h | 1 + > virt/kvm/guest_memfd.c | 129 ++++++++++++++++++++++++++++++------- > virt/kvm/kvm_main.c | 7 +- > virt/kvm/kvm_mm.h | 9 +-- > 4 files changed, 119 insertions(+), 27 deletions(-) > > diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h > index bb575f3ab45e5..638ca21b7a909 100644 > --- a/include/uapi/linux/magic.h > +++ b/include/uapi/linux/magic.h > @@ -103,5 +103,6 @@ > #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */ > #define SECRETMEM_MAGIC 0x5345434d /* "SECM" */ > #define PID_FS_MAGIC 0x50494446 /* "PIDF" */ > +#define GUEST_MEMFD_MAGIC 0x474d454d /* "GMEM" */ > > #endif /* __LINUX_MAGIC_H__ */ > diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c > index 08a6bc7d25b60..6c66a09740550 100644 > --- a/virt/kvm/guest_memfd.c > +++ b/virt/kvm/guest_memfd.c > @@ -1,12 +1,16 @@ > // SPDX-License-Identifier: GPL-2.0 > +#include <linux/anon_inodes.h> > #include <linux/backing-dev.h> > #include <linux/falloc.h> > +#include <linux/fs.h> > #include <linux/kvm_host.h> > +#include <linux/pseudo_fs.h> > #include <linux/pagemap.h> > -#include <linux/anon_inodes.h> > > #include "kvm_mm.h" > > +static struct vfsmount *kvm_gmem_mnt; > + > struct kvm_gmem { > struct kvm *kvm; > struct xarray bindings; > @@ -385,9 +389,45 @@ static struct file_operations kvm_gmem_fops = { > .fallocate = kvm_gmem_fallocate, > }; > > -void kvm_gmem_init(struct module *module) > +static int kvm_gmem_init_fs_context(struct fs_context *fc) > +{ > + if (!init_pseudo(fc, GUEST_MEMFD_MAGIC)) > + return -ENOMEM; > + > + fc->s_iflags |= SB_I_NOEXEC; > + fc->s_iflags |= SB_I_NODEV; > + > + return 0; > +} > + > +static struct file_system_type kvm_gmem_fs = { > + .name = "guest_memfd", > + .init_fs_context = kvm_gmem_init_fs_context, > + .kill_sb = kill_anon_super, > +}; > + > +static int kvm_gmem_init_mount(void) > +{ > + kvm_gmem_mnt = kern_mount(&kvm_gmem_fs); > + > + if (IS_ERR(kvm_gmem_mnt)) > + return PTR_ERR(kvm_gmem_mnt); > + > + kvm_gmem_mnt->mnt_flags |= MNT_NOEXEC; > + return 0; > +} > + > +int kvm_gmem_init(struct module *module) > { > kvm_gmem_fops.owner = module; > + > + return kvm_gmem_init_mount(); > +} > + > +void kvm_gmem_exit(void) > +{ > + kern_unmount(kvm_gmem_mnt); > + kvm_gmem_mnt = NULL; > } > > static int kvm_gmem_migrate_folio(struct address_space *mapping, > @@ -463,11 +503,72 @@ bool __weak kvm_arch_supports_gmem_mmap(struct kvm *kvm) > return true; > } > > +static struct inode *kvm_gmem_inode_make_secure_inode(const char *name, > + loff_t size, u64 flags) > +{ > + struct inode *inode; > + > + inode = anon_inode_make_secure_inode(kvm_gmem_mnt->mnt_sb, name, NULL); > + if (IS_ERR(inode)) > + return inode; > + > + inode->i_private = (void *)(unsigned long)flags; > + inode->i_op = &kvm_gmem_iops; > + inode->i_mapping->a_ops = &kvm_gmem_aops; > + inode->i_mode |= S_IFREG; > + inode->i_size = size; > + mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); > + mapping_set_inaccessible(inode->i_mapping); > + /* Unmovable mappings are supposed to be marked unevictable as well. */ > + WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping)); > + > + return inode; > +} > + > +static struct file *kvm_gmem_inode_create_getfile(void *priv, loff_t size, > + u64 flags) > +{ > + static const char *name = "[kvm-gmem]"; > + struct inode *inode; > + struct file *file; > + int err; > + > + err = -ENOENT; > + /* __fput() will take care of fops_put(). */ > + if (!fops_get(&kvm_gmem_fops)) > + goto err; > + > + inode = kvm_gmem_inode_make_secure_inode(name, size, flags); > + if (IS_ERR(inode)) { > + err = PTR_ERR(inode); > + goto err_fops_put; > + } > + > + file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR, > + &kvm_gmem_fops); > + if (IS_ERR(file)) { > + err = PTR_ERR(file); > + goto err_put_inode; > + } > + > + file->f_flags |= O_LARGEFILE; > + file->private_data = priv; > + > +out: > + return file; > + > +err_put_inode: > + iput(inode); > +err_fops_put: > + fops_put(&kvm_gmem_fops); Thanks Ackerley. LGTM