Fuad Tabba <tabba@xxxxxxxxxx> writes: > This patch enables support for shared memory in guest_memfd, including > mapping that memory at the host userspace. This support is gated by the > configuration option KVM_GMEM_SHARED_MEM, and toggled by the guest_memfd > flag GUEST_MEMFD_FLAG_SUPPORT_SHARED, which can be set when creating a > guest_memfd instance. > > Co-developed-by: Ackerley Tng <ackerleytng@xxxxxxxxxx> > Signed-off-by: Ackerley Tng <ackerleytng@xxxxxxxxxx> > Signed-off-by: Fuad Tabba <tabba@xxxxxxxxxx> > --- > arch/x86/include/asm/kvm_host.h | 10 ++++ > include/linux/kvm_host.h | 13 +++++ > include/uapi/linux/kvm.h | 1 + > virt/kvm/Kconfig | 5 ++ > virt/kvm/guest_memfd.c | 88 +++++++++++++++++++++++++++++++++ > 5 files changed, 117 insertions(+) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 709cc2a7ba66..f72722949cae 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -2255,8 +2255,18 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, > > #ifdef CONFIG_KVM_GMEM > #define kvm_arch_supports_gmem(kvm) ((kvm)->arch.supports_gmem) > + > +/* > + * CoCo VMs with hardware support that use guest_memfd only for backing private > + * memory, e.g., TDX, cannot use guest_memfd with userspace mapping enabled. > + */ > +#define kvm_arch_vm_supports_gmem_shared_mem(kvm) \ > + (IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM) && \ > + ((kvm)->arch.vm_type == KVM_X86_SW_PROTECTED_VM || \ > + (kvm)->arch.vm_type == KVM_X86_DEFAULT_VM)) > #else > #define kvm_arch_supports_gmem(kvm) false > +#define kvm_arch_vm_supports_gmem_shared_mem(kvm) false > #endif > > #define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state) > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index ae70e4e19700..2ec89c214978 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -729,6 +729,19 @@ static inline bool kvm_arch_supports_gmem(struct kvm *kvm) > } > #endif > > +/* > + * Returns true if this VM supports shared mem in guest_memfd. > + * > + * Arch code must define kvm_arch_vm_supports_gmem_shared_mem if support for > + * guest_memfd is enabled. > + */ > +#if !defined(kvm_arch_vm_supports_gmem_shared_mem) && !IS_ENABLED(CONFIG_KVM_GMEM) > +static inline bool kvm_arch_vm_supports_gmem_shared_mem(struct kvm *kvm) > +{ > + return false; > +} > +#endif > + > #ifndef kvm_arch_has_readonly_mem > static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm) > { > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index b6ae8ad8934b..9857022a0f0c 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -1566,6 +1566,7 @@ struct kvm_memory_attributes { > #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) > > #define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) > +#define GUEST_MEMFD_FLAG_SUPPORT_SHARED (1UL << 0) > > struct kvm_create_guest_memfd { > __u64 size; > diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig > index 559c93ad90be..f4e469a62a60 100644 > --- a/virt/kvm/Kconfig > +++ b/virt/kvm/Kconfig > @@ -128,3 +128,8 @@ config HAVE_KVM_ARCH_GMEM_PREPARE > config HAVE_KVM_ARCH_GMEM_INVALIDATE > bool > depends on KVM_GMEM > + > +config KVM_GMEM_SHARED_MEM > + select KVM_GMEM > + bool > + prompt "Enables in-place shared memory for guest_memfd" > diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c > index 6db515833f61..8e6d1866b55e 100644 > --- a/virt/kvm/guest_memfd.c > +++ b/virt/kvm/guest_memfd.c > @@ -312,7 +312,88 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn) > return gfn - slot->base_gfn + slot->gmem.pgoff; > } > > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM > + > +static bool kvm_gmem_supports_shared(struct inode *inode) > +{ > + uint64_t flags = (uint64_t)inode->i_private; > + > + return flags & GUEST_MEMFD_FLAG_SUPPORT_SHARED; > +} > + > +static vm_fault_t kvm_gmem_fault_shared(struct vm_fault *vmf) > +{ > + struct inode *inode = file_inode(vmf->vma->vm_file); > + struct folio *folio; > + vm_fault_t ret = VM_FAULT_LOCKED; > + > + filemap_invalidate_lock_shared(inode->i_mapping); > + > + folio = kvm_gmem_get_folio(inode, vmf->pgoff); > + if (IS_ERR(folio)) { > + int err = PTR_ERR(folio); > + > + if (err == -EAGAIN) > + ret = VM_FAULT_RETRY; > + else > + ret = vmf_error(err); > + > + goto out_filemap; > + } > + > + if (folio_test_hwpoison(folio)) { > + ret = VM_FAULT_HWPOISON; > + goto out_folio; > + } > + > + if (WARN_ON_ONCE(folio_test_large(folio))) { > + ret = VM_FAULT_SIGBUS; > + goto out_folio; > + } > + > + if (!folio_test_uptodate(folio)) { > + clear_highpage(folio_page(folio, 0)); > + kvm_gmem_mark_prepared(folio); > + } > + > + vmf->page = folio_file_page(folio, vmf->pgoff); > + > +out_folio: > + if (ret != VM_FAULT_LOCKED) { > + folio_unlock(folio); > + folio_put(folio); > + } > + > +out_filemap: > + filemap_invalidate_unlock_shared(inode->i_mapping); Do we need to hold the filemap_invalidate_lock while zeroing? Would holding the folio lock be enough? > + > + return ret; > +} > + > +static const struct vm_operations_struct kvm_gmem_vm_ops = { > + .fault = kvm_gmem_fault_shared, > +}; > + > +static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma) > +{ > + if (!kvm_gmem_supports_shared(file_inode(file))) > + return -ENODEV; > + > + if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) != > + (VM_SHARED | VM_MAYSHARE)) { > + return -EINVAL; > + } > + > + vma->vm_ops = &kvm_gmem_vm_ops; > + > + return 0; > +} > +#else > +#define kvm_gmem_mmap NULL > +#endif /* CONFIG_KVM_GMEM_SHARED_MEM */ > + > static struct file_operations kvm_gmem_fops = { > + .mmap = kvm_gmem_mmap, > .open = generic_file_open, > .release = kvm_gmem_release, > .fallocate = kvm_gmem_fallocate, > @@ -463,6 +544,9 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args) > u64 flags = args->flags; > u64 valid_flags = 0; > > + if (kvm_arch_vm_supports_gmem_shared_mem(kvm)) > + valid_flags |= GUEST_MEMFD_FLAG_SUPPORT_SHARED; > + > if (flags & ~valid_flags) > return -EINVAL; > > @@ -501,6 +585,10 @@ int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot, > offset + size > i_size_read(inode)) > goto err; > > + if (kvm_gmem_supports_shared(inode) && > + !kvm_arch_vm_supports_gmem_shared_mem(kvm)) > + goto err; > + > filemap_invalidate_lock(inode->i_mapping); > > start = offset >> PAGE_SHIFT;