Track guest_memfd memory's shareability status within the inode as opposed to the file, since it is a property of the guest_memfd's memory contents. Shareability is a property of the memory and is indexed using the page's index in the inode. Because shareability is the memory's property, it is stored within guest_memfd rather than within KVM (as is done for kvm->mem_attr_array). KVM_MEMORY_ATTRIBUTE_PRIVATE in kvm->mem_attr_array must still be retained to allow VMs to only use guest_memfd for private memory and some other memory for shared memory. Not all use cases require guest_memfd memory to be shared with the host when first created. Add a new flag, GUEST_MEMFD_FLAG_INIT_PRIVATE, which when set on KVM_CREATE_GUEST_MEMFD, initializes the memory as private to the guest, and therefore not mappable by the host. Otherwise, memory is shared until explicitly converted to private. Signed-off-by: Ackerley Tng <ackerleytng@xxxxxxxxxx> Co-developed-by: Vishal Annapurve <vannapurve@xxxxxxxxxx> Signed-off-by: Vishal Annapurve <vannapurve@xxxxxxxxxx> Co-developed-by: Fuad Tabba <tabba@xxxxxxxxxx> Signed-off-by: Fuad Tabba <tabba@xxxxxxxxxx> Change-Id: If03609cbab3ad1564685c85bdba6dcbb6b240c0f --- Documentation/virt/kvm/api.rst | 5 ++ include/uapi/linux/kvm.h | 2 + virt/kvm/guest_memfd.c | 124 ++++++++++++++++++++++++++++++++- 3 files changed, 129 insertions(+), 2 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 86f74ce7f12a..f609337ae1c2 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6408,6 +6408,11 @@ belonging to the slot via its userspace_addr. The use of GUEST_MEMFD_FLAG_SUPPORT_SHARED will not be allowed for CoCo VMs. This is validated when the guest_memfd instance is bound to the VM. +If the capability KVM_CAP_GMEM_CONVERSIONS is supported, then the 'flags' field +supports GUEST_MEMFD_FLAG_INIT_PRIVATE.
Setting GUEST_MEMFD_FLAG_INIT_PRIVATE +will initialize the memory for the guest_memfd as guest-only and not faultable +by the host. + See KVM_SET_USER_MEMORY_REGION2 for additional details. 4.143 KVM_PRE_FAULT_MEMORY diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4cc824a3a7c9..d7df312479aa 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1567,7 +1567,9 @@ struct kvm_memory_attributes { #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) #define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) + #define GUEST_MEMFD_FLAG_SUPPORT_SHARED (1UL << 0) +#define GUEST_MEMFD_FLAG_INIT_PRIVATE (1UL << 1) struct kvm_create_guest_memfd { __u64 size; diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 239d0f13dcc1..590932499eba 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -4,6 +4,7 @@ #include <linux/falloc.h> #include <linux/fs.h> #include <linux/kvm_host.h> +#include <linux/maple_tree.h> #include <linux/pseudo_fs.h> #include <linux/pagemap.h> @@ -17,6 +18,24 @@ struct kvm_gmem { struct list_head entry; }; +struct kvm_gmem_inode_private { +#ifdef CONFIG_KVM_GMEM_SHARED_MEM + struct maple_tree shareability; +#endif +}; + +enum shareability { + SHAREABILITY_GUEST = 1, /* Only the guest can map (fault) folios in this range. */ + SHAREABILITY_ALL = 2, /* Both guest and host can fault folios in this range. */ +}; + +static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index); + +static struct kvm_gmem_inode_private *kvm_gmem_private(struct inode *inode) +{ + return inode->i_mapping->i_private_data; +} + /** * folio_file_pfn - like folio_file_page, but return a pfn. * @folio: The folio which contains this index. 
@@ -29,6 +48,59 @@ static inline kvm_pfn_t folio_file_pfn(struct folio *folio, pgoff_t index) return folio_pfn(folio) + (index & (folio_nr_pages(folio) - 1)); } +#ifdef CONFIG_KVM_GMEM_SHARED_MEM + +static int kvm_gmem_shareability_setup(struct kvm_gmem_inode_private *private, + loff_t size, u64 flags) +{ + enum shareability m; + pgoff_t last; + + mt_init(&private->shareability); /* done here: the field only exists with CONFIG_KVM_GMEM_SHARED_MEM */ + last = (size >> PAGE_SHIFT) - 1; + m = flags & GUEST_MEMFD_FLAG_INIT_PRIVATE ? SHAREABILITY_GUEST : + SHAREABILITY_ALL; + return mtree_store_range(&private->shareability, 0, last, xa_mk_value(m), + GFP_KERNEL); +} + +static enum shareability kvm_gmem_shareability_get(struct inode *inode, + pgoff_t index) +{ + struct maple_tree *mt; + void *entry; + + mt = &kvm_gmem_private(inode)->shareability; + entry = mtree_load(mt, index); + WARN(!entry, + "Shareability should always be defined for all indices in inode."); + + return xa_to_value(entry); +} + +static struct folio *kvm_gmem_get_shared_folio(struct inode *inode, pgoff_t index) +{ + if (kvm_gmem_shareability_get(inode, index) != SHAREABILITY_ALL) + return ERR_PTR(-EACCES); + + return kvm_gmem_get_folio(inode, index); +} + +#else + +static int kvm_gmem_shareability_setup(struct kvm_gmem_inode_private *private, loff_t size, u64 flags) +{ + return 0; +} + +static inline struct folio *kvm_gmem_get_shared_folio(struct inode *inode, pgoff_t index) +{ + WARN_ONCE(1, "Unexpected call to get shared folio."); + return ERR_PTR(-EACCES); /* not NULL: the fault handler only checks IS_ERR() */ +} + +#endif /* CONFIG_KVM_GMEM_SHARED_MEM */ + static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot, pgoff_t index, struct folio *folio) { @@ -333,7 +405,7 @@ static vm_fault_t kvm_gmem_fault_shared(struct vm_fault *vmf) filemap_invalidate_lock_shared(inode->i_mapping); - folio = kvm_gmem_get_folio(inode, vmf->pgoff); + folio = kvm_gmem_get_shared_folio(inode, vmf->pgoff); if (IS_ERR(folio)) { int err = PTR_ERR(folio); @@ -420,8 +492,33 @@ static struct file_operations kvm_gmem_fops = { .fallocate = kvm_gmem_fallocate, }; +static void
kvm_gmem_free_inode(struct inode *inode) +{ + struct kvm_gmem_inode_private *private = kvm_gmem_private(inode); + + kfree(private); + + free_inode_nonrcu(inode); +} + +static void kvm_gmem_destroy_inode(struct inode *inode) +{ + struct kvm_gmem_inode_private *private = kvm_gmem_private(inode); + +#ifdef CONFIG_KVM_GMEM_SHARED_MEM + /* + * mtree_destroy() can't be used within rcu callback, hence can't be + * done in ->free_inode(). + */ + if (private) + mtree_destroy(&private->shareability); +#endif +} + static const struct super_operations kvm_gmem_super_operations = { .statfs = simple_statfs, + .destroy_inode = kvm_gmem_destroy_inode, + .free_inode = kvm_gmem_free_inode, }; static int kvm_gmem_init_fs_context(struct fs_context *fc) @@ -549,12 +646,25 @@ static const struct inode_operations kvm_gmem_iops = { static struct inode *kvm_gmem_inode_make_secure_inode(const char *name, loff_t size, u64 flags) { + struct kvm_gmem_inode_private *private; struct inode *inode; + int err; inode = alloc_anon_secure_inode(kvm_gmem_mnt->mnt_sb, name); if (IS_ERR(inode)) return inode; + err = -ENOMEM; + private = kzalloc(sizeof(*private), GFP_KERNEL); + if (!private) + goto out; + + inode->i_mapping->i_private_data = private; + + err = kvm_gmem_shareability_setup(private, size, flags); + if (err) + goto out; + inode->i_private = (void *)(unsigned long)flags; inode->i_op = &kvm_gmem_iops; inode->i_mapping->a_ops = &kvm_gmem_aops; @@ -566,6 +676,11 @@ static struct inode *kvm_gmem_inode_make_secure_inode(const char *name, WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping)); return inode; + +out: + iput(inode); + + return ERR_PTR(err); } static struct file *kvm_gmem_inode_create_getfile(void *priv, loff_t size, @@ -654,6 +769,9 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args) if (kvm_arch_vm_supports_gmem_shared_mem(kvm)) valid_flags |= GUEST_MEMFD_FLAG_SUPPORT_SHARED; + if (flags & GUEST_MEMFD_FLAG_SUPPORT_SHARED) +
valid_flags |= GUEST_MEMFD_FLAG_INIT_PRIVATE; + if (flags & ~valid_flags) return -EINVAL; @@ -842,6 +960,8 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, if (!file) return -EFAULT; + filemap_invalidate_lock_shared(file_inode(file)->i_mapping); + folio = __kvm_gmem_get_pfn(file, slot, index, pfn, &is_prepared, max_order); if (IS_ERR(folio)) { r = PTR_ERR(folio); @@ -857,8 +977,8 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, *page = folio_file_page(folio, index); else folio_put(folio); - out: + filemap_invalidate_unlock_shared(file_inode(file)->i_mapping); fput(file); return r; } -- 2.49.0.1045.g170613ef41-goog