On 5/13/2025 10:04 PM, Fuad Tabba wrote:
> From: Ackerley Tng <ackerleytng@xxxxxxxxxx>
>
> This patch adds kvm_gmem_max_mapping_level(), which always returns
> PG_LEVEL_4K since guest_memfd only supports 4K pages for now.
>
> When guest_memfd supports shared memory, max_mapping_level (especially
> when recovering huge pages - see call to __kvm_mmu_max_mapping_level()
> from recover_huge_pages_range()) should take input from
> guest_memfd.
>
> Input from guest_memfd should be taken in these cases:
>
> + if the memslot supports shared memory (guest_memfd is used for
>   shared memory, or in future both shared and private memory) or
> + if the memslot is only used for private memory and that gfn is
>   private.
>
> If the memslot doesn't use guest_memfd, figure out the
> max_mapping_level using the host page tables like before.
>
> This patch also refactors and inlines the other call to
> __kvm_mmu_max_mapping_level().
>
> In kvm_mmu_hugepage_adjust(), guest_memfd's input is already
> provided (if applicable) in fault->max_level. Hence, there is no need
> to query guest_memfd.
>
> lpage_info is queried like before, and then if the fault is not from
> guest_memfd, adjust fault->req_level based on input from host page
> tables.
>
> Signed-off-by: Ackerley Tng <ackerleytng@xxxxxxxxxx>
> Signed-off-by: Fuad Tabba <tabba@xxxxxxxxxx>
> ---
>  arch/x86/kvm/mmu/mmu.c   | 92 ++++++++++++++++++++++++++--------------
>  include/linux/kvm_host.h |  7 +++
>  virt/kvm/guest_memfd.c   | 12 ++++++
>  3 files changed, 79 insertions(+), 32 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index cfbb471f7c70..9e0bc8114859 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3256,12 +3256,11 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn,
>  	return level;
>  }
>
> -static int __kvm_mmu_max_mapping_level(struct kvm *kvm,
> -				       const struct kvm_memory_slot *slot,
> -				       gfn_t gfn, int max_level, bool is_private)
> +static int kvm_lpage_info_max_mapping_level(struct kvm *kvm,
> +					    const struct kvm_memory_slot *slot,
> +					    gfn_t gfn, int max_level)
>  {
>  	struct kvm_lpage_info *linfo;
> -	int host_level;
>
>  	max_level = min(max_level, max_huge_page_level);
>  	for ( ; max_level > PG_LEVEL_4K; max_level--) {
> @@ -3270,23 +3269,61 @@ static int __kvm_mmu_max_mapping_level(struct kvm *kvm,
>  			break;
>  	}
>
> -	if (is_private)
> -		return max_level;
> +	return max_level;
> +}
> +
> +static inline u8 kvm_max_level_for_order(int order)
> +{
> +	BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G);
> +
> +	KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) &&
> +			order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) &&
> +			order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K));
> +
> +	if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G))
> +		return PG_LEVEL_1G;
> +
> +	if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M))
> +		return PG_LEVEL_2M;
> +
> +	return PG_LEVEL_4K;
> +}
> +
> +static inline int kvm_gmem_max_mapping_level(const struct kvm_memory_slot *slot,
> +					     gfn_t gfn, int max_level)
> +{
> +	int max_order;
>
>  	if (max_level == PG_LEVEL_4K)
>  		return PG_LEVEL_4K;
>
> -	host_level = host_pfn_mapping_level(kvm, gfn, slot);
> -	return min(host_level, max_level);
> +	max_order = kvm_gmem_mapping_order(slot, gfn);
> +	return min(max_level, kvm_max_level_for_order(max_order));
>  }
>
>  int kvm_mmu_max_mapping_level(struct kvm *kvm,
>  			      const struct kvm_memory_slot *slot, gfn_t gfn)
>  {
> -	bool is_private = kvm_slot_has_gmem(slot) &&
> -			  kvm_mem_is_private(kvm, gfn);
> +	int max_level;
> +
> +	max_level = kvm_lpage_info_max_mapping_level(kvm, slot, gfn, PG_LEVEL_NUM);
> +	if (max_level == PG_LEVEL_4K)
> +		return PG_LEVEL_4K;
>
> -	return __kvm_mmu_max_mapping_level(kvm, slot, gfn, PG_LEVEL_NUM, is_private);
> +	if (kvm_slot_has_gmem(slot) &&
> +	    (kvm_gmem_memslot_supports_shared(slot) ||
> +	     kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE)) {
> +		return kvm_gmem_max_mapping_level(slot, gfn, max_level);
> +	}
> +
> +	return min(max_level, host_pfn_mapping_level(kvm, gfn, slot));
> +}
> +
> +static inline bool fault_from_gmem(struct kvm_page_fault *fault)
> +{
> +	return fault->is_private ||
> +	       (kvm_slot_has_gmem(fault->slot) &&
> +		kvm_gmem_memslot_supports_shared(fault->slot));
>  }
>
>  void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
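The three-way decision in the new kvm_mmu_max_mapping_level() took me a
moment to follow, so here is a self-contained restatement of the flow. This
is an illustrative userspace sketch only: struct mapping_inputs and its
fields are stand-ins I made up for the real lpage_info, memslot, memory
attribute, gmem, and host page table lookups (named in the comments), not
the kernel's types.

#include <stdbool.h>
#include <stdio.h>

enum pg_level { PG_LEVEL_4K = 1, PG_LEVEL_2M, PG_LEVEL_1G };

/* Stand-in inputs; in the kernel each comes from the helper named in
 * its comment. */
struct mapping_inputs {
	enum pg_level lpage_cap;  /* kvm_lpage_info_max_mapping_level() */
	bool slot_has_gmem;       /* kvm_slot_has_gmem() */
	bool gmem_shared;         /* kvm_gmem_memslot_supports_shared() */
	bool gfn_private;         /* KVM_MEMORY_ATTRIBUTE_PRIVATE set */
	enum pg_level gmem_cap;   /* via kvm_gmem_mapping_order() */
	enum pg_level host_cap;   /* host_pfn_mapping_level() */
};

static enum pg_level max_mapping_level(const struct mapping_inputs *in)
{
	enum pg_level level = in->lpage_cap;

	if (level == PG_LEVEL_4K)
		return PG_LEVEL_4K;

	/* gmem decides whenever it backs the mapping: the slot supports
	 * shared gmem memory, or the gfn is private. */
	if (in->slot_has_gmem && (in->gmem_shared || in->gfn_private))
		return level < in->gmem_cap ? level : in->gmem_cap;

	/* Otherwise the host page tables cap the level, as before. */
	return level < in->host_cap ? level : in->host_cap;
}

int main(void)
{
	struct mapping_inputs in = {
		.lpage_cap = PG_LEVEL_1G,
		.slot_has_gmem = true,
		.gfn_private = true,
		.gmem_cap = PG_LEVEL_4K,  /* gmem is 4K-only for now */
		.host_cap = PG_LEVEL_2M,
	};

	/* Prints 1, i.e. PG_LEVEL_4K: gmem's cap wins for a private gfn,
	 * and the host's 2M capability is never consulted. */
	printf("level = %d\n", max_mapping_level(&in));
	return 0;
}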
> @@ -3309,12 +3346,20 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
>  	 * Enforce the iTLB multihit workaround after capturing the requested
>  	 * level, which will be used to do precise, accurate accounting.
>  	 */
> -	fault->req_level = __kvm_mmu_max_mapping_level(vcpu->kvm, slot,
> -						       fault->gfn, fault->max_level,
> -						       fault->is_private);
> +	fault->req_level = kvm_lpage_info_max_mapping_level(vcpu->kvm, slot,
> +							    fault->gfn, fault->max_level);
>  	if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed)
>  		return;
>
> +	if (!fault_from_gmem(fault)) {
> +		int host_level;
> +
> +		host_level = host_pfn_mapping_level(vcpu->kvm, fault->gfn, slot);
> +		fault->req_level = min(fault->req_level, host_level);
> +		if (fault->req_level == PG_LEVEL_4K)
> +			return;
> +	}
> +
>  	/*
>  	 * mmu_invalidate_retry() was successful and mmu_lock is held, so
>  	 * the pmd can't be split from under us.
> @@ -4448,23 +4493,6 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
>  	vcpu->stat.pf_fixed++;
>  }
>
> -static inline u8 kvm_max_level_for_order(int order)
> -{
> -	BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G);
> -
> -	KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) &&
> -			order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) &&
> -			order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K));
> -
> -	if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G))
> -		return PG_LEVEL_1G;
> -
> -	if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M))
> -		return PG_LEVEL_2M;
> -
> -	return PG_LEVEL_4K;
> -}
> -
>  static u8 kvm_max_level_for_fault_and_order(struct kvm *kvm,
>  					    struct kvm_page_fault *fault,
>  					    int order)
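The same split shows up in kvm_mmu_hugepage_adjust() above, except that
gmem's cap has already been folded into fault->max_level by the fault path,
so only non-gmem faults go on to consult the host page tables. A compact
sketch of the new req_level computation, in the same made-up stand-in style
as the previous snippet (struct fault_sketch is not the kernel's
kvm_page_fault):

#include <stdbool.h>
#include <stdio.h>

enum pg_level { PG_LEVEL_4K = 1, PG_LEVEL_2M, PG_LEVEL_1G };

struct fault_sketch {
	enum pg_level max_level;  /* already capped by gmem if applicable */
	bool is_private;
	bool slot_has_gmem;
	bool gmem_shared;
	bool huge_page_disallowed;
};

/* Mirrors the patch's fault_from_gmem(). */
static bool fault_from_gmem(const struct fault_sketch *f)
{
	return f->is_private || (f->slot_has_gmem && f->gmem_shared);
}

static enum pg_level req_level(const struct fault_sketch *f,
			       enum pg_level lpage_cap,
			       enum pg_level host_cap)
{
	/* lpage_info is consulted first, like before... */
	enum pg_level level = lpage_cap < f->max_level ? lpage_cap
						       : f->max_level;

	if (level == PG_LEVEL_4K || f->huge_page_disallowed)
		return level;

	/* ...and the host page tables only when gmem isn't in charge. */
	if (!fault_from_gmem(f) && host_cap < level)
		level = host_cap;

	return level;
}

int main(void)
{
	struct fault_sketch f = { .max_level = PG_LEVEL_1G };

	/* Non-gmem fault: the host's 2M limit clamps req_level; prints 2. */
	printf("req_level = %d\n", req_level(&f, PG_LEVEL_1G, PG_LEVEL_2M));
	return 0;
}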
> @@ -4523,7 +4551,7 @@ static int __kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu,
>  {
>  	unsigned int foll = fault->write ? FOLL_WRITE : 0;
>
> -	if (fault->is_private || kvm_gmem_memslot_supports_shared(fault->slot))
> +	if (fault_from_gmem(fault))
>  		return kvm_mmu_faultin_pfn_gmem(vcpu, fault);
>
>  	foll |= FOLL_NOWAIT;
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index de7b46ee1762..f9bb025327c3 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -2560,6 +2560,7 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
>  int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
>  		     gfn_t gfn, kvm_pfn_t *pfn, struct page **page,
>  		     int *max_order);
> +int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, gfn_t gfn);
>  #else
>  static inline int kvm_gmem_get_pfn(struct kvm *kvm,
>  				   struct kvm_memory_slot *slot, gfn_t gfn,
> @@ -2569,6 +2570,12 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm,
>  	KVM_BUG_ON(1, kvm);
>  	return -EIO;
>  }
> +static inline int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot,
> +					 gfn_t gfn)
> +{
> +	BUG();
> +	return 0;
> +}
>  #endif /* CONFIG_KVM_GMEM */
>
>  #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index fe0245335c96..b8e247063b20 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -774,6 +774,18 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
>  }
>  EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn);
>
> +/**
> + * Returns the mapping order for this @gfn in @slot.
> + *
> + * This is equal to max_order that would be returned if kvm_gmem_get_pfn() were
> + * called now.
> + */

Running make W=1 ./ -s generates the following warning:

warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Returns the mapping order for this @gfn in @slot

This will fix it:

Subject: [PATCH] tmp

Signed-off-by: Shivank Garg <shivankg@xxxxxxx>
---
 virt/kvm/guest_memfd.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index b8e247063b20..d880b9098cc0 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -775,10 +775,12 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
 EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn);
 
 /**
- * Returns the mapping order for this @gfn in @slot.
+ * kvm_gmem_mapping_order - Get the mapping order for a GFN.
+ * @slot: The KVM memory slot containing the @gfn.
+ * @gfn: The guest frame number to check.
  *
- * This is equal to max_order that would be returned if kvm_gmem_get_pfn() were
- * called now.
+ * Returns: The mapping order for a @gfn in @slot. This is equal to max_order
+ * that kvm_gmem_get_pfn() would return for this @gfn.
  */
 int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, gfn_t gfn)
 {
-- 
2.34.1

Thanks,
Shivank

> +int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, gfn_t gfn)
> +{
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(kvm_gmem_mapping_order);
> +
>  #ifdef CONFIG_KVM_GENERIC_GMEM_POPULATE
>  long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages,
>  		       kvm_gmem_populate_cb post_populate, void *opaque)
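P.S. For anyone else hitting the same warning: kernel-doc wants the first
line to be "<function name> - <one-line summary>", followed by one @param
line per parameter, an optional longer description, and a Return:/Returns:
section. A generic, hypothetical example of the expected shape (my_helper
is made up, not from this patch):

/**
 * my_helper - Briefly say what my_helper() does.
 * @arg: Describe the parameter.
 *
 * Returns: Describe the return value.
 */
static int my_helper(int arg)
{
	return arg;
}

To check a single file without a full W=1 build, something like
scripts/kernel-doc -none virt/kvm/guest_memfd.c should print the same
warning before the fix and nothing after it.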