From: Ackerley Tng <ackerleytng@xxxxxxxxxx>

Modify kvm_mmu_max_mapping_level() to consult guest_memfd for memory
regions backed by it when computing the maximum mapping level,
especially during huge page recovery.

Previously, kvm_mmu_max_mapping_level() was designed primarily for
host-backed memory and private pages. With guest_memfd now supporting
non-private memory, it's necessary to factor in guest_memfd's influence
on mapping levels for such memory.

Since guest_memfd can now be used for non-private memory, make
kvm_mmu_max_mapping_level(), when recovering huge pages, take input
from guest_memfd. Input is taken from guest_memfd as long as a fault to
that slot and gfn would have been served from guest_memfd. For now,
take a shortcut if the slot and gfn point to memory that is private,
since huge page recovery isn't supported for private memory yet.

Since guest_memfd memory can also be faulted into host page tables,
__kvm_mmu_max_mapping_level() still applies, as lpage_info and the host
page tables still need to be consulted.

Move kvm_max_level_for_order() and kvm_gmem_max_mapping_level() earlier
in the file so kvm_mmu_max_mapping_level() can use those functions.

Acked-by: David Hildenbrand <david@xxxxxxxxxx>
Signed-off-by: Ackerley Tng <ackerleytng@xxxxxxxxxx>
Co-developed-by: Fuad Tabba <tabba@xxxxxxxxxx>
Signed-off-by: Fuad Tabba <tabba@xxxxxxxxxx>
---
 arch/x86/kvm/mmu/mmu.c   | 90 ++++++++++++++++++++++++----------------
 include/linux/kvm_host.h |  7 ++++
 virt/kvm/guest_memfd.c   | 17 ++++++++
 3 files changed, 79 insertions(+), 35 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 6bd28fda0fd3..94be15cde6da 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3282,13 +3282,67 @@ static int __kvm_mmu_max_mapping_level(struct kvm *kvm,
 	return min(host_level, max_level);
 }
 
+static u8 kvm_max_level_for_order(int order)
+{
+	BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G);
+
+	KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) &&
+			order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) &&
+			order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K));
+
+	if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G))
+		return PG_LEVEL_1G;
+
+	if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M))
+		return PG_LEVEL_2M;
+
+	return PG_LEVEL_4K;
+}
+
+static u8 kvm_gmem_max_mapping_level(struct kvm *kvm, int order,
+				     struct kvm_page_fault *fault)
+{
+	u8 req_max_level;
+	u8 max_level;
+
+	max_level = kvm_max_level_for_order(order);
+	if (max_level == PG_LEVEL_4K)
+		return PG_LEVEL_4K;
+
+	req_max_level = kvm_x86_call(max_mapping_level)(kvm, fault);
+	if (req_max_level)
+		max_level = min(max_level, req_max_level);
+
+	return max_level;
+}
+
 int kvm_mmu_max_mapping_level(struct kvm *kvm,
 			      const struct kvm_memory_slot *slot, gfn_t gfn)
 {
 	bool is_private = kvm_slot_has_gmem(slot) && kvm_mem_is_private(kvm, gfn);
+	int max_level = PG_LEVEL_NUM;
+
+	/*
+	 * For now, kvm_mmu_max_mapping_level() is only called from
+	 * kvm_mmu_recover_huge_pages(), and that's not yet supported for
+	 * private memory, hence we can take a shortcut and return early.
+	 */
+	if (is_private)
+		return PG_LEVEL_4K;
 
-	return __kvm_mmu_max_mapping_level(kvm, slot, gfn, PG_LEVEL_NUM, is_private);
+	/*
+	 * For non-private pages that would have been faulted from guest_memfd,
+	 * let guest_memfd influence max_mapping_level.
+	 */
+	if (kvm_memslot_is_gmem_only(slot)) {
+		int order = kvm_gmem_mapping_order(slot, gfn);
+
+		max_level = min(max_level,
+				kvm_gmem_max_mapping_level(kvm, order, NULL));
+	}
+
+	return __kvm_mmu_max_mapping_level(kvm, slot, gfn, max_level, is_private);
 }
 
 void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
@@ -4450,40 +4504,6 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
 	vcpu->stat.pf_fixed++;
 }
 
-static inline u8 kvm_max_level_for_order(int order)
-{
-	BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G);
-
-	KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) &&
-			order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) &&
-			order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K));
-
-	if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G))
-		return PG_LEVEL_1G;
-
-	if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M))
-		return PG_LEVEL_2M;
-
-	return PG_LEVEL_4K;
-}
-
-static u8 kvm_gmem_max_mapping_level(struct kvm *kvm, int order,
-				     struct kvm_page_fault *fault)
-{
-	u8 req_max_level;
-	u8 max_level;
-
-	max_level = kvm_max_level_for_order(order);
-	if (max_level == PG_LEVEL_4K)
-		return PG_LEVEL_4K;
-
-	req_max_level = kvm_x86_call(max_mapping_level)(kvm, fault);
-	if (req_max_level)
-		max_level = min(max_level, req_max_level);
-
-	return max_level;
-}
-
 static void kvm_mmu_finish_page_fault(struct kvm_vcpu *vcpu,
 				      struct kvm_page_fault *fault, int r)
 {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d2218ec57ceb..662271314778 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2574,6 +2574,7 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
 int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
 		     gfn_t gfn, kvm_pfn_t *pfn, struct page **page,
 		     int *max_order);
+int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, gfn_t gfn);
 #else
 static inline int kvm_gmem_get_pfn(struct kvm *kvm,
 				   struct kvm_memory_slot *slot, gfn_t gfn,
@@ -2583,6 +2584,12 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm,
 	KVM_BUG_ON(1, kvm);
 	return -EIO;
 }
+static inline int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot,
+					 gfn_t gfn)
+{
+	WARN_ONCE(1, "Unexpected call since gmem is disabled.");
+	return 0;
+}
 #endif /* CONFIG_KVM_GMEM */
 
 #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 2b00f8796a15..d01bd7a2c2bd 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -713,6 +713,23 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
 }
 EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn);
 
+/**
+ * kvm_gmem_mapping_order() - Get the mapping order for this @gfn in @slot.
+ *
+ * @slot: the memslot that gfn belongs to.
+ * @gfn: the gfn to look up mapping order for.
+ *
+ * This is equal to max_order that would be returned if kvm_gmem_get_pfn() were
+ * called now.
+ *
+ * Return: the mapping order for this @gfn in @slot.
+ */
+int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, gfn_t gfn)
+{
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_gmem_mapping_order);
+
 #ifdef CONFIG_KVM_GENERIC_GMEM_POPULATE
 long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages,
 		       kvm_gmem_populate_cb post_populate, void *opaque)
-- 
2.50.0.727.gbf7dc18ff4-goog