Only a few changes are needed to support KVM userfault exits on x86:

1. Adjust kvm_mmu_hugepage_adjust() to force pages to be mapped at 4K
   while KVM_MEM_USERFAULT is enabled.

2. Return -EFAULT when kvm_do_userfault() reports that the page is
   userfault. (Upon failure to read from the bitmap, kvm_do_userfault()
   will return true without setting up a memory fault exit, so we'll
   return a bare -EFAULT.)

For hugepage recovery, the behavior when disabling KVM_MEM_USERFAULT
should match the behavior when disabling KVM_MEM_LOG_DIRTY_PAGES, so
adjust kvm_mmu_slot_apply_flags() to recover hugepages when
KVM_MEM_USERFAULT is disabled.

Signed-off-by: James Houghton <jthoughton@xxxxxxxxxx>
Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
 arch/x86/kvm/mmu/mmu.c |  5 ++++-
 arch/x86/kvm/x86.c     | 27 +++++++++++++++++----------
 2 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index a4439e9e07268..49eb6b9b268cb 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3304,7 +3304,7 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	if (is_error_noslot_pfn(fault->pfn))
 		return;
 
-	if (kvm_slot_dirty_track_enabled(slot))
+	if (kvm_slot_dirty_track_enabled(slot) || kvm_is_userfault_memslot(slot))
 		return;
 
 	/*
@@ -4522,6 +4522,9 @@ static int __kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu,
 {
 	unsigned int foll = fault->write ? FOLL_WRITE : 0;
 
+	if (kvm_do_userfault(vcpu, fault))
+		return -EFAULT;
+
 	if (fault->is_private)
 		return kvm_mmu_faultin_pfn_private(vcpu, fault);
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b58a74c1722de..fa279ba38115c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -13152,12 +13152,27 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 	u32 new_flags = new ? new->flags : 0;
 	bool log_dirty_pages = new_flags & KVM_MEM_LOG_DIRTY_PAGES;
 
+	/*
+	 * Recover hugepages when userfault is toggled off, as KVM forces 4KiB
+	 * mappings when userfault is enabled. See below for why CREATE, MOVE,
+	 * and DELETE don't need special handling. Note, common KVM handles
+	 * zapping SPTEs when userfault is toggled on.
+	 */
+	if (change == KVM_MR_FLAGS_ONLY && (old_flags & KVM_MEM_USERFAULT) &&
+	    !(new_flags & KVM_MEM_USERFAULT))
+		kvm_mmu_recover_huge_pages(kvm, new);
+
+	/*
+	 * Nothing more to do if dirty logging isn't being toggled.
+	 */
+	if (!((old_flags ^ new_flags) & KVM_MEM_LOG_DIRTY_PAGES))
+		return;
+
 	/*
 	 * Update CPU dirty logging if dirty logging is being toggled. This
 	 * applies to all operations.
 	 */
-	if ((old_flags ^ new_flags) & KVM_MEM_LOG_DIRTY_PAGES)
-		kvm_mmu_update_cpu_dirty_logging(kvm, log_dirty_pages);
+	kvm_mmu_update_cpu_dirty_logging(kvm, log_dirty_pages);
 
 	/*
 	 * Nothing more to do for RO slots (which can't be dirtied and can't be
@@ -13177,14 +13192,6 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 	if ((change != KVM_MR_FLAGS_ONLY) || (new_flags & KVM_MEM_READONLY))
 		return;
 
-	/*
-	 * READONLY and non-flags changes were filtered out above, and the only
-	 * other flag is LOG_DIRTY_PAGES, i.e. something is wrong if dirty
-	 * logging isn't being toggled on or off.
-	 */
-	if (WARN_ON_ONCE(!((old_flags ^ new_flags) & KVM_MEM_LOG_DIRTY_PAGES)))
-		return;
-
 	if (!log_dirty_pages) {
 		/*
 		 * Recover huge page mappings in the slot now that dirty logging
-- 
2.50.0.rc2.692.g299adb8693-goog
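
A note on kvm_do_userfault(): it is introduced in common KVM code earlier
in this series, so its definition is not visible in this patch. Purely for
context, here is a minimal sketch of the behavior the commit message
describes. The userfault_bitmap field, the one-bit-per-gfn layout, and the
exact kvm_prepare_memory_fault_exit() arguments are assumptions for
illustration, not the series' actual interface:

static bool kvm_do_userfault(struct kvm_vcpu *vcpu,
			     struct kvm_page_fault *fault)
{
	struct kvm_memory_slot *slot = fault->slot;
	unsigned long __user *bitmap;
	unsigned long word;
	gfn_t offset;

	if (!kvm_is_userfault_memslot(slot))
		return false;

	offset = fault->gfn - slot->base_gfn;
	bitmap = slot->userfault_bitmap;	/* assumed field name */

	/*
	 * If the userspace bitmap can't be read, report the page as
	 * userfault *without* preparing a memory fault exit; the caller
	 * in __kvm_mmu_faultin_pfn() then returns a bare -EFAULT.
	 */
	if (get_user(word, bitmap + offset / BITS_PER_LONG))
		return true;

	if (!(word & BIT(offset % BITS_PER_LONG)))
		return false;

	kvm_prepare_memory_fault_exit(vcpu, gfn_to_gpa(fault->gfn),
				      PAGE_SIZE, fault->write, fault->exec,
				      fault->is_private);
	return true;
}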