Re: [PATCH v8 06/13] KVM: x86: Generalize private fault lookups to guest_memfd fault lookups

Hi Ackerley,

On Wed, 30 Apr 2025 at 19:58, Ackerley Tng <ackerleytng@xxxxxxxxxx> wrote:
>
> Fuad Tabba <tabba@xxxxxxxxxx> writes:
>
> > Until now, faults to private memory backed by guest_memfd are always
> > consumed from guest_memfd, whereas faults to shared memory are
> > consumed from anonymous memory. Subsequent patches will allow
> > guest_memfd-backed memory to be shared in-place and mapped by the
> > host. Faults to in-place shared memory should be consumed from
> > guest_memfd as well.
> >
> > In order to facilitate that, generalize the fault lookups. Currently,
> > only private memory is consumed from guest_memfd, so as it stands
> > this patch does not change behavior.
> >
> > Co-developed-by: David Hildenbrand <david@xxxxxxxxxx>
> > Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
> > Signed-off-by: Fuad Tabba <tabba@xxxxxxxxxx>
> > ---
> >  arch/x86/kvm/mmu/mmu.c   | 19 +++++++++----------
> >  include/linux/kvm_host.h |  6 ++++++
> >  2 files changed, 15 insertions(+), 10 deletions(-)
> >
> > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > index 6d5dd869c890..08eebd24a0e1 100644
> > --- a/arch/x86/kvm/mmu/mmu.c
> > +++ b/arch/x86/kvm/mmu/mmu.c
> > @@ -3258,7 +3258,7 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn,
> >
> >  static int __kvm_mmu_max_mapping_level(struct kvm *kvm,
> >                                      const struct kvm_memory_slot *slot,
> > -                                    gfn_t gfn, int max_level, bool is_private)
> > +                                    gfn_t gfn, int max_level, bool is_gmem)
> >  {
> >       struct kvm_lpage_info *linfo;
> >       int host_level;
> > @@ -3270,7 +3270,7 @@ static int __kvm_mmu_max_mapping_level(struct kvm *kvm,
> >                       break;
> >       }
> >
> > -     if (is_private)
> > +     if (is_gmem)
> >               return max_level;
>
> I think this renaming isn't quite accurate.
>
> IIUC, in __kvm_mmu_max_mapping_level() we skip the
> host_pfn_mapping_level() query if the gfn is private, because private
> memory will never be mapped to userspace, so there's no need to
> consult the userspace page tables.
>
> Renaming is_private to is_gmem in this function implies that whenever
> gmem is used, including for shared pages from gmem, lpage_info alone
> is enough and there's no need to query userspace page tables. That
> doesn't hold for shared gmem pages, which can be mapped into
> userspace.
>

I understand.
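
To spell out what I'll do in the respin: this check stays keyed to
is_private, since only private gfns are guaranteed never to be mapped
into userspace page tables. Roughly (untested sketch):

	/*
	 * Private memory is never mapped into userspace page tables,
	 * so host_pfn_mapping_level() has nothing to add here;
	 * lpage_info alone bounds the mapping level.
	 */
	if (is_private)
		return max_level;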

> >
> >       if (max_level == PG_LEVEL_4K)
> > @@ -3283,10 +3283,9 @@ static int __kvm_mmu_max_mapping_level(struct kvm *kvm,
> >  int kvm_mmu_max_mapping_level(struct kvm *kvm,
> >                             const struct kvm_memory_slot *slot, gfn_t gfn)
> >  {
> > -     bool is_private = kvm_slot_has_gmem(slot) &&
> > -                       kvm_mem_is_private(kvm, gfn);
> > +     bool is_gmem = kvm_slot_has_gmem(slot) && kvm_mem_from_gmem(kvm, gfn);
>
> This renaming should probably be undone too.

Ack.
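
i.e., I'll restore the original:

	bool is_private = kvm_slot_has_gmem(slot) &&
			  kvm_mem_is_private(kvm, gfn);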

> >
> > -     return __kvm_mmu_max_mapping_level(kvm, slot, gfn, PG_LEVEL_NUM, is_private);
> > +     return __kvm_mmu_max_mapping_level(kvm, slot, gfn, PG_LEVEL_NUM, is_gmem);
> >  }
> >
> >  void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> > @@ -4465,7 +4464,7 @@ static inline u8 kvm_max_level_for_order(int order)
> >       return PG_LEVEL_4K;
> >  }
> >
> > -static u8 kvm_max_private_mapping_level(struct kvm *kvm, kvm_pfn_t pfn,
> > +static u8 kvm_max_gmem_mapping_level(struct kvm *kvm, kvm_pfn_t pfn,
> >                                       u8 max_level, int gmem_order)
> >  {
> >       u8 req_max_level;
> > @@ -4491,7 +4490,7 @@ static void kvm_mmu_finish_page_fault(struct kvm_vcpu *vcpu,
> >                                r == RET_PF_RETRY, fault->map_writable);
> >  }
> >
> > -static int kvm_mmu_faultin_pfn_private(struct kvm_vcpu *vcpu,
> > +static int kvm_mmu_faultin_pfn_gmem(struct kvm_vcpu *vcpu,
> >                                      struct kvm_page_fault *fault)
> >  {
> >       int max_order, r;
> > @@ -4509,8 +4508,8 @@ static int kvm_mmu_faultin_pfn_private(struct kvm_vcpu *vcpu,
> >       }
> >
> >       fault->map_writable = !(fault->slot->flags & KVM_MEM_READONLY);
> > -     fault->max_level = kvm_max_private_mapping_level(vcpu->kvm, fault->pfn,
> > -                                                      fault->max_level, max_order);
> > +     fault->max_level = kvm_max_gmem_mapping_level(vcpu->kvm, fault->pfn,
> > +                                                   fault->max_level, max_order);
> >
> >       return RET_PF_CONTINUE;
> >  }
> > @@ -4521,7 +4520,7 @@ static int __kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu,
> >       unsigned int foll = fault->write ? FOLL_WRITE : 0;
> >
> >       if (fault->is_private)
> > -             return kvm_mmu_faultin_pfn_private(vcpu, fault);
> > +             return kvm_mmu_faultin_pfn_gmem(vcpu, fault);
> >
> >       foll |= FOLL_NOWAIT;
> >       fault->pfn = __kvm_faultin_pfn(fault->slot, fault->gfn, foll,
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index d9616ee6acc7..cdcd7ac091b5 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -2514,6 +2514,12 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
> >  }
> >  #endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */
> >
> > +static inline bool kvm_mem_from_gmem(struct kvm *kvm, gfn_t gfn)
> > +{
> > +     /* For now, only private memory gets consumed from guest_memfd. */
> > +     return kvm_mem_is_private(kvm, gfn);
> > +}
>
> Can I understand this function as "should fault from gmem"? And hence
> also "was faulted from gmem"?
>
> After this entire patch series, for arm64, KVM will always service stage
> 2 faults from gmem.
>
> Perhaps this function should retain your suggested name of
> kvm_mem_from_gmem(), but depend only on
> kvm_arch_gmem_supports_shared_mem(), since this patch series doesn't
> update the MMU code on x86. So something like this,

Ack.

> +static inline bool kvm_mem_from_gmem(struct kvm *kvm, gfn_t gfn)
> +{
> +       return kvm_arch_gmem_supports_shared_mem(kvm);
> +}
>
> with the only usage in arm64.
>
> When the MMU code for X86 is updated, we could then update the above
> with
>
> static inline bool kvm_mem_from_gmem(struct kvm *kvm, gfn_t gfn)
> {
> -       return kvm_arch_gmem_supports_shared_mem(kvm);
> +       return kvm_arch_gmem_supports_shared_mem(kvm) ||
> +              kvm_gmem_should_always_use_gmem(gfn_to_memslot(kvm, gfn)->gmem.file) ||
> +              kvm_mem_is_private(kvm, gfn);
> }
>
> where kvm_gmem_should_always_use_gmem() will read a guest_memfd flag?

I'm not sure I follow this one... Could you please explain what you
mean a bit more?
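
Or, to check my reading: is the idea something like the below?
(Hypothetical sketch only; the flags field and the
GUEST_MEMFD_FLAG_ALWAYS_USE name are made up for illustration, not
taken from the series.)

	static inline bool kvm_gmem_should_always_use_gmem(struct file *file)
	{
		struct kvm_gmem *gmem = file->private_data;

		/* Hypothetical flag set at guest_memfd creation time. */
		return gmem->flags & GUEST_MEMFD_FLAG_ALWAYS_USE;
	}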

Thanks,
/fuad

> > +
> >  #ifdef CONFIG_KVM_GMEM
> >  int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
> >                    gfn_t gfn, kvm_pfn_t *pfn, struct page **page,



