Re: [RFC PATCH 21/21] KVM: x86: Ignore splitting huge pages in fault path for TDX

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 2025-04-24 at 11:09 +0800, Yan Zhao wrote:

>  int tdx_sept_split_private_spt(struct kvm *kvm, gfn_t gfn, enum pg_level level,
> -			       void *private_spt)
> +			       void *private_spt, bool mmu_lock_shared)
>  {
>  	struct page *page = virt_to_page(private_spt);
>  	int ret;
> @@ -1842,6 +1842,29 @@ int tdx_sept_split_private_spt(struct kvm *kvm, gfn_t gfn, enum pg_level level,
>  	if (KVM_BUG_ON(to_kvm_tdx(kvm)->state != TD_STATE_RUNNABLE || level != PG_LEVEL_2M, kvm))
>  		return -EINVAL;
>  
> +	/*
> +	 * Split request with mmu_lock held for reading can only occur when one
> +	 * vCPU accepts at 2MB level while another vCPU accepts at 4KB level.
> +	 * Ignore this 4KB mapping request by setting violation_request_level to
> +	 * 2MB and returning -EBUSY for retry. Then the next fault at 2MB level
> +	 * would be a spurious fault. The vCPU accepting at 2MB will accept the
> +	 * whole 2MB range.
> +	 */
> +	if (mmu_lock_shared) {
> +		struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
> +		struct vcpu_tdx *tdx = to_tdx(vcpu);
> +
> +		if (KVM_BUG_ON(!vcpu, kvm))
> +			return -EOPNOTSUPP;
> +
> +		/* Request to map as 2MB leaf for the whole 2MB range */
> +		tdx->violation_gfn_start = gfn_round_for_level(gfn, level);
> +		tdx->violation_gfn_end = tdx->violation_gfn_start + KVM_PAGES_PER_HPAGE(level);
> +		tdx->violation_request_level = level;
> +
> +		return -EBUSY;

Inferring this much from mmu_lock_shared is too hacky. Since guests
shouldn't be doing this, what about just doing kvm_vm_dead(), with a little
pr_warn()? Maybe even just do it in set_external_spte_present() and declare it
the rule for external page tables. That would shrink this patch significantly,
with no expected user impact.

> +	}
> +
>  	ret = tdx_sept_zap_private_spte(kvm, gfn, level, page);
>  	if (ret <= 0)
>  		return ret;
> diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
> index 0619e9390e5d..fcba76887508 100644
> --- a/arch/x86/kvm/vmx/x86_ops.h
> +++ b/arch/x86/kvm/vmx/x86_ops.h
> @@ -159,7 +159,7 @@ int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
>  int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
>  				 enum pg_level level, kvm_pfn_t pfn);
>  int tdx_sept_split_private_spt(struct kvm *kvm, gfn_t gfn, enum pg_level level,
> -			       void *private_spt);
> +			       void *private_spt, bool mmu_lock_shared);
>  
>  void tdx_flush_tlb_current(struct kvm_vcpu *vcpu);
>  void tdx_flush_tlb_all(struct kvm_vcpu *vcpu);
> @@ -228,7 +228,8 @@ static inline int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
>  
>  static inline int tdx_sept_split_private_spt(struct kvm *kvm, gfn_t gfn,
>  					     enum pg_level level,
> -					     void *private_spt)
> +					     void *private_spt,
> +					     bool mmu_lock_shared)
>  {
>  	return -EOPNOTSUPP;
>  }





[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux