On Thu, 2025-04-24 at 11:09 +0800, Yan Zhao wrote: > int tdx_sept_split_private_spt(struct kvm *kvm, gfn_t gfn, enum pg_level level, > - void *private_spt) > + void *private_spt, bool mmu_lock_shared) > { > struct page *page = virt_to_page(private_spt); > int ret; > @@ -1842,6 +1842,29 @@ int tdx_sept_split_private_spt(struct kvm *kvm, gfn_t gfn, enum pg_level level, > if (KVM_BUG_ON(to_kvm_tdx(kvm)->state != TD_STATE_RUNNABLE || level != PG_LEVEL_2M, kvm)) > return -EINVAL; > > + /* > + * Split request with mmu_lock held for reading can only occur when one > + * vCPU accepts at 2MB level while another vCPU accepts at 4KB level. > + * Ignore this 4KB mapping request by setting violation_request_level to > + * 2MB and returning -EBUSY for retry. Then the next fault at 2MB level > + * would be a spurious fault. The vCPU accepting at 2MB will accept the > + * whole 2MB range. > + */ > + if (mmu_lock_shared) { > + struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); > + struct vcpu_tdx *tdx = to_tdx(vcpu); > + > + if (KVM_BUG_ON(!vcpu, kvm)) > + return -EOPNOTSUPP; > + > + /* Request to map as 2MB leaf for the whole 2MB range */ > + tdx->violation_gfn_start = gfn_round_for_level(gfn, level); > + tdx->violation_gfn_end = tdx->violation_gfn_start + KVM_PAGES_PER_HPAGE(level); > + tdx->violation_request_level = level; > + > + return -EBUSY; The way this infers so much from mmu_lock_shared is too hacky. Since guests shouldn't be doing this (one vCPU accepting at 2MB while another accepts at 4KB), what about just calling kvm_vm_dead(), with a short pr_warn()? Maybe even do it in set_external_spte_present() and declare it the rule for external page tables. That would shrink this patch significantly, with no expected user impact. 
> + } > + > ret = tdx_sept_zap_private_spte(kvm, gfn, level, page); > if (ret <= 0) > return ret; > diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h > index 0619e9390e5d..fcba76887508 100644 > --- a/arch/x86/kvm/vmx/x86_ops.h > +++ b/arch/x86/kvm/vmx/x86_ops.h > @@ -159,7 +159,7 @@ int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn, > int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn, > enum pg_level level, kvm_pfn_t pfn); > int tdx_sept_split_private_spt(struct kvm *kvm, gfn_t gfn, enum pg_level level, > - void *private_spt); > + void *private_spt, bool mmu_lock_shared); > > void tdx_flush_tlb_current(struct kvm_vcpu *vcpu); > void tdx_flush_tlb_all(struct kvm_vcpu *vcpu); > @@ -228,7 +228,8 @@ static inline int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn, > > static inline int tdx_sept_split_private_spt(struct kvm *kvm, gfn_t gfn, > enum pg_level level, > - void *private_spt) > + void *private_spt, > + bool mmu_lock_shared) > { > return -EOPNOTSUPP; > }