From: Xiaoyao Li <xiaoyao.li@xxxxxxxxx>

Implement the split_external_spt hook to support huge page splitting for
TDX when kvm->mmu_lock is held for writing.

Invoke tdh_mem_range_block(), tdh_mem_track(), kicking off vCPUs,
tdh_mem_page_demote() in sequence. Since kvm->mmu_lock is held for writing,
simply kick off vCPUs on tdx_operand_busy() to ensure the second SEAMCALL
invocation succeeds.

TDX module may return TDX_INTERRUPTED_RESTARTABLE when there is a pending
interrupt on the host side during tdh_mem_page_demote(). Retry indefinitely
on this error (both on the first attempt and on the retry issued after
kicking off vCPUs), as with exclusive kvm->mmu_lock the pending interrupt is
for host only.

[Yan: Split patch for exclusive mmu_lock only, handled busy error ]

Signed-off-by: Xiaoyao Li <xiaoyao.li@xxxxxxxxx>
Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
Signed-off-by: Yan Zhao <yan.y.zhao@xxxxxxxxx>
---
 arch/x86/kvm/vmx/main.c      |  1 +
 arch/x86/kvm/vmx/tdx.c       | 71 ++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/vmx/tdx_errno.h |  1 +
 arch/x86/kvm/vmx/x86_ops.h   |  9 +++++
 4 files changed, 82 insertions(+)

diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index ae8540576821..16c0c31dd066 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -62,6 +62,7 @@ static __init int vt_hardware_setup(void)
 		vt_x86_ops.set_external_spte = tdx_sept_set_private_spte;
 		vt_x86_ops.free_external_spt = tdx_sept_free_private_spt;
 		vt_x86_ops.remove_external_spte = tdx_sept_remove_private_spte;
+		vt_x86_ops.split_external_spt = tdx_sept_split_private_spt;
 		vt_x86_ops.protected_apic_has_interrupt = tdx_protected_apic_has_interrupt;
 	}
 
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index dd63a634e633..4386e1a0323e 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -1806,6 +1806,77 @@ int tdx_sept_free_private_spt(struct kvm *kvm, gfn_t gfn,
 	return tdx_reclaim_page(virt_to_page(private_spt), PG_LEVEL_4K);
 }
 
+/*
+ * Demote helper: invoke tdh_mem_page_demote() on the private mapping at
+ * @gfn/@level, passing @page to the TDX module.
+ *
+ * Return: 0 on success, -EIO (after KVM_BUG_ON()) if the SEAMCALL fails.
+ */
+static int tdx_spte_demote_private_spte(struct kvm *kvm, gfn_t gfn,
+					enum pg_level level, struct page *page)
+{
+	int tdx_level = pg_level_to_tdx_sept_level(level);
+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+	gpa_t gpa = gfn_to_gpa(gfn);
+	u64 err, entry, level_state;
+
+	/*
+	 * TDX_INTERRUPTED_RESTARTABLE only reports a pending host interrupt;
+	 * restart the SEAMCALL until it returns a different status.
+	 */
+	do {
+		err = tdh_mem_page_demote(&kvm_tdx->td, gpa, tdx_level, page,
+					  &entry, &level_state);
+	} while (err == TDX_INTERRUPTED_RESTARTABLE);
+
+	if (unlikely(tdx_operand_busy(err))) {
+		/*
+		 * Keep vCPUs out of the guest while retrying.  The retry can be
+		 * interrupted just like the first attempt, so restart it on
+		 * TDX_INTERRUPTED_RESTARTABLE as well instead of falling through
+		 * to KVM_BUG_ON() on a benign status.
+		 */
+		tdx_no_vcpus_enter_start(kvm);
+		do {
+			err = tdh_mem_page_demote(&kvm_tdx->td, gpa, tdx_level,
+						  page, &entry, &level_state);
+		} while (err == TDX_INTERRUPTED_RESTARTABLE);
+		tdx_no_vcpus_enter_stop(kvm);
+	}
+
+	if (KVM_BUG_ON(err, kvm)) {
+		pr_tdx_error_2(TDH_MEM_PAGE_DEMOTE, err, entry, level_state);
+		return -EIO;
+	}
+	return 0;
+}
+
+/*
+ * split_external_spt hook: zap the 2M private mapping at @gfn, call
+ * tdx_track(), then demote it using the page backing @private_spt.
+ *
+ * Return: 0 on success or when the zap step returns 0, -EINVAL (after
+ * KVM_BUG_ON()) if the TD is not runnable or @level is not PG_LEVEL_2M,
+ * otherwise the error returned by the zap or demote step.
+ */
+int tdx_sept_split_private_spt(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+			       void *private_spt)
+{
+	struct page *page = virt_to_page(private_spt);
+	int ret;
+
+	if (KVM_BUG_ON(to_kvm_tdx(kvm)->state != TD_STATE_RUNNABLE || level != PG_LEVEL_2M, kvm))
+		return -EINVAL;
+
+	ret = tdx_sept_zap_private_spte(kvm, gfn, level, page);
+	if (ret <= 0)
+		return ret;
+
+	tdx_track(kvm);
+
+	return tdx_spte_demote_private_spte(kvm, gfn, level, page);
+}
+
 int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
 				 enum pg_level level, kvm_pfn_t pfn)
 {
diff --git a/arch/x86/kvm/vmx/tdx_errno.h b/arch/x86/kvm/vmx/tdx_errno.h
index 6ff4672c4181..33589e7fa1e1 100644
--- a/arch/x86/kvm/vmx/tdx_errno.h
+++ b/arch/x86/kvm/vmx/tdx_errno.h
@@ -14,6 +14,7 @@
 #define TDX_NON_RECOVERABLE_TD_NON_ACCESSIBLE	0x6000000500000000ULL
 #define TDX_NON_RECOVERABLE_TD_WRONG_APIC_MODE	0x6000000700000000ULL
 #define TDX_INTERRUPTED_RESUMABLE		0x8000000300000000ULL
+#define TDX_INTERRUPTED_RESTARTABLE		0x8000000400000000ULL
 #define TDX_OPERAND_INVALID			0xC000010000000000ULL
 #define TDX_OPERAND_BUSY			0x8000020000000000ULL
 #define TDX_PREVIOUS_TLB_EPOCH_BUSY		0x8000020100000000ULL
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index 7c183da7c4d4..df7d4cd1436c 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -158,6 +158,8 @@ int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
 			      enum pg_level level, kvm_pfn_t pfn);
 int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
 				 enum pg_level level, kvm_pfn_t pfn);
+int tdx_sept_split_private_spt(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+			       void *private_spt);
 
 void tdx_flush_tlb_current(struct kvm_vcpu *vcpu);
 void tdx_flush_tlb_all(struct kvm_vcpu *vcpu);
@@ -224,6 +226,13 @@ static inline int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
 	return -EOPNOTSUPP;
 }
 
+static inline int tdx_sept_split_private_spt(struct kvm *kvm, gfn_t gfn,
+					     enum pg_level level,
+					     void *private_spt)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline void tdx_flush_tlb_current(struct kvm_vcpu *vcpu) {}
 static inline void tdx_flush_tlb_all(struct kvm_vcpu *vcpu) {}
 static inline void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level) {}
-- 
2.43.2