Re: [PATCH V3 1/1] KVM: TDX: Add sub-ioctl KVM_TDX_TERMINATE_VM

Sean Christopherson <seanjc@xxxxxxxxxx> · Fri, 6 Jun 2025 12:17:17 -0700

On Fri, Apr 25, 2025, Adrian Hunter wrote:
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index b952bc673271..5161f6f891d7 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -500,14 +500,7 @@ void tdx_mmu_release_hkid(struct kvm *kvm)
>  	 */
>  	mutex_lock(&tdx_lock);
>  
> -	/*
> -	 * Releasing HKID is in vm_destroy().
> -	 * After the above flushing vps, there should be no more vCPU
> -	 * associations, as all vCPU fds have been released at this stage.
> -	 */
>  	err = tdh_mng_vpflushdone(&kvm_tdx->td);
> -	if (err == TDX_FLUSHVP_NOT_DONE)
> -		goto out;

This belongs in a separate patch, with a changelog explaining what's up, because
my original "suggestion"[1] was simply a question :-)

+	/* Uh, what's going on here? */
 	if (err == TDX_FLUSHVP_NOT_DONE)

You did all the hard work of tracking down the history[2], and as above, this
definitely warrants its own changelog.

[1] https://lkml.kernel.org/r/Z-V0qyTn2bXdrPF7%40google.com
[2] https://lore.kernel.org/all/d7e220ab-3000-408b-9dd6-0e7ee06d79ec@xxxxxxxxx

>  	if (KVM_BUG_ON(err, kvm)) {
>  		pr_tdx_error(TDH_MNG_VPFLUSHDONE, err);
>  		pr_err("tdh_mng_vpflushdone() failed. HKID %d is leaked.\n",
> @@ -515,6 +508,7 @@ void tdx_mmu_release_hkid(struct kvm *kvm)
>  		goto out;
>  	}
>  
> +	write_lock(&kvm->mmu_lock);
>  	for_each_online_cpu(i) {
>  		if (packages_allocated &&
>  		    cpumask_test_and_set_cpu(topology_physical_package_id(i),
> @@ -539,7 +533,7 @@ void tdx_mmu_release_hkid(struct kvm *kvm)
>  	} else {
>  		tdx_hkid_free(kvm_tdx);
>  	}
> -
> +	write_unlock(&kvm->mmu_lock);
>  out:
>  	mutex_unlock(&tdx_lock);
>  	cpus_read_unlock();
> @@ -1789,13 +1783,13 @@ int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
>  	struct page *page = pfn_to_page(pfn);
>  	int ret;
>  
> -	/*
> -	 * HKID is released after all private pages have been removed, and set
> -	 * before any might be populated. Warn if zapping is attempted when
> -	 * there can't be anything populated in the private EPT.
> -	 */
> -	if (KVM_BUG_ON(!is_hkid_assigned(to_kvm_tdx(kvm)), kvm))
> -		return -EINVAL;
> +	if (!is_hkid_assigned(to_kvm_tdx(kvm))) {
> +		WARN_ON_ONCE(!kvm->vm_dead);

Should this be a KVM_BUG_ON(), i.e. should it kill the VM?  That'd set vm_dead,
which is kinda neat, in that it'd establish the very state the warning checks for :-)

> +		ret = tdx_reclaim_page(page);
> +		if (!ret)
> +			tdx_unpin(kvm, page);
> +		return ret;
> +	}
>  
>  	ret = tdx_sept_zap_private_spte(kvm, gfn, level, page);
>  	if (ret <= 0)