> +/* TDH.PHYMEM.PAGE.RECLAIM is allowed only when destroying the TD. */
> +static int __tdx_reclaim_page(struct page *page)
> +{
> +        u64 err, rcx, rdx, r8;
> +        int i;
> +
> +        for (i = TDX_SEAMCALL_RETRIES; i > 0; i--) {

Yes, the retries can be dropped according to the previous analysis at
https://lore.kernel.org/kvm/ZyMAD0tSZiadZ%2FYx@xxxxxxxxxxxxxxxxxxxxxxxxx.

Currently, all TD pages are 4K and are even freed before the HKID is
released, i.e. outside the control of tdh_phymem_page_reclaim().
Non-TD control pages are also 4K and are guaranteed to be reclaimed
before the TDR page is reclaimed.  So, for basic TDX, contention in
tdh_phymem_page_reclaim() is not expected.
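FWIW, with the retry loop dropped I'd expect the function to collapse
to something like the below (untested sketch, just to illustrate; it
reuses only the helpers already in this patch):

/* TDH.PHYMEM.PAGE.RECLAIM is allowed only when destroying the TD. */
static int __tdx_reclaim_page(struct page *page)
{
        u64 err, rcx, rdx, r8;

        /*
         * No retry loop: all TD pages are 4K and freed before the HKID
         * is released, and control pages are reclaimed before the TDR
         * page, so no contention on the TDR is expected here.
         */
        err = tdh_phymem_page_reclaim(page, &rcx, &rdx, &r8);
        if (WARN_ON_ONCE(err)) {
                pr_tdx_error_3(TDH_PHYMEM_PAGE_RECLAIM, err, rcx, rdx, r8);
                return -EIO;
        }
        return 0;
}

The error handling is unchanged; only the loop, the cond_resched() and
the loop counter go away.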
> +                err = tdh_phymem_page_reclaim(page, &rcx, &rdx, &r8);
> +
> +                /*
> +                 * TDH.PHYMEM.PAGE.RECLAIM is allowed only when the TD is in
> +                 * shutdown state, i.e. the TD is being destroyed.
> +                 * TDH.PHYMEM.PAGE.RECLAIM requires the TDR and the target
> +                 * page.  Because we're destroying the TD, it's rare to
> +                 * contend on the TDR.
> +                 */
> +                switch (err) {
> +                case TDX_OPERAND_BUSY | TDX_OPERAND_ID_RCX:
> +                case TDX_OPERAND_BUSY | TDX_OPERAND_ID_TDR:
> +                        cond_resched();
> +                        continue;
> +                default:
> +                        goto out;
> +                }
> +        }
> +
> +out:
> +        if (WARN_ON_ONCE(err)) {
> +                pr_tdx_error_3(TDH_PHYMEM_PAGE_RECLAIM, err, rcx, rdx, r8);
> +                return -EIO;
> +        }
> +        return 0;
> +}
> +

...

> +static void smp_func_do_phymem_cache_wb(void *unused)
> +{
> +        u64 err = 0;
> +        bool resume;
> +        int i;
> +
> +        /*
> +         * TDH.PHYMEM.CACHE.WB flushes caches associated with any TDX private
> +         * KeyID on the package or core.  The TDX module may not finish the
> +         * cache flush but return TDX_INTERRUPTED_RESUMABLE instead.  The
> +         * kernel should retry it until it returns success without
> +         * rescheduling.
> +         */
> +        for (i = TDX_SEAMCALL_RETRIES; i > 0; i--) {
> +                resume = !!err;
> +                err = tdh_phymem_cache_wb(resume);
> +                switch (err) {
> +                case TDX_INTERRUPTED_RESUMABLE:
> +                        continue;

These retries may not be removable, as tdh_phymem_cache_wb() is an
interruptible and restartable function.  If a pending interrupt is
detected during the operation, tdh_phymem_cache_wb() returns with a
TDX_INTERRUPTED_RESUMABLE status in RAX.  KVM needs to complete the
cache write-back operation by resuming tdh_phymem_cache_wb().

> +                case TDX_NO_HKID_READY_TO_WBCACHE:
> +                        err = TDX_SUCCESS; /* Already done by other thread */
> +                        fallthrough;
> +                default:
> +                        goto out;
> +                }
> +        }
> +
> +out:
> +        if (WARN_ON_ONCE(err))
> +                pr_tdx_error(TDH_PHYMEM_CACHE_WB, err);
> +}
> +
> +void tdx_mmu_release_hkid(struct kvm *kvm)
> +{
> +        bool packages_allocated, targets_allocated;
> +        struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
> +        cpumask_var_t packages, targets;
> +        u64 err;
> +        int i;
> +
> +        if (!is_hkid_assigned(kvm_tdx))
> +                return;
> +
> +        /* The KeyID has been allocated but the guest is not yet configured. */
> +        if (!kvm_tdx->td.tdr_page) {
> +                tdx_hkid_free(kvm_tdx);
> +                return;
> +        }
> +
> +        packages_allocated = zalloc_cpumask_var(&packages, GFP_KERNEL);
> +        targets_allocated = zalloc_cpumask_var(&targets, GFP_KERNEL);
> +        cpus_read_lock();
> +
> +        /*
> +         * TDH.PHYMEM.CACHE.WB tries to acquire the TDX module global lock
> +         * and can fail with TDX_OPERAND_BUSY when it fails to get the lock.
> +         * Multiple TDX guests can be destroyed simultaneously.  Take the
> +         * mutex to prevent it from getting an error.
> +         */
> +        mutex_lock(&tdx_lock);
> +
> +        /*
> +         * Releasing the HKID is done in vm_destroy().
> +         * After the vCPUs are flushed above, there should be no more vCPU
> +         * associations, as all vCPU fds have been released at this stage.
> +         */
> +        for_each_online_cpu(i) {
> +                if (packages_allocated &&
> +                    cpumask_test_and_set_cpu(topology_physical_package_id(i),
> +                                             packages))
> +                        continue;
> +                if (targets_allocated)
> +                        cpumask_set_cpu(i, targets);
> +        }
> +        if (targets_allocated)
> +                on_each_cpu_mask(targets, smp_func_do_phymem_cache_wb, NULL, true);
> +        else
> +                on_each_cpu(smp_func_do_phymem_cache_wb, NULL, true);

If either packages_allocated or targets_allocated is false,
tdh_phymem_cache_wb() will be executed on every online CPU.  Then
TDX_OPERAND_BUSY is possible, since tdh_phymem_cache_wb() needs to
acquire the per-package wbt_entries exclusively.  So, should we add
retries for this rare case, or simply leave it to the WARN_ON()?
(A rough sketch of the retry variant is at the end of this mail.)

> +        /*
> +         * In the case of an error in smp_func_do_phymem_cache_wb(), the
> +         * following tdh_mng_key_freeid() will fail.
> +         */
> +        err = tdh_mng_key_freeid(&kvm_tdx->td);
> +        if (KVM_BUG_ON(err, kvm)) {
> +                pr_tdx_error(TDH_MNG_KEY_FREEID, err);
> +                pr_err("tdh_mng_key_freeid() failed. HKID %d is leaked.\n",
> +                       kvm_tdx->hkid);
> +        } else {
> +                tdx_hkid_free(kvm_tdx);
> +        }
> +
> +        mutex_unlock(&tdx_lock);
> +        cpus_read_unlock();
> +        free_cpumask_var(targets);
> +        free_cpumask_var(packages);
> +}
> +
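If we do go the retry route for the busy case, something like the
following untested sketch might do.  Note the bare TDX_OPERAND_BUSY
check is my assumption (the module may encode an operand ID in the low
bits, which would then need masking); everything else reuses what is
already in this patch:

static void smp_func_do_phymem_cache_wb(void *unused)
{
        u64 err = 0;
        bool resume;
        int i;

        for (i = TDX_SEAMCALL_RETRIES; i > 0; i--) {
                /* Resume a write-back interrupted by a pending interrupt. */
                resume = (err == TDX_INTERRUPTED_RESUMABLE);
                err = tdh_phymem_cache_wb(resume);
                switch (err) {
                case TDX_INTERRUPTED_RESUMABLE:
                        continue;
                case TDX_OPERAND_BUSY:
                        /*
                         * Assumed encoding, see above.  Another CPU in the
                         * package holds the wbt_entries; restart (resume ==
                         * false) after giving it a chance to finish.
                         */
                        cpu_relax();
                        continue;
                case TDX_NO_HKID_READY_TO_WBCACHE:
                        err = TDX_SUCCESS; /* Already done by other thread */
                        fallthrough;
                default:
                        goto out;
                }
        }

out:
        if (WARN_ON_ONCE(err))
                pr_tdx_error(TDH_PHYMEM_CACHE_WB, err);
}

Two deliberate changes from the patch: resume is derived from the exact
TDX_INTERRUPTED_RESUMABLE status rather than !!err, so a busy retry
restarts the write-back instead of wrongly resuming it, and cpu_relax()
is used instead of cond_resched() because this runs from the
on_each_cpu() IPI context where sleeping is not allowed.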