Re: [RFC PATCH v2 21/23] KVM: TDX: Preallocate PAMT pages to be used in split path

Binbin Wu <binbin.wu@xxxxxxxxxxxxxxx> · Thu, 4 Sep 2025 17:17:40 +0800

On 8/7/2025 5:46 PM, Yan Zhao wrote:
From: "Kirill A. Shutemov" <kirill.shutemov@xxxxxxxxxxxxxxx>

Preallocate a page to be used in the split_external_spt() path.

Not just "a" page: the cache is topped up with tdx_nr_pamt_pages() * 2 pages.


The kernel needs one PAMT page pair for external_spt and one that is provided
directly to the TDH.MEM.PAGE.DEMOTE SEAMCALL.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
Co-developed-by: Yan Zhao <yan.y.zhao@xxxxxxxxx>
Signed-off-by: Yan Zhao <yan.y.zhao@xxxxxxxxx>
---
RFC v2:
- Pulled from
   git://git.kernel.org/pub/scm/linux/kernel/git/kas/linux.git tdx/dpamt-huge.
- Implemented the flow of topup pamt_page_cache in
   tdp_mmu_split_huge_pages_root() (Yan)
---
  arch/x86/include/asm/kvm_host.h |  2 ++
  arch/x86/kvm/mmu/mmu.c          |  1 +
  arch/x86/kvm/mmu/tdp_mmu.c      | 51 +++++++++++++++++++++++++++++++++
  3 files changed, 54 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6b6c46c27390..508b133df903 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1591,6 +1591,8 @@ struct kvm_arch {
  #define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1)
  	struct kvm_mmu_memory_cache split_desc_cache;
  
+	struct kvm_mmu_memory_cache pamt_page_cache;
+
  	gfn_t gfn_direct_bits;
  
  	/*
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index f23d8fc59323..e581cee37f64 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6848,6 +6848,7 @@ static void mmu_free_vm_memory_caches(struct kvm *kvm)
  	kvm_mmu_free_memory_cache(&kvm->arch.split_desc_cache);
  	kvm_mmu_free_memory_cache(&kvm->arch.split_page_header_cache);
  	kvm_mmu_free_memory_cache(&kvm->arch.split_shadow_page_cache);
+	kvm_mmu_free_memory_cache(&kvm->arch.pamt_page_cache);
  }
  
  void kvm_mmu_uninit_vm(struct kvm *kvm)
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index eb758aaa4374..064c4e823658 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1584,6 +1584,27 @@ static bool iter_cross_boundary(struct tdp_iter *iter, gfn_t start, gfn_t end)
  		 (iter->gfn + KVM_PAGES_PER_HPAGE(iter->level)) <= end);
  }
  
+static bool need_topup_mirror_caches(struct kvm *kvm)
+{
+	int nr = tdx_nr_pamt_pages() * 2;
+
+	return kvm_mmu_memory_cache_nr_free_objects(&kvm->arch.pamt_page_cache) < nr;
+}
+
+static int topup_mirror_caches(struct kvm *kvm)
+{
+	int r, nr;
+
+	/* One for external_spt, one for TDH.MEM.PAGE.DEMOTE */

The comment is a bit confusing.
IIUC, external_spt is also for TDH.MEM.PAGE.DEMOTE,
and it's "one pair" of PAMT pages rather than "one".

+	nr = tdx_nr_pamt_pages() * 2;
+
+	r = kvm_mmu_topup_memory_cache(&kvm->arch.pamt_page_cache, nr);
+	if (r)
+		return r;
+
+	return 0;

This could be simplified:
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 064c4e823658..35d052aa408c 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1593,16 +1593,12 @@ static bool need_topup_mirror_caches(struct kvm *kvm)

 static int topup_mirror_caches(struct kvm *kvm)
 {
-       int r, nr;
+       int nr;

        /* One for external_spt, one for TDH.MEM.PAGE.DEMOTE */
        nr = tdx_nr_pamt_pages() * 2;

-       r = kvm_mmu_topup_memory_cache(&kvm->arch.pamt_page_cache, nr);
-       if (r)
-               return r;
-
-       return 0;
+       return kvm_mmu_topup_memory_cache(&kvm->arch.pamt_page_cache, nr);
 }

+}
+
  static int tdp_mmu_split_huge_pages_root(struct kvm *kvm,
  					 struct kvm_mmu_page *root,
  					 gfn_t start, gfn_t end,
@@ -1656,6 +1677,36 @@ static int tdp_mmu_split_huge_pages_root(struct kvm *kvm,
  			continue;
  		}
  
+		if (is_mirror_sp(root) && need_topup_mirror_caches(kvm)) {
+			int r;
+
+			rcu_read_unlock();
+
+			if (shared)
+				read_unlock(&kvm->mmu_lock);
+			else
+				write_unlock(&kvm->mmu_lock);
+
+			r = topup_mirror_caches(kvm);
+
+			if (shared)
+				read_lock(&kvm->mmu_lock);
+			else
+				write_lock(&kvm->mmu_lock);
+
+			if (r) {
+				trace_kvm_mmu_split_huge_page(iter.gfn,
+							      iter.old_spte,
+							      iter.level, r);
+				return r;
+			}
+
+			rcu_read_lock();
+
+			iter.yielded = true;
+			continue;
+		}
+
  		tdp_mmu_init_child_sp(sp, &iter);
  
  		if (tdp_mmu_split_huge_page(kvm, &iter, sp, shared))