On 8/27/2025 5:34 AM, Sagi Shahar wrote:
TDX module protects the EOI-bitmap which prevents the use of in-kernel
I/O APIC. See more details in the original patch [1]
The current implementation already enforces the use of split irqchip for
TDX but it does so at the vCPU creation time which is generally to late
to late -> too late
to fallback to split irqchip.
This patch follows Sean's recomendation from [2] and move the check if
recomendation -> recommendation
I/O APIC is supported for the VM at irqchip creation time.
[1] https://lore.kernel.org/lkml/20250222014757.897978-11-binbin.wu@xxxxxxxxxxxxxxx/
[2] https://lore.kernel.org/lkml/aK3vZ5HuKKeFuuM4@xxxxxxxxxx/
Suggested-by: Sean Christopherson <seanjc@xxxxxxxxxx>
Signed-off-by: Sagi Shahar <sagis@xxxxxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 3 +++
arch/x86/kvm/vmx/tdx.c | 15 ++++++++-------
arch/x86/kvm/x86.c | 10 ++++++++++
3 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f19a76d3ca0e..cb22fc48cdec 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1357,6 +1357,7 @@ struct kvm_arch {
u8 vm_type;
bool has_private_mem;
bool has_protected_state;
+ bool has_protected_eoi;
bool pre_fault_allowed;
struct hlist_head *mmu_page_hash;
struct list_head active_mmu_pages;
@@ -2284,6 +2285,8 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
#define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state)
+#define kvm_arch_has_protected_eoi(kvm) ((kvm)->arch.has_protected_eoi)
+
static inline u16 kvm_read_ldt(void)
{
u16 ldt;
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 66744f5768c8..8c270a159692 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -658,6 +658,12 @@ int tdx_vm_init(struct kvm *kvm)
*/
kvm->max_vcpus = min_t(int, kvm->max_vcpus, num_present_cpus());
+ /*
+ * TDX Module doesn't allow the hypervisor to modify the EOI-bitmap,
+ * i.e. all EOIs are accelerated and never trigger exits.
+ */
+ kvm->arch.has_protected_eoi = true;
+
kvm_tdx->state = TD_STATE_UNINITIALIZED;
return 0;
@@ -671,13 +677,8 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu)
if (kvm_tdx->state != TD_STATE_INITIALIZED)
return -EIO;
- /*
- * TDX module mandates APICv, which requires an in-kernel local APIC.
- * Disallow an in-kernel I/O APIC, because level-triggered interrupts
- * and thus the I/O APIC as a whole can't be faithfully emulated in KVM.
- */
- if (!irqchip_split(vcpu->kvm))
- return -EINVAL;
+ /* Split irqchip should be enforced at irqchip creation time. */
+ KVM_BUG_ON(irqchip_split(vcpu->kvm), vcpu->kvm);
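The condition is inverted: KVM_BUG_ON() fires when its condition is true,
so this would flag the valid split-irqchip case that the removed check
required. It should be:
KVM_BUG_ON(!irqchip_split(vcpu->kvm), vcpu->kvm);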
fpstate_set_confidential(&vcpu->arch.guest_fpu);
vcpu->arch.apic->guest_apic_protected = true;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a1c49bc681c4..a846dd3dcb23 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6966,6 +6966,16 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
if (irqchip_in_kernel(kvm))
goto create_irqchip_unlock;
+ /*
+ * Disallow an in-kernel I/O APIC for platforms that have protected
+ * EOI (such as TDX). The hypervisor can't modify the EOI-bitmap
+ * on these platforms which prevents the proper emulation of
+ * level-triggered interrupts.
+ */
+ r = -ENOTTY;
+ if (kvm_arch_has_protected_eoi(kvm))
+ goto create_irqchip_unlock;
+
r = -EINVAL;
if (kvm->created_vcpus)
goto create_irqchip_unlock;