On Thu, May 22, 2025 at 06:17:54PM -0700, Sean Christopherson wrote:
> Enforce the MMIO Stale Data mitigation if KVM has ever mapped host MMIO
> into the VM, not if the VM has an assigned device.  VFIO is but one of
> many ways to map host MMIO into a KVM guest, and even within VFIO,
> formally attaching a device to a VM via KVM_DEV_VFIO_FILE_ADD is entirely
> optional.
> 
> Track whether or not the guest can access host MMIO on a per-MMU basis,
> i.e. based on whether or not the vCPU has a mapping to host MMIO.  For
> simplicity, track MMIO mappings in "special" roots (those without a
> kvm_mmu_page) at the VM level, as only Intel CPUs are vulnerable, and so
> only legacy 32-bit shadow paging is affected, i.e. lack of precise
> tracking is a complete non-issue.
> 
> Make the per-MMU and per-VM flags sticky.  Detecting when *all* MMIO
> mappings have been removed would be absurdly complex.  And in practice,
> removing MMIO from a guest will be done by deleting the associated
> memslot, which by default will force KVM to re-allocate all roots.
> Special roots will forever be mitigated, but as above, the affected
> scenarios are not expected to be performance sensitive.
> 
> Use a VMX_RUN flag to communicate the need for a buffers flush to
> vmx_vcpu_enter_exit() so that kvm_vcpu_can_access_host_mmio() and all its
> dependencies don't need to be marked __always_inline, e.g. so that KASAN
> doesn't trigger a noinstr violation.
> 
> Cc: Pawan Gupta <pawan.kumar.gupta@xxxxxxxxxxxxxxx>
> Cc: Borislav Petkov <bp@xxxxxxxxx>
> Fixes: 8cb861e9e3c9 ("x86/speculation/mmio: Add mitigation for Processor MMIO Stale Data")
> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> ---
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/mmu/mmu_internal.h |  3 +++
>  arch/x86/kvm/mmu/spte.c         | 21 +++++++++++++++++++++
>  arch/x86/kvm/mmu/spte.h         | 10 ++++++++++
>  arch/x86/kvm/vmx/run_flags.h    | 10 ++++++----
>  arch/x86/kvm/vmx/vmx.c          |  8 +++++++-
>  6 files changed, 48 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 01edcefbd937..043be00ec5b8 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1458,6 +1458,7 @@ struct kvm_arch {
>  	bool x2apic_format;
>  	bool x2apic_broadcast_quirk_disabled;
>  
> +	bool has_mapped_host_mmio;
>  	bool guest_can_read_msr_platform_info;
>  	bool exception_payload_enabled;
>  
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index db8f33e4de62..65f3c89d7c5d 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -103,6 +103,9 @@ struct kvm_mmu_page {
>  		int root_count;
>  		refcount_t tdp_mmu_root_count;
>  	};
> +
> +	bool has_mapped_host_mmio;
> +
>  	union {
>  		/* These two members aren't used for TDP MMU */
>  		struct {
> diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
> index 3f16c91aa042..5fb43a834d48 100644
> --- a/arch/x86/kvm/mmu/spte.c
> +++ b/arch/x86/kvm/mmu/spte.c
> @@ -138,6 +138,22 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn, int *is_host_mmio)
>  	return *is_host_mmio;
>  }
>  
> +static void kvm_track_host_mmio_mapping(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_mmu_page *root = root_to_sp(vcpu->arch.mmu->root.hpa);
> +
> +	if (root)
> +		WRITE_ONCE(root->has_mapped_host_mmio, true);
> +	else
> +		WRITE_ONCE(vcpu->kvm->arch.has_mapped_host_mmio, true);
> +
> +	/*
> +	 * Force vCPUs to exit and flush CPU buffers if the vCPU is using the
> +	 * affected root(s).
> +	 */
> +	kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_OUTSIDE_GUEST_MODE);
> +}
> +
>  /*
>   * Returns true if the SPTE needs to be updated atomically due to having bits
>   * that may be changed without holding mmu_lock, and for which KVM must not
> @@ -276,6 +292,11 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
>  		mark_page_dirty_in_slot(vcpu->kvm, slot, gfn);
>  	}
>  
> +	if (static_branch_unlikely(&mmio_stale_data_clear) &&
> +	    !kvm_vcpu_can_access_host_mmio(vcpu) &&
> +	    kvm_is_mmio_pfn(pfn, &is_host_mmio))
> +		kvm_track_host_mmio_mapping(vcpu);
> +
>  	*new_spte = spte;
>  	return wrprot;
>  }
> diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
> index 1e94f081bdaf..3133f066927e 100644
> --- a/arch/x86/kvm/mmu/spte.h
> +++ b/arch/x86/kvm/mmu/spte.h
> @@ -280,6 +280,16 @@ static inline bool is_mirror_sptep(tdp_ptep_t sptep)
>  	return is_mirror_sp(sptep_to_sp(rcu_dereference(sptep)));
>  }
>  
> +static inline bool kvm_vcpu_can_access_host_mmio(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_mmu_page *root = root_to_sp(vcpu->arch.mmu->root.hpa);
> +
> +	if (root)
> +		return READ_ONCE(root->has_mapped_host_mmio);
> +
> +	return READ_ONCE(vcpu->kvm->arch.has_mapped_host_mmio);
> +}
> +
>  static inline bool is_mmio_spte(struct kvm *kvm, u64 spte)
>  {
>  	return (spte & shadow_mmio_mask) == kvm->arch.shadow_mmio_value &&
> diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
> index 6a9bfdfbb6e5..2f20fb170def 100644
> --- a/arch/x86/kvm/vmx/run_flags.h
> +++ b/arch/x86/kvm/vmx/run_flags.h
> @@ -2,10 +2,12 @@
>  #ifndef __KVM_X86_VMX_RUN_FLAGS_H
>  #define __KVM_X86_VMX_RUN_FLAGS_H
>  
> -#define VMX_RUN_VMRESUME_SHIFT		0
> -#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT	1
> +#define VMX_RUN_VMRESUME_SHIFT				0
> +#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT			1
> +#define VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO_SHIFT	2
>  
> -#define VMX_RUN_VMRESUME		BIT(VMX_RUN_VMRESUME_SHIFT)
> -#define VMX_RUN_SAVE_SPEC_CTRL		BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
> +#define VMX_RUN_VMRESUME			BIT(VMX_RUN_VMRESUME_SHIFT)
> +#define VMX_RUN_SAVE_SPEC_CTRL			BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
> +#define VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO	BIT(VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO_SHIFT)
>  
>  #endif /* __KVM_X86_VMX_RUN_FLAGS_H */
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index f79604bc0127..27e870d83122 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -74,6 +74,8 @@
>  #include "vmx_onhyperv.h"
>  #include "posted_intr.h"
>  
> +#include "mmu/spte.h"
> +
>  MODULE_AUTHOR("Qumranet");
>  MODULE_DESCRIPTION("KVM support for VMX (Intel VT-x) extensions");
>  MODULE_LICENSE("GPL");
> @@ -959,6 +961,10 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
>  	if (!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))
>  		flags |= VMX_RUN_SAVE_SPEC_CTRL;
>  
> +	if (static_branch_unlikely(&mmio_stale_data_clear) &&
> +	    kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
> +		flags |= VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO;
> +
>  	return flags;
>  }
>  
> @@ -7282,7 +7288,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
>  	if (static_branch_unlikely(&vmx_l1d_should_flush))
>  		vmx_l1d_flush(vcpu);
>  	else if (static_branch_unlikely(&mmio_stale_data_clear) &&
> -		 kvm_arch_has_assigned_device(vcpu->kvm))
> +		 (flags & VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO))
>  		mds_clear_cpu_buffers();

I think this also paves the way for the buffer clears for MDS and MMIO to be
done in a single place.
Please let me know if below is feasible:

diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
index 2f20fb170def..004fe1ca89f0 100644
--- a/arch/x86/kvm/vmx/run_flags.h
+++ b/arch/x86/kvm/vmx/run_flags.h
@@ -2,12 +2,12 @@
 #ifndef __KVM_X86_VMX_RUN_FLAGS_H
 #define __KVM_X86_VMX_RUN_FLAGS_H
 
-#define VMX_RUN_VMRESUME_SHIFT				0
-#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT			1
-#define VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO_SHIFT	2
+#define VMX_RUN_VMRESUME_SHIFT			0
+#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT		1
+#define VMX_RUN_CLEAR_CPU_BUFFERS_SHIFT		2
 
-#define VMX_RUN_VMRESUME			BIT(VMX_RUN_VMRESUME_SHIFT)
-#define VMX_RUN_SAVE_SPEC_CTRL			BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
-#define VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO	BIT(VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO_SHIFT)
+#define VMX_RUN_VMRESUME		BIT(VMX_RUN_VMRESUME_SHIFT)
+#define VMX_RUN_SAVE_SPEC_CTRL		BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
+#define VMX_RUN_CLEAR_CPU_BUFFERS	BIT(VMX_RUN_CLEAR_CPU_BUFFERS_SHIFT)
 
 #endif /* __KVM_X86_VMX_RUN_FLAGS_H */
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index f6986dee6f8c..ab602ce4967e 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -141,6 +141,8 @@ SYM_FUNC_START(__vmx_vcpu_run)
 	/* Check if vmlaunch or vmresume is needed */
 	bt   $VMX_RUN_VMRESUME_SHIFT, %ebx
 
+	test $VMX_RUN_CLEAR_CPU_BUFFERS, %ebx
+
 	/* Load guest registers.  Don't clobber flags. */
 	mov VCPU_RCX(%_ASM_AX), %_ASM_CX
 	mov VCPU_RDX(%_ASM_AX), %_ASM_DX
@@ -161,8 +163,11 @@ SYM_FUNC_START(__vmx_vcpu_run)
 	/* Load guest RAX.  This kills the @regs pointer! */
 	mov VCPU_RAX(%_ASM_AX), %_ASM_AX
 
+	/* Check EFLAGS.ZF from the VMX_RUN_CLEAR_CPU_BUFFERS bit test above */
+	jz .Lskip_clear_cpu_buffers
 	/* Clobbers EFLAGS.ZF */
 	CLEAR_CPU_BUFFERS
+.Lskip_clear_cpu_buffers:
 
 	/* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */
 	jnc .Lvmlaunch
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1e4790c8993a..1415aeea35f7 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -958,9 +958,10 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
 	if (!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))
 		flags |= VMX_RUN_SAVE_SPEC_CTRL;
 
-	if (static_branch_unlikely(&mmio_stale_data_clear) &&
-	    kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
-		flags |= VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO;
+	if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) ||
+	    (static_branch_unlikely(&mmio_stale_data_clear) &&
+	     kvm_vcpu_can_access_host_mmio(&vmx->vcpu)))
+		flags |= VMX_RUN_CLEAR_CPU_BUFFERS;
 
 	return flags;
 }
@@ -7296,9 +7297,6 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 	 */
 	if (static_branch_unlikely(&vmx_l1d_should_flush))
 		vmx_l1d_flush(vcpu);
-	else if (static_branch_unlikely(&mmio_stale_data_clear) &&
-		 (flags & VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO))
-		mds_clear_cpu_buffers();
 
 	vmx_disable_fb_clear(vmx);
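
Since the vmenter.S hunks above are a bit spread out, here is roughly how I
expect that stretch of __vmx_vcpu_run to read with the change applied; just a
sketch assembled from the diff, with the guest register loads elided:

	/* Check if vmlaunch or vmresume is needed */
	bt   $VMX_RUN_VMRESUME_SHIFT, %ebx

	/* Sets EFLAGS.ZF; the register loads below don't clobber flags */
	test $VMX_RUN_CLEAR_CPU_BUFFERS, %ebx

	/* ... load guest GPRs, flags preserved ... */

	/* Check EFLAGS.ZF from the VMX_RUN_CLEAR_CPU_BUFFERS bit test above */
	jz .Lskip_clear_cpu_buffers
	/* Clobbers EFLAGS.ZF */
	CLEAR_CPU_BUFFERS
.Lskip_clear_cpu_buffers:

	/* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */
	jnc .Lvmlaunch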