On Thu, Mar 13, 2025, Jon Kohler wrote: > @@ -28,6 +28,7 @@ u64 __read_mostly shadow_host_writable_mask; > u64 __read_mostly shadow_mmu_writable_mask; > u64 __read_mostly shadow_nx_mask; > u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ > +u64 __read_mostly shadow_ux_mask; > u64 __read_mostly shadow_user_mask; > u64 __read_mostly shadow_accessed_mask; > u64 __read_mostly shadow_dirty_mask; > @@ -313,8 +314,14 @@ u64 make_huge_page_split_spte(struct kvm *kvm, u64 huge_spte, > * the page executable as the NX hugepage mitigation no longer > * applies. > */ > - if ((role.access & ACC_EXEC_MASK) && is_nx_huge_page_enabled(kvm)) > + if ((role.access & ACC_EXEC_MASK) && is_nx_huge_page_enabled(kvm)) { This is wrong, and probably so is every other chunk of KVM that looks at ACC_EXEC_MASK. E.g. if a guest hugepage is executable for user but not supervisor, KVM will fail to make the small child user-executable. The bug in make_spte() is even worse, because KVM would let an MBEC-aware guest trigger the iTLB multi-hit #MC. > child_spte = make_spte_executable(child_spte); > + // TODO: For LKML: switch to vcpu->arch.pt_guest_exec_control? up > + // for suggestions on how best to toggle this. No, it belongs in the role. > + if (enable_pt_guest_exec_control && > + role.access & ACC_USER_EXEC_MASK) > + child_spte |= shadow_ux_mask; > + } > } > > return child_spte; > @@ -326,7 +333,7 @@ u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled) > u64 spte = SPTE_MMU_PRESENT_MASK; > > spte |= __pa(child_pt) | shadow_present_mask | PT_WRITABLE_MASK | > - shadow_user_mask | shadow_x_mask | shadow_me_value; > + shadow_user_mask | shadow_x_mask | shadow_ux_mask | shadow_me_value; > > if (ad_disabled) > spte |= SPTE_TDP_AD_DISABLED; > @@ -420,7 +427,8 @@ void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask) > } > EXPORT_SYMBOL_GPL(kvm_mmu_set_me_spte_mask); > > -void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only) > +void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only, > + bool has_guest_exec_ctrl) > { > shadow_user_mask = VMX_EPT_READABLE_MASK; > shadow_accessed_mask = has_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull; > @@ -428,8 +436,14 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only) > shadow_nx_mask = 0ull; > shadow_x_mask = VMX_EPT_EXECUTABLE_MASK; > /* VMX_EPT_SUPPRESS_VE_BIT is needed for W or X violation. */ > + // For LKML Review: > + // Do we need to modify shadow_present_mask in the MBEC case? No, because MBEC bifurcates X, it doesn't change whether or not an EPTE can be X without being R. From the SDM: 1. If the “mode-based execute control for EPT” VM-execution control is 1, setting bit 0 indicates also that software may also configure EPT paging-structure entries in which bits 1:0 are both clear and in which bit 10 is set (indicating a translation that can be used to fetch instructions from a supervisor-mode linear address or a user-mode linear address). > shadow_present_mask = > (has_exec_only ? 0ull : VMX_EPT_READABLE_MASK) | VMX_EPT_SUPPRESS_VE_BIT; > + > + shadow_ux_mask = > + has_guest_exec_ctrl ? VMX_EPT_USER_EXECUTABLE_MASK : 0ull; This is EPT specific code, just call this what it is: shadow_ux_mask = has_mbec ? VMX_EPT_USER_EXECUTABLE_MASK : 0ull; > + > /* > * EPT overrides the host MTRRs, and so KVM must program the desired > * memtype directly into the SPTEs. Note, this mask is just the mask > @@ -484,6 +498,7 @@ void kvm_mmu_reset_all_pte_masks(void) > shadow_dirty_mask = PT_DIRTY_MASK; > shadow_nx_mask = PT64_NX_MASK; > shadow_x_mask = 0; > + shadow_ux_mask = 0; > shadow_present_mask = PT_PRESENT_MASK; > > /* > diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h > index d9e22133b6d0..dc2f0dc9c46e 100644 > --- a/arch/x86/kvm/mmu/spte.h > +++ b/arch/x86/kvm/mmu/spte.h > @@ -171,6 +171,7 @@ extern u64 __read_mostly shadow_mmu_writable_mask; > extern u64 __read_mostly shadow_nx_mask; > extern u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ > extern u64 __read_mostly shadow_user_mask; > +extern u64 __read_mostly shadow_ux_mask; > extern u64 __read_mostly shadow_accessed_mask; > extern u64 __read_mostly shadow_dirty_mask; > extern u64 __read_mostly shadow_mmio_value; > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > index 0aadfa924045..d16e3f170258 100644 > --- a/arch/x86/kvm/vmx/vmx.c > +++ b/arch/x86/kvm/vmx/vmx.c > @@ -8544,7 +8544,8 @@ __init int vmx_hardware_setup(void) > > if (enable_ept) > kvm_mmu_set_ept_masks(enable_ept_ad_bits, > - cpu_has_vmx_ept_execute_only()); > + cpu_has_vmx_ept_execute_only(), > + enable_pt_guest_exec_control); Without the module param, just cpu_has_vmx_mbec().