>@@ -4479,6 +4543,9 @@ static bool is_vmcs12_ext_field(unsigned long field)
> 	case GUEST_IDTR_BASE:
> 	case GUEST_PENDING_DBG_EXCEPTIONS:
> 	case GUEST_BNDCFGS:
>+	case GUEST_S_CET:
>+	case GUEST_SSP:
>+	case GUEST_INTR_SSP_TABLE:
> 		return true;
> 	default:
> 		break;
>@@ -4529,6 +4596,10 @@ static void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
> 	vmcs12->guest_pending_dbg_exceptions =
> 		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
>
>+	cet_vmcs_fields_get(&vmx->vcpu, &vmcs12->guest_s_cet,
>+			    &vmcs12->guest_ssp,
>+			    &vmcs12->guest_ssp_tbl);
>+
> 	vmx->nested.need_sync_vmcs02_to_vmcs12_rare = false;
> }
>
>@@ -4760,6 +4831,10 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
> 	if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
> 		vmcs_write64(GUEST_BNDCFGS, 0);
>
>+	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_CET_STATE)
>+		cet_vmcs_fields_set(vcpu, vmcs12->host_s_cet, vmcs12->host_ssp,
>+				    vmcs12->host_ssp_tbl);
>+

Xin wrote a new test [*] and found a bug here. If VM_EXIT_LOAD_CET_STATE is
not set, the guest values should be retained after the nested VM-exit, so
this should be:

	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_CET_STATE)
		cet_vmcs_fields_set(vcpu, vmcs12->host_s_cet, vmcs12->host_ssp,
				    vmcs12->host_ssp_tbl);
	else
		cet_vmcs_fields_set(vcpu, vmcs12->guest_s_cet, vmcs12->guest_ssp,
				    vmcs12->guest_ssp_tbl);

This creates a dependency: vmcs12->guest_s_cet/ssp/ssp_tbl must be up-to-date
here. So, vmcs12->guest_s_cet/ssp/ssp_tbl should be synced from vmcs02 on each
nested VM-exit rather than lazily in sync_vmcs02_to_vmcs12_rare(). Specifically,
the cet_vmcs_fields_get() call in sync_vmcs02_to_vmcs12_rare() should be moved
to sync_vmcs02_to_vmcs12(), and is_vmcs12_ext_field() should return false for
the CET VMCS fields.

Note that Xin's test differs from the test I wrote [**] in L2 guest behavior:
his test writes to the S_CET MSR, while my test reads the S_CET MSR, which is
why this bug escaped my CET test.

[*]: https://github.com/xinli-intel/kvm-unit-tests/commit/f1df81c3189a3328adb47c7dd6cd985830fe738f
[**]: https://lore.kernel.org/kvm/20250626073459.12990-9-minipli@xxxxxxxxxxxxxx/

The diff below fixes this issue:

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 7c88fedc27c7..eadd659ae22f 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -4596,9 +4596,6 @@ static bool is_vmcs12_ext_field(unsigned long field)
 	case GUEST_IDTR_BASE:
 	case GUEST_PENDING_DBG_EXCEPTIONS:
 	case GUEST_BNDCFGS:
-	case GUEST_S_CET:
-	case GUEST_SSP:
-	case GUEST_INTR_SSP_TABLE:
 		return true;
 	default:
 		break;
@@ -4649,10 +4646,6 @@ static void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
 	vmcs12->guest_pending_dbg_exceptions =
 		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
 
-	cet_vmcs_fields_get(&vmx->vcpu, &vmcs12->guest_s_cet,
-			    &vmcs12->guest_ssp,
-			    &vmcs12->guest_ssp_tbl);
-
 	vmx->nested.need_sync_vmcs02_to_vmcs12_rare = false;
 }
 
@@ -4759,6 +4752,10 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 
 	if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
 		vmcs12->guest_ia32_efer = vcpu->arch.efer;
+
+	cet_vmcs_fields_get(&vmx->vcpu, &vmcs12->guest_s_cet,
+			    &vmcs12->guest_ssp,
+			    &vmcs12->guest_ssp_tbl);
 }
 
 /*
@@ -4884,9 +4881,17 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 	if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
 		vmcs_write64(GUEST_BNDCFGS, 0);
 
+	/*
+	 * Load CET state from host state if VM_EXIT_LOAD_CET_STATE is set.
+	 * Otherwise CET state should be retained across VM-exit, i.e.,
+	 * guest values should be propagated from vmcs12 to vmcs01.
+	 */
 	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_CET_STATE)
 		cet_vmcs_fields_set(vcpu, vmcs12->host_s_cet, vmcs12->host_ssp,
 				    vmcs12->host_ssp_tbl);
+	else
+		cet_vmcs_fields_set(vcpu, vmcs12->guest_s_cet, vmcs12->guest_ssp,
+				    vmcs12->guest_ssp_tbl);
 
 	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
 		vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);