On Fri, Mar 28, 2025, Xin Li (Intel) wrote:
> From: Xin Li <xin3.li@xxxxxxxxx>
>
> Handle FRED MSR access requests, allowing FRED context to be set/get
> from both host and guest.
>
> During VM save/restore and live migration, FRED context needs to be
> saved/restored, which requires FRED MSRs to be accessed from userspace,
> e.g., Qemu.
>
> Note, handling of MSR_IA32_FRED_SSP0, i.e., MSR_IA32_PL0_SSP, is not
> added yet, which is done in the KVM CET patch set.
>
> Signed-off-by: Xin Li <xin3.li@xxxxxxxxx>
> Signed-off-by: Xin Li (Intel) <xin@xxxxxxxxx>
> Tested-by: Shan Kang <shan.kang@xxxxxxxxx>
> ---
>
> Changes since v2:
> * Add a helper to convert FRED MSR index to VMCS field encoding to
>   make the code more compact (Chao Gao).
> * Get rid of the "host_initiated" check because userspace has to set
>   CPUID before MSRs (Chao Gao & Sean Christopherson).
> * Address a few cleanup comments (Sean Christopherson).
>
> Changes since v1:
> * Use kvm_cpu_cap_has() instead of cpu_feature_enabled() (Chao Gao).
> * Fail host requested FRED MSRs access if KVM cannot virtualize FRED
>   (Chao Gao).
> * Handle the case FRED MSRs are valid but KVM cannot virtualize FRED
>   (Chao Gao).
> * Add sanity checks when writing to FRED MSRs.
> ---
>  arch/x86/kvm/vmx/vmx.c | 48 ++++++++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/x86.c     | 28 ++++++++++++++++++++++++
>  2 files changed, 76 insertions(+)
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 1fd32aa255f9..ae9712624413 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1426,6 +1426,24 @@ static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
>  	preempt_enable();
>  	vmx->msr_guest_kernel_gs_base = data;
>  }
> +
> +static u64 vmx_read_guest_fred_rsp0(struct vcpu_vmx *vmx)
> +{
> +	preempt_disable();
> +	if (vmx->guest_state_loaded)
> +		vmx->msr_guest_fred_rsp0 = read_msr(MSR_IA32_FRED_RSP0);
> +	preempt_enable();
> +	return vmx->msr_guest_fred_rsp0;
> +}
> +
> +static void vmx_write_guest_fred_rsp0(struct vcpu_vmx *vmx, u64 data)
> +{
> +	preempt_disable();
> +	if (vmx->guest_state_loaded)
> +		wrmsrns(MSR_IA32_FRED_RSP0, data);
> +	preempt_enable();
> +	vmx->msr_guest_fred_rsp0 = data;
> +}
>  #endif

Maybe add helpers to deal with the preemption stuff?  Oh, never mind,
FRED uses WRMSRNS.  Hmm, actually, can't these all be non-serializing?
KVM is propagating *guest* values to hardware, so a VM-Enter is
guaranteed before the CPU value can be consumed.
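E.g., something like the below (completely untested):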
#ifdef CONFIG_X86_64
static u64 vmx_read_guest_host_msr(struct vcpu_vmx *vmx, u32 msr, u64 *cache)
{
	preempt_disable();
	if (vmx->guest_state_loaded)
		*cache = read_msr(msr);
	preempt_enable();
	return *cache;
}

static void vmx_write_guest_host_msr(struct vcpu_vmx *vmx, u32 msr, u64 data,
				     u64 *cache)
{
	preempt_disable();
	if (vmx->guest_state_loaded)
		wrmsrns(msr, data);
	preempt_enable();
	*cache = data;
}

static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
{
	return vmx_read_guest_host_msr(vmx, MSR_KERNEL_GS_BASE,
				       &vmx->msr_guest_kernel_gs_base);
}

static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
{
	vmx_write_guest_host_msr(vmx, MSR_KERNEL_GS_BASE, data,
				 &vmx->msr_guest_kernel_gs_base);
}

static u64 vmx_read_guest_fred_rsp0(struct vcpu_vmx *vmx)
{
	return vmx_read_guest_host_msr(vmx, MSR_IA32_FRED_RSP0,
				       &vmx->msr_guest_fred_rsp0);
}

static void vmx_write_guest_fred_rsp0(struct vcpu_vmx *vmx, u64 data)
{
	vmx_write_guest_host_msr(vmx, MSR_IA32_FRED_RSP0, data,
				 &vmx->msr_guest_fred_rsp0);
}
#endif

>  static void grow_ple_window(struct kvm_vcpu *vcpu)
> @@ -2039,6 +2057,24 @@ int vmx_get_feature_msr(u32 msr, u64 *data)
>  	}
>  }
>
> +#ifdef CONFIG_X86_64
> +static u32 fred_msr_vmcs_fields[] = {

This should be const.

> +	GUEST_IA32_FRED_RSP1,
> +	GUEST_IA32_FRED_RSP2,
> +	GUEST_IA32_FRED_RSP3,
> +	GUEST_IA32_FRED_STKLVLS,
> +	GUEST_IA32_FRED_SSP1,
> +	GUEST_IA32_FRED_SSP2,
> +	GUEST_IA32_FRED_SSP3,
> +	GUEST_IA32_FRED_CONFIG,
> +};

I think it also makes sense to add a static_assert() here, more so to
help readers follow along than anything else.

	static_assert(MSR_IA32_FRED_CONFIG - MSR_IA32_FRED_RSP1 ==
		      ARRAY_SIZE(fred_msr_vmcs_fields) - 1);

> +
> +static u32 fred_msr_to_vmcs(u32 msr)
> +{
> +	return fred_msr_vmcs_fields[msr - MSR_IA32_FRED_RSP1];
> +}
> +#endif
> +
>  /*
>   * Reads an msr value (of 'msr_info->index') into 'msr_info->data'.
>   * Returns 0 on success, non-0 otherwise.
> @@ -2061,6 +2097,12 @@ int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  	case MSR_KERNEL_GS_BASE:
>  		msr_info->data = vmx_read_guest_kernel_gs_base(vmx);
>  		break;
> +	case MSR_IA32_FRED_RSP0:
> +		msr_info->data = vmx_read_guest_fred_rsp0(vmx);
> +		break;
> +	case MSR_IA32_FRED_RSP1 ... MSR_IA32_FRED_CONFIG:
> +		msr_info->data = vmcs_read64(fred_msr_to_vmcs(msr_info->index));
> +		break;
>  #endif
>  	case MSR_EFER:
>  		return kvm_get_msr_common(vcpu, msr_info);
> @@ -2268,6 +2310,12 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  			vmx_update_exception_bitmap(vcpu);
>  		}
>  		break;
> +	case MSR_IA32_FRED_RSP0:
> +		vmx_write_guest_fred_rsp0(vmx, data);
> +		break;
> +	case MSR_IA32_FRED_RSP1 ... MSR_IA32_FRED_CONFIG:
> +		vmcs_write64(fred_msr_to_vmcs(msr_index), data);
> +		break;
>  #endif
>  	case MSR_IA32_SYSENTER_CS:
>  		if (is_guest_mode(vcpu))
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index c841817a914a..007577143337 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -318,6 +318,9 @@ static const u32 msrs_to_save_base[] = {
>  	MSR_STAR,
>  #ifdef CONFIG_X86_64
>  	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
> +	MSR_IA32_FRED_RSP0, MSR_IA32_FRED_RSP1, MSR_IA32_FRED_RSP2,
> +	MSR_IA32_FRED_RSP3, MSR_IA32_FRED_STKLVLS, MSR_IA32_FRED_SSP1,
> +	MSR_IA32_FRED_SSP2, MSR_IA32_FRED_SSP3, MSR_IA32_FRED_CONFIG,
>  #endif
>  	MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
>  	MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
> @@ -1849,6 +1852,23 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
>
>  		data = (u32)data;
>  		break;
> +	case MSR_IA32_FRED_RSP0 ... MSR_IA32_FRED_CONFIG:
> +		if (!guest_cpu_cap_has(vcpu, X86_FEATURE_FRED))
> +			return 1;

Yeesh, this is a bit of a no-win situation.  Having to re-check the MSR
index is no fun, but the amount of overlap between MSRs is significant,
i.e. I see why you bundled everything together.  Ugh, and
MSR_IA32_FRED_STKLVLS is buried smack dab in the middle of everything.

> +
> +		/* Bit 11, bits 5:4, and bit 2 of the IA32_FRED_CONFIG must be zero */

Eh, the comment isn't helping much.  If we want to add more
documentation, add #defines.  But I think we can document the reserved
behavior while also tidying up the code a bit.  After much fiddling, how
about this?

	case MSR_IA32_FRED_STKLVLS:
		if (!guest_cpu_cap_has(vcpu, X86_FEATURE_FRED))
			return 1;
		break;
	case MSR_IA32_FRED_RSP0 ... MSR_IA32_FRED_RSP3:
	case MSR_IA32_FRED_SSP1 ... MSR_IA32_FRED_CONFIG: {
		u64 reserved_bits;

		if (!guest_cpu_cap_has(vcpu, X86_FEATURE_FRED))
			return 1;

		if (is_noncanonical_msr_address(data, vcpu))
			return 1;

		switch (index) {
		case MSR_IA32_FRED_CONFIG:
			reserved_bits = BIT_ULL(11) | GENMASK_ULL(5, 4) |
					BIT_ULL(2);
			break;
		case MSR_IA32_FRED_RSP0 ... MSR_IA32_FRED_RSP3:
			reserved_bits = GENMASK_ULL(5, 0);
			break;
		case MSR_IA32_FRED_SSP1 ... MSR_IA32_FRED_SSP3:
			reserved_bits = GENMASK_ULL(2, 0);
			break;
		default:
			WARN_ON_ONCE(1);
			return 1;
		}

		if (data & reserved_bits)
			return 1;
		break;
	}

> @@ -1893,6 +1913,10 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
>  		    !guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID))
>  			return 1;
>  		break;
> +	case MSR_IA32_FRED_RSP0 ... MSR_IA32_FRED_CONFIG:
> +		if (!guest_cpu_cap_has(vcpu, X86_FEATURE_FRED))
> +			return 1;
> +		break;
>  	}
>
>  	msr.index = index;
> @@ -7455,6 +7479,10 @@ static void kvm_probe_msr_to_save(u32 msr_index)
>  		if (!(kvm_get_arch_capabilities() & ARCH_CAP_TSX_CTRL_MSR))
>  			return;
>  		break;
> +	case MSR_IA32_FRED_RSP0 ... MSR_IA32_FRED_CONFIG:
> +		if (!kvm_cpu_cap_has(X86_FEATURE_FRED))
> +			return;
> +		break;
>  	default:
>  		break;
>  	}
> --
> 2.48.1
>
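Tangentially, for anyone wiring up the userspace side the changelog
mentions: once these MSRs are reported by KVM_GET_MSR_INDEX_LIST,
saving the FRED context is a single KVM_GET_MSRS call over the new
range.  A rough sketch, not from this patch (the helper name and the
use of hardcoded architectural indices 0x1cc-0x1d4 are mine; real code
would take the indices from KVM_GET_MSR_INDEX_LIST):

	/*
	 * Hypothetical userspace snippet: save the guest's FRED context
	 * via KVM_GET_MSRS, as a VMM would during live migration.
	 * Assumes "vcpu_fd" is an open vCPU fd and that KVM reported
	 * FRED support.
	 */
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	#define MSR_IA32_FRED_RSP0	0x1cc
	#define MSR_IA32_FRED_CONFIG	0x1d4
	#define NR_FRED_MSRS	(MSR_IA32_FRED_CONFIG - MSR_IA32_FRED_RSP0 + 1)

	static int save_fred_context(int vcpu_fd, __u64 vals[NR_FRED_MSRS])
	{
		struct kvm_msrs *msrs;
		int i, nr;

		msrs = calloc(1, sizeof(*msrs) +
				 NR_FRED_MSRS * sizeof(struct kvm_msr_entry));
		if (!msrs)
			return -1;

		msrs->nmsrs = NR_FRED_MSRS;
		for (i = 0; i < NR_FRED_MSRS; i++)
			msrs->entries[i].index = MSR_IA32_FRED_RSP0 + i;

		/* KVM_GET_MSRS returns the number of MSRs actually read. */
		nr = ioctl(vcpu_fd, KVM_GET_MSRS, msrs);
		for (i = 0; i < nr; i++)
			vals[i] = msrs->entries[i].data;

		free(msrs);
		return nr == NR_FRED_MSRS ? 0 : -1;
	}

Restore is the mirror image: fill entries[i].data and issue KVM_SET_MSRS
(after setting guest CPUID, per the v2 changelog).  Note the sketch
deliberately stops at MSR_IA32_FRED_CONFIG; MSR_IA32_FRED_SSP0 aliases
MSR_IA32_PL0_SSP and is handled by the CET series, per the changelog.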