On 6/24/2025 9:27 AM, Sean Christopherson wrote:
+
+static u64 vmx_read_guest_fred_rsp0(struct vcpu_vmx *vmx)
+{
+	/*
+	 * If guest state is loaded, the guest's value may be live in the MSR,
+	 * so refresh the cached value from hardware before returning it.
+	 */
+	preempt_disable();
+	if (vmx->guest_state_loaded)
+		vmx->msr_guest_fred_rsp0 = read_msr(MSR_IA32_FRED_RSP0);
+	preempt_enable();
+	return vmx->msr_guest_fred_rsp0;
+}
+
+static void vmx_write_guest_fred_rsp0(struct vcpu_vmx *vmx, u64 data)
+{
+	/* Propagate the new value to hardware only if guest state is loaded. */
+	preempt_disable();
+	if (vmx->guest_state_loaded)
+		wrmsrns(MSR_IA32_FRED_RSP0, data);
+	preempt_enable();
+	vmx->msr_guest_fred_rsp0 = data;
+}
#endif
Maybe add helpers to deal with the preemption stuff?

This is a good idea. Do you want to upstream the following patch? Then I can
rebase this patch on top of it in the next iteration.

Oh, never mind, FRED uses WRMSRNS. Hmm, actually, can't these all be
non-serializing? KVM is propagating *guest* values to hardware, so a VM-Enter
is guaranteed before the CPU value can be consumed.
I see your point. It seems only a new MSR-write instruction could achieve
this: one that always performs a non-serializing write, on the assumption
that the target is a guest MSR. That is, software would need to explicitly
tell the CPU whether the target MSR is a host MSR or a guest MSR.
(WRMSRNS performs either a serializing or a non-serializing write to an MSR
based only on the MSR index.)
#ifdef CONFIG_X86_64
static u64 vmx_read_guest_host_msr(struct vcpu_vmx *vmx, u32 msr, u64 *cache)
{
	preempt_disable();
	if (vmx->guest_state_loaded)
		*cache = read_msr(msr);
	preempt_enable();
	return *cache;
}

static void vmx_write_guest_host_msr(struct vcpu_vmx *vmx, u32 msr, u64 data,
				     u64 *cache)
{
	preempt_disable();
	if (vmx->guest_state_loaded)
		wrmsrns(msr, data);
	preempt_enable();
	*cache = data;
}

static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
{
	return vmx_read_guest_host_msr(vmx, MSR_KERNEL_GS_BASE,
				       &vmx->msr_guest_kernel_gs_base);
}

static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
{
	vmx_write_guest_host_msr(vmx, MSR_KERNEL_GS_BASE, data,
				 &vmx->msr_guest_kernel_gs_base);
}

static u64 vmx_read_guest_fred_rsp0(struct vcpu_vmx *vmx)
{
	return vmx_read_guest_host_msr(vmx, MSR_IA32_FRED_RSP0,
				       &vmx->msr_guest_fred_rsp0);
}

static void vmx_write_guest_fred_rsp0(struct vcpu_vmx *vmx, u64 data)
{
	vmx_write_guest_host_msr(vmx, MSR_IA32_FRED_RSP0, data,
				 &vmx->msr_guest_fred_rsp0);
}
#endif
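
For reference, a sketch of how the FRED_RSP0 wrappers would presumably be
wired into the MSR accessors, mirroring the existing MSR_KERNEL_GS_BASE
handling (the case labels below are illustrative, not part of the patch):

	/* In vmx_get_msr(): */
	case MSR_IA32_FRED_RSP0:
		msr_info->data = vmx_read_guest_fred_rsp0(vmx);
		break;

	/* In vmx_set_msr(): */
	case MSR_IA32_FRED_RSP0:
		vmx_write_guest_fred_rsp0(vmx, data);
		break;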
+#ifdef CONFIG_X86_64
+static u32 fred_msr_vmcs_fields[] = {
+	GUEST_IA32_FRED_RSP1,
+	GUEST_IA32_FRED_RSP2,
+	GUEST_IA32_FRED_RSP3,
+	GUEST_IA32_FRED_STKLVLS,
+	GUEST_IA32_FRED_SSP1,
+	GUEST_IA32_FRED_SSP2,
+	GUEST_IA32_FRED_SSP3,
+	GUEST_IA32_FRED_CONFIG,
+};

This should be const.

Will add.
I think it also makes sense to add a static_assert() here, more so to help
readers follow along than anything else.
static_assert(MSR_IA32_FRED_CONFIG - MSR_IA32_FRED_RSP1 ==
ARRAY_SIZE(fred_msr_vmcs_fields) - 1);
Good idea!
I tried to make fred_msr_to_vmcs() fail at build time, but couldn’t get
it to work.
+
+static u32 fred_msr_to_vmcs(u32 msr)
+{
+	return fred_msr_vmcs_fields[msr - MSR_IA32_FRED_RSP1];
+}
+#endif
+
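
Presumably that's because the MSR index reaches fred_msr_to_vmcs() as a
runtime value from the guest access path. A build-time check only works when
every caller passes a compile-time constant; a sketch of that (dead-end)
approach, just to illustrate why it can't work here:

	static __always_inline u32 fred_msr_to_vmcs(u32 msr)
	{
		/* Fails the build for any non-constant or out-of-range index. */
		BUILD_BUG_ON_MSG(!__builtin_constant_p(msr) ||
				 msr < MSR_IA32_FRED_RSP1 ||
				 msr > MSR_IA32_FRED_CONFIG,
				 "fred_msr_to_vmcs() needs a constant, in-range MSR");
		return fred_msr_vmcs_fields[msr - MSR_IA32_FRED_RSP1];
	}

Since the index is only known at run time, the static_assert() on the table
size is about as much build-time checking as is possible here.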
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
@@ -1849,6 +1852,23 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
 		data = (u32)data;
 		break;
+	case MSR_IA32_FRED_RSP0 ... MSR_IA32_FRED_CONFIG:
+		if (!guest_cpu_cap_has(vcpu, X86_FEATURE_FRED))
+			return 1;
Yeesh, this is a bit of a no-win situation. Having to re-check the MSR index is
no fun, but the amount of overlap between MSRs is significant, i.e. I see why you
bundled everything together. Ugh, and MSR_IA32_FRED_STKLVLS is buried smack dab
in the middle of everything.
+
+		/* Bit 11, bits 5:4, and bit 2 of the IA32_FRED_CONFIG must be zero */
Eh, the comment isn't helping much. If we want to add more documentation, add
#defines. But I think we can document the reserved behavior while also tidying
up the code a bit.
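
E.g., a strawman of what such #defines might look like (names invented here;
the values match the checks below):

	#define FRED_CONFIG_RESERVED_BITS	(BIT_ULL(11) | GENMASK_ULL(5, 4) | BIT_ULL(2))
	#define FRED_RSP_RESERVED_BITS		GENMASK_ULL(5, 0)	/* 64-byte aligned */
	#define FRED_SSP_RESERVED_BITS		GENMASK_ULL(2, 0)	/* 8-byte aligned */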
After much fiddling, how about this?
	case MSR_IA32_FRED_STKLVLS:
		if (!guest_cpu_cap_has(vcpu, X86_FEATURE_FRED))
			return 1;
		break;
	case MSR_IA32_FRED_RSP0 ... MSR_IA32_FRED_RSP3:
	case MSR_IA32_FRED_SSP1 ... MSR_IA32_FRED_CONFIG: {
		u64 reserved_bits;

		if (!guest_cpu_cap_has(vcpu, X86_FEATURE_FRED))
			return 1;

		if (is_noncanonical_msr_address(data, vcpu))
			return 1;

		switch (index) {
		case MSR_IA32_FRED_CONFIG:
			reserved_bits = BIT_ULL(11) | GENMASK_ULL(5, 4) | BIT_ULL(2);
			break;
		case MSR_IA32_FRED_RSP0 ... MSR_IA32_FRED_RSP3:
			reserved_bits = GENMASK_ULL(5, 0);
			break;
		case MSR_IA32_FRED_SSP1 ... MSR_IA32_FRED_SSP3:
			reserved_bits = GENMASK_ULL(2, 0);
			break;
		default:
			WARN_ON_ONCE(1);
			return 1;
		}

		if (data & reserved_bits)
			return 1;
		break;
	}
Easier to read, I will use it :)
Thanks!
Xin