On 5/2/2025 4:34 AM, mlevitsk@xxxxxxxxxx wrote: > On Wed, 2025-04-23 at 17:51 +0800, Mi, Dapeng wrote: >> The shortlog "x86: KVM: VMX: Wrap GUEST_IA32_DEBUGCTL read/write with >> access functions" doesn't follow Sean's suggestion >> (https://github.com/kvm-x86/linux/blob/next/Documentation/process/maintainer-kvm-x86.rst#shortlog). >> Please modify. Thanks. >> >> >> On 4/16/2025 8:25 AM, Maxim Levitsky wrote: >>> Instead of reading and writing GUEST_IA32_DEBUGCTL vmcs field directly, >>> wrap the logic with get/set functions. >>> >>> Also move the checks that the guest's supplied value is valid to the new >>> 'set' function. >>> >>> In particular, the above change fixes a minor security issue in which L1 >>> hypervisor could set the GUEST_IA32_DEBUGCTL, and eventually the host's >>> MSR_IA32_DEBUGCTL to any value by performing a VM entry to L2 with >>> VM_ENTRY_LOAD_DEBUG_CONTROLS set. >>> >>> Signed-off-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx> >>> --- >>> arch/x86/kvm/vmx/nested.c | 15 +++++++--- >>> arch/x86/kvm/vmx/pmu_intel.c | 9 +++--- >>> arch/x86/kvm/vmx/vmx.c | 58 +++++++++++++++++++++++------------- >>> arch/x86/kvm/vmx/vmx.h | 3 ++ >>> 4 files changed, 57 insertions(+), 28 deletions(-) >>> >>> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c >>> index e073e3008b16..b7686569ee09 100644 >>> --- a/arch/x86/kvm/vmx/nested.c >>> +++ b/arch/x86/kvm/vmx/nested.c >>> @@ -2641,6 +2641,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, >>> struct vcpu_vmx *vmx = to_vmx(vcpu); >>> struct hv_enlightened_vmcs *evmcs = nested_vmx_evmcs(vmx); >>> bool load_guest_pdptrs_vmcs12 = false; >>> + u64 new_debugctl; >>> >>> if (vmx->nested.dirty_vmcs12 || nested_vmx_is_evmptr12_valid(vmx)) { >>> prepare_vmcs02_rare(vmx, vmcs12); >>> @@ -2653,11 +2654,17 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, >>> if (vmx->nested.nested_run_pending && >>> (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) { >>> kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); >>> - vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); >>> + new_debugctl = vmcs12->guest_ia32_debugctl; >>> } else { >>> kvm_set_dr(vcpu, 7, vcpu->arch.dr7); >>> - vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.pre_vmenter_debugctl); >>> + new_debugctl = vmx->nested.pre_vmenter_debugctl; >>> } >>> + >>> + if (CC(!vmx_set_guest_debugctl(vcpu, new_debugctl, false))) { >>> + *entry_failure_code = ENTRY_FAIL_DEFAULT; >>> + return -EINVAL; >>> + } >>> + >>> if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending || >>> !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))) >>> vmcs_write64(GUEST_BNDCFGS, vmx->nested.pre_vmenter_bndcfgs); >>> @@ -3520,7 +3527,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, >>> >>> if (!vmx->nested.nested_run_pending || >>> !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) >>> - vmx->nested.pre_vmenter_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); >>> + vmx->nested.pre_vmenter_debugctl = vmx_get_guest_debugctl(vcpu); >>> if (kvm_mpx_supported() && >>> (!vmx->nested.nested_run_pending || >>> !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))) >>> @@ -4788,7 +4795,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, >>> __vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR); >>> >>> kvm_set_dr(vcpu, 7, 0x400); >>> - vmcs_write64(GUEST_IA32_DEBUGCTL, 0); >>> + vmx_set_guest_debugctl(vcpu, 0, false); >>> >>> if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, >>> vmcs12->vm_exit_msr_load_count)) >>> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c >>> index 8a94b52c5731..f6f448adfb80 100644 >>> --- a/arch/x86/kvm/vmx/pmu_intel.c >>> +++ b/arch/x86/kvm/vmx/pmu_intel.c >>> @@ -19,6 +19,7 @@ >>> #include "lapic.h" >>> #include "nested.h" >>> #include "pmu.h" >>> +#include "vmx.h" >>> #include "tdx.h" >>> >>> /* >>> @@ -652,11 +653,11 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu) >>> */ >>> static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu) >>> { >>> - u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL); >>> + u64 data = vmx_get_guest_debugctl(vcpu); >>> >>> if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) { >>> data &= ~DEBUGCTLMSR_LBR; >>> - vmcs_write64(GUEST_IA32_DEBUGCTL, data); >>> + vmx_set_guest_debugctl(vcpu, data, true); >> Two questions. >> >> 1. why to call vmx_set_guest_debugctl() to do the extra check? currently >> IA32_DEBUGCTL MSR is always intercepted and it's already checked at >> vmx_set_msr() and seems unnecessary to check here again. > Hi, > > > I wanted this to be consistent. KVM has plenty of functions that can be both > guest triggered and internally triggered. For example kvm_set_cr4() > > Besides the vmx_set_guest_debugctl also notes the value the guest wrote > to be able to return it back to the guest if we choose to overide some > bits of the MSR, so it made sense to have one common function to set the msr. > > Do you think that can affect performance? hmm, since only DEBUGCTLMSR_LBR bit is changed here, it's safe to skip this check and write guest debug_ctrl directly. I have no idea how much performance impact this check would bring in high sampling frequency, but why not to eliminate it if it can? > > >> 2. why the argument "host_initiated" is true? It looks the data is not from >> host. > This is my mistake. > >> >>> } >>> } >>> >>> @@ -729,7 +730,7 @@ void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu) >>> >>> if (!lbr_desc->event) { >>> vmx_disable_lbr_msrs_passthrough(vcpu); >>> - if (vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR) >>> + if (vmx_get_guest_debugctl(vcpu) & DEBUGCTLMSR_LBR) >>> goto warn; >>> if (test_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use)) >>> goto warn; >>> @@ -751,7 +752,7 @@ void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu) >>> >>> static void intel_pmu_cleanup(struct kvm_vcpu *vcpu) >>> { >>> - if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR)) >>> + if (!(vmx_get_guest_debugctl(vcpu) & DEBUGCTLMSR_LBR)) >>> intel_pmu_release_guest_lbr_event(vcpu); >>> } >>> >>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c >>> index ef2d7208dd20..4237422dc4ed 100644 >>> --- a/arch/x86/kvm/vmx/vmx.c >>> +++ b/arch/x86/kvm/vmx/vmx.c >>> @@ -2154,7 +2154,7 @@ int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) >>> msr_info->data = vmx->pt_desc.guest.addr_a[index / 2]; >>> break; >>> case MSR_IA32_DEBUGCTLMSR: >>> - msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL); >>> + msr_info->data = vmx_get_guest_debugctl(vcpu); >>> break; >>> default: >>> find_uret_msr: >>> @@ -2194,6 +2194,41 @@ static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated >>> return debugctl; >>> } >>> >>> +u64 vmx_get_guest_debugctl(struct kvm_vcpu *vcpu) >>> +{ >>> + return vmcs_read64(GUEST_IA32_DEBUGCTL); >>> +} >>> + >>> +static void __vmx_set_guest_debugctl(struct kvm_vcpu *vcpu, u64 data) >>> +{ >>> + vmcs_write64(GUEST_IA32_DEBUGCTL, data); >>> +} >> IMO, it seems unnecessary to add these 2 wrappers since the original code >> is already intuitive enough and simple. But if you want, please add >> "inline" before these 2 wrappers. > The __vmx_set_guest_debugctl in the next patch will store the written value in > a field, this is why I did it this way. > > The vmx_get_guest_debugctl will read this value instead, also in the next patch. > > I thought it would be cleaner to first introduce the trivial wrappers and then > extend them. > >> >>> + >>> +bool vmx_set_guest_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated) >> Since most of code in this function checks guest debugctl, better to rename >> it to "vmx_check_and_set_guest_debugctl". > I don't mind doing so. > >> >>> +{ >>> + u64 invalid = data & ~vmx_get_supported_debugctl(vcpu, host_initiated); >>> + >>> + if (invalid & (DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR)) { >>> + kvm_pr_unimpl_wrmsr(vcpu, MSR_IA32_DEBUGCTLMSR, data); >>> + data &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR); >>> + invalid &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR); >> Add space around above 3 "|". > I copied this code "as is" from the wrmsr code. I can add this though. > > Best regards, > Maxim Levitsky > >> >>> + } >>> + >>> + if (invalid) >>> + return false; >>> + >>> + if (is_guest_mode(vcpu) && (get_vmcs12(vcpu)->vm_exit_controls & >>> + VM_EXIT_SAVE_DEBUG_CONTROLS)) >>> + get_vmcs12(vcpu)->guest_ia32_debugctl = data; >>> + >>> + if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event && >>> + (data & DEBUGCTLMSR_LBR)) >>> + intel_pmu_create_guest_lbr_event(vcpu); >>> + >>> + __vmx_set_guest_debugctl(vcpu, data); >>> + return true; >>> +} >>> + >>> /* >>> * Writes msr value into the appropriate "register". >>> * Returns 0 on success, non-0 otherwise. >>> @@ -2263,26 +2298,9 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) >>> vmcs_writel(GUEST_SYSENTER_ESP, data); >>> break; >>> case MSR_IA32_DEBUGCTLMSR: { >>> - u64 invalid; >>> - >>> - invalid = data & ~vmx_get_supported_debugctl(vcpu, msr_info->host_initiated); >>> - if (invalid & (DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR)) { >>> - kvm_pr_unimpl_wrmsr(vcpu, msr_index, data); >>> - data &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR); >>> - invalid &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR); >>> - } >>> - >>> - if (invalid) >>> + if (!vmx_set_guest_debugctl(vcpu, data, msr_info->host_initiated)) >>> return 1; >>> >>> - if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls & >>> - VM_EXIT_SAVE_DEBUG_CONTROLS) >>> - get_vmcs12(vcpu)->guest_ia32_debugctl = data; >>> - >>> - vmcs_write64(GUEST_IA32_DEBUGCTL, data); >>> - if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event && >>> - (data & DEBUGCTLMSR_LBR)) >>> - intel_pmu_create_guest_lbr_event(vcpu); >>> return 0; >>> } >>> case MSR_IA32_BNDCFGS: >>> @@ -4795,7 +4813,7 @@ static void init_vmcs(struct vcpu_vmx *vmx) >>> vmcs_write32(GUEST_SYSENTER_CS, 0); >>> vmcs_writel(GUEST_SYSENTER_ESP, 0); >>> vmcs_writel(GUEST_SYSENTER_EIP, 0); >>> - vmcs_write64(GUEST_IA32_DEBUGCTL, 0); >>> + __vmx_set_guest_debugctl(&vmx->vcpu, 0); >>> >>> if (cpu_has_vmx_tpr_shadow()) { >>> vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); >>> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h >>> index 6d1e40ecc024..8ac46fb47abd 100644 >>> --- a/arch/x86/kvm/vmx/vmx.h >>> +++ b/arch/x86/kvm/vmx/vmx.h >>> @@ -404,6 +404,9 @@ u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu); >>> >>> gva_t vmx_get_untagged_addr(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags); >>> >>> +bool vmx_set_guest_debugctl(struct kvm_vcpu *vcpu, u64 value, bool host_initiated); >>> +u64 vmx_get_guest_debugctl(struct kvm_vcpu *vcpu); >>> + >>> static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, >>> int type, bool value) >>> {