During a lazy switch, event_sched_out() is called. This function
removes the perf event of the task being scheduled out, which clears
the corresponding bit in active_mask of cpu_hw_events. In AMD's NMI
handler, if that bit in active_mask is not set, the CPU does not
handle the NMI event, which ultimately triggers an unknown NMI error.
In addition, event_sched_out() may call amd_pmu_wait_on_overflow(),
leading to a busy wait of up to 50us during the lazy switch.

This patch adds two per-CPU variables. rpal_nmi_handle is set when an
NMI occurs on the lazy switch path; when an otherwise unknown NMI is
then encountered, it is skipped instead of reported. rpal_nmi is set
before the lazy switch and cleared after it, which prevents the busy
wait caused by amd_pmu_wait_on_overflow().

Signed-off-by: Bo Li <libo.gcs85@xxxxxxxxxxxxx>
---
 arch/x86/events/amd/core.c | 14 ++++++++++++++
 arch/x86/kernel/nmi.c      | 20 ++++++++++++++++++++
 arch/x86/rpal/core.c       | 17 ++++++++++++++++-
 3 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index b20661b8621d..633a9ac4e77c 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -719,6 +719,10 @@ static void amd_pmu_wait_on_overflow(int idx)
 	}
 }
 
+#ifdef CONFIG_RPAL
+DEFINE_PER_CPU(bool, rpal_nmi);
+#endif
+
 static void amd_pmu_check_overflow(void)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -732,6 +736,11 @@ static void amd_pmu_check_overflow(void)
 	if (in_nmi())
 		return;
 
+#ifdef CONFIG_RPAL
+	if (this_cpu_read(rpal_nmi))
+		return;
+#endif
+
 	/*
 	 * Check each counter for overflow and wait for it to be reset by the
 	 * NMI if it has overflowed. This relies on the fact that all active
@@ -807,6 +816,11 @@ static void amd_pmu_disable_event(struct perf_event *event)
 	if (in_nmi())
 		return;
 
+#ifdef CONFIG_RPAL
+	if (this_cpu_read(rpal_nmi))
+		return;
+#endif
+
 	amd_pmu_wait_on_overflow(event->hw.idx);
 }
 
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index be93ec7255bf..dd72b6d1c7f9 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -351,12 +351,23 @@ NOKPROBE_SYMBOL(unknown_nmi_error);
 
 static DEFINE_PER_CPU(bool, swallow_nmi);
 static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
+#ifdef CONFIG_RPAL
+DEFINE_PER_CPU(bool, rpal_nmi_handle);
+#endif
 
 static noinstr void default_do_nmi(struct pt_regs *regs)
 {
 	unsigned char reason = 0;
 	int handled;
 	bool b2b = false;
+#ifdef CONFIG_RPAL
+	bool rpal_handle = false;
+
+	if (__this_cpu_read(rpal_nmi_handle)) {
+		__this_cpu_write(rpal_nmi_handle, false);
+		rpal_handle = true;
+	}
+#endif
 
 	/*
 	 * Back-to-back NMIs are detected by comparing the RIP of the
@@ -471,6 +482,15 @@ static noinstr void default_do_nmi(struct pt_regs *regs)
 	 */
 	if (b2b && __this_cpu_read(swallow_nmi))
 		__this_cpu_add(nmi_stats.swallow, 1);
+#ifdef CONFIG_RPAL
+	/*
+	 * A lazy switch may clear the bit in active_mask, leaving the
+	 * NMI event unhandled and reported as an unknown NMI; skip it
+	 * here to avoid that.
+	 */
+	else if (rpal_handle)
+		goto out;
+#endif
 	else
 		unknown_nmi_error(reason, regs);
 
diff --git a/arch/x86/rpal/core.c b/arch/x86/rpal/core.c
index 6a22b9faa100..92281b557a6c 100644
--- a/arch/x86/rpal/core.c
+++ b/arch/x86/rpal/core.c
@@ -376,11 +376,26 @@ rpal_exception_context_switch(struct pt_regs *regs)
 	return next;
 }
 
+DECLARE_PER_CPU(bool, rpal_nmi_handle);
+DECLARE_PER_CPU(bool, rpal_nmi);
 __visible struct task_struct *rpal_nmi_context_switch(struct pt_regs *regs)
 {
 	struct task_struct *next;
 
-	next = rpal_kernel_context_switch(regs);
+	if (rpal_test_current_thread_flag(RPAL_LAZY_SWITCHED_BIT))
+		rpal_update_fsbase(regs);
+
+	next = rpal_misidentify();
+	if (unlikely(next != NULL)) {
+		next = rpal_fix_critical_section(next, regs);
+		if (next) {
+			__this_cpu_write(rpal_nmi_handle, true);
+			/* avoid wait in amd_pmu_check_overflow */
+			__this_cpu_write(rpal_nmi, true);
+			next = rpal_do_kernel_context_switch(next, regs);
+			__this_cpu_write(rpal_nmi, false);
+		}
+	}
 
 	return next;
 }
-- 
2.20.1
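
As a reviewer aid (not part of the patch), below is a minimal,
single-threaded userspace sketch of how the two per-CPU flags are
intended to interact. The model_* functions and plain bools are
hypothetical stand-ins for default_do_nmi(), amd_pmu_check_overflow()
and rpal_nmi_context_switch(); the real code uses per-CPU variables
in NMI and context-switch paths.

/*
 * Minimal single-threaded model of the rpal_nmi/rpal_nmi_handle
 * handshake (illustration only; model_* names are hypothetical and
 * plain bools stand in for per-CPU variables).
 */
#include <stdbool.h>
#include <stdio.h>

static bool rpal_nmi_handle;	/* models DEFINE_PER_CPU(bool, rpal_nmi_handle) */
static bool rpal_nmi;		/* models DEFINE_PER_CPU(bool, rpal_nmi) */

/* Models the check added to amd_pmu_check_overflow(). */
static void model_pmu_check_overflow(void)
{
	if (rpal_nmi) {
		printf("lazy switch in progress: skip amd_pmu_wait_on_overflow()\n");
		return;
	}
	printf("normal path: may busy-wait up to 50us for the counter to reset\n");
}

/* Models the unknown-NMI handling added to default_do_nmi(). */
static void model_do_nmi(bool handled)
{
	if (handled)
		return;
	if (rpal_nmi_handle) {
		rpal_nmi_handle = false;	/* one-shot: consume the flag */
		printf("unknown NMI skipped after lazy switch\n");
		return;
	}
	printf("unknown_nmi_error()\n");
}

/* Models the flag updates added to rpal_nmi_context_switch(). */
static void model_lazy_switch(void)
{
	rpal_nmi_handle = true;	/* the next unknown NMI will be skipped */
	rpal_nmi = true;	/* suppress the busy wait during event_sched_out() */
	model_pmu_check_overflow();
	rpal_nmi = false;
}

int main(void)
{
	model_lazy_switch();
	model_do_nmi(false);	/* active_mask bit cleared, so not handled */
	model_do_nmi(false);	/* flag already consumed: genuine unknown NMI */
	return 0;
}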