Currently, the CPPC Frequency Invariance Engine (FIE) is invoked from the scheduler tick but defers the update of arch_freq_scale to a separate thread because cppc_get_perf_ctrs() would sleep if the CPC regs are in PCC. However, this deferred update mechanism is unnecessary and introduces extra overhead for non-PCC register spaces (e.g. System Memory or FFH), where accessing the regs won't sleep and can be safely performed from the tick context. Also, reading perf counters of a remote CPU may return 0 if it's in a low-power idle state, e.g. power down or reset. Update arch_freq_scale directly in ticks for non-PCC regs and keep the deferred update mechanism for PCC regs. Signed-off-by: Jie Zhan <zhanjie9@xxxxxxxxxxxxx> --- drivers/cpufreq/cppc_cpufreq.c | 52 +++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 7724318b3415..66d74b062ceb 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -55,31 +55,24 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data, struct cppc_perf_fb_ctrs *fb_ctrs_t1); /** - * cppc_scale_freq_workfn - CPPC arch_freq_scale updater for frequency invariance - * @work: The work item. + * __cppc_scale_freq_tick - CPPC arch_freq_scale updater for frequency invariance + * @cppc_fi: per-cpu CPPC FIE data. * - * The CPPC driver register itself with the topology core to provide its own + * The CPPC driver registers itself with the topology core to provide its own * implementation (cppc_scale_freq_tick()) of topology_scale_freq_tick() which * gets called by the scheduler on every tick. * * Note that the arch specific counters have higher priority than CPPC counters, * if available, though the CPPC driver doesn't need to have any special * handling for that. - * - * On an invocation of cppc_scale_freq_tick(), we schedule an irq work (since we - * reach here from hard-irq context), which then schedules a normal work item - * and cppc_scale_freq_workfn() updates the per_cpu arch_freq_scale variable - * based on the counter updates since the last tick. */ -static void cppc_scale_freq_workfn(struct kthread_work *work) +static void __cppc_scale_freq_tick(struct cppc_freq_invariance *cppc_fi) { - struct cppc_freq_invariance *cppc_fi; struct cppc_perf_fb_ctrs fb_ctrs = {0}; struct cppc_cpudata *cpu_data; unsigned long local_freq_scale; u64 perf; - cppc_fi = container_of(work, struct cppc_freq_invariance, work); cpu_data = cppc_fi->cpu_data; if (cppc_get_perf_ctrs(cppc_fi->cpu, &fb_ctrs)) { @@ -104,6 +97,14 @@ static void cppc_scale_freq_workfn(struct kthread_work *work) per_cpu(arch_freq_scale, cppc_fi->cpu) = local_freq_scale; } +static void cppc_scale_freq_workfn(struct kthread_work *work) +{ + struct cppc_freq_invariance *cppc_fi; + + cppc_fi = container_of(work, struct cppc_freq_invariance, work); + __cppc_scale_freq_tick(cppc_fi); +} + static void cppc_irq_work(struct irq_work *irq_work) { struct cppc_freq_invariance *cppc_fi; @@ -112,7 +113,14 @@ static void cppc_irq_work(struct irq_work *irq_work) kthread_queue_work(kworker_fie, &cppc_fi->work); } -static void cppc_scale_freq_tick(void) +/* + * Reading perf counters may sleep if the CPC regs are in PCC. Thus, we + * schedule an irq work in scale_freq_tick (since we reach here from hard-irq + * context), which then schedules a normal work item cppc_scale_freq_workfn() + * that updates the per_cpu arch_freq_scale variable based on the counter + * updates since the last tick. + */ +static void cppc_scale_freq_tick_pcc(void) { struct cppc_freq_invariance *cppc_fi = &per_cpu(cppc_freq_inv, smp_processor_id()); @@ -123,6 +131,11 @@ static void cppc_scale_freq_tick(void) irq_work_queue(&cppc_fi->irq_work); } +static void cppc_scale_freq_tick(void) +{ + __cppc_scale_freq_tick(&per_cpu(cppc_freq_inv, smp_processor_id())); +} + static struct scale_freq_data cppc_sftd = { .source = SCALE_FREQ_SOURCE_CPPC, .set_freq_scale = cppc_scale_freq_tick, @@ -140,8 +153,10 @@ static void cppc_cpufreq_cpu_fie_init(struct cpufreq_policy *policy) cppc_fi = &per_cpu(cppc_freq_inv, cpu); cppc_fi->cpu = cpu; cppc_fi->cpu_data = policy->driver_data; - kthread_init_work(&cppc_fi->work, cppc_scale_freq_workfn); - init_irq_work(&cppc_fi->irq_work, cppc_irq_work); + if (cppc_perf_ctrs_in_pcc()) { + kthread_init_work(&cppc_fi->work, cppc_scale_freq_workfn); + init_irq_work(&cppc_fi->irq_work, cppc_irq_work); + } ret = cppc_get_perf_ctrs(cpu, &cppc_fi->prev_perf_fb_ctrs); if (ret && cpu_online(cpu)) { @@ -174,6 +189,9 @@ static void cppc_cpufreq_cpu_fie_exit(struct cpufreq_policy *policy) /* policy->cpus will be empty here, use related_cpus instead */ topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_CPPC, policy->related_cpus); + if (!cppc_perf_ctrs_in_pcc()) + return; + for_each_cpu(cpu, policy->related_cpus) { cppc_fi = &per_cpu(cppc_freq_inv, cpu); irq_work_sync(&cppc_fi->irq_work); @@ -206,9 +224,11 @@ static void __init cppc_freq_invariance_init(void) } } - if (fie_disabled) + if (fie_disabled || !cppc_perf_ctrs_in_pcc()) return; + cppc_sftd.set_freq_scale = cppc_scale_freq_tick_pcc; + kworker_fie = kthread_run_worker(0, "cppc_fie"); if (IS_ERR(kworker_fie)) { pr_warn("%s: failed to create kworker_fie: %ld\n", __func__, @@ -228,7 +248,7 @@ static void __init cppc_freq_invariance_init(void) static void cppc_freq_invariance_exit(void) { - if (fie_disabled) + if (fie_disabled || !cppc_perf_ctrs_in_pcc()) return; kthread_destroy_worker(kworker_fie); -- 2.33.0