On Mon, Apr 29, 2019 at 11:34 AM Viresh Kumar <viresh.kumar@xxxxxxxxxx> wrote: > > Currently the notifiers are called once for each CPU of the policy->cpus > cpumask. It would be more optimal if the notifier can be called only > once and all the relevant information be provided to it. IMO this also is a matter of I/F clarity, at least for policies with more than one CPU, because some actions may only be applicable to the policy CPU in principle and the notifiers are only passed a CPU number, so they may need to get to the policy and find the policy CPU in it which is pointless. > Out of the 23 drivers that register for the transition notifiers today, only 4 of them > do per-cpu updates and the callback for the rest can be called only once > for the policy without any impact. > > This would also avoid multiple function calls to the notifier callbacks > and reduce multiple iterations of notifier core's code (which does > locking as well). > > This patch adds pointer to the cpufreq policy to the struct > cpufreq_freqs, so the notifier callback has all the information > available to it with a single call. The five drivers which perform > per-cpu updates are updated to use the cpufreq policy. The freqs->cpu > field is redundant now and is removed. > > Acked-by: David S. Miller <davem@xxxxxxxxxxxxx> (sparc) > Signed-off-by: Viresh Kumar <viresh.kumar@xxxxxxxxxx> I'm going to queue up this one unless there are concerns and/or objections from arch or KVM maintainers. > --- > V3->V4: > - rebased over pm/linux-next > - tsc.c's diff has changed due to rafael's patch present in > pm/linux-next. > - Minor update in commit-log. > > arch/arm/kernel/smp.c | 24 +++++++++++++++--------- > arch/sparc/kernel/time_64.c | 28 ++++++++++++++++------------ > arch/x86/kernel/tsc.c | 2 +- > arch/x86/kvm/x86.c | 31 ++++++++++++++++++++----------- > drivers/cpufreq/cpufreq.c | 19 ++++++++++--------- > include/linux/cpufreq.h | 14 +++++++------- > 6 files changed, 69 insertions(+), 49 deletions(-) > > diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c > index facd4240ca02..c6d37563610a 100644 > --- a/arch/arm/kernel/smp.c > +++ b/arch/arm/kernel/smp.c > @@ -754,15 +754,20 @@ static int cpufreq_callback(struct notifier_block *nb, > unsigned long val, void *data) > { > struct cpufreq_freqs *freq = data; > - int cpu = freq->cpu; > + struct cpumask *cpus = freq->policy->cpus; > + int cpu, first = cpumask_first(cpus); > + unsigned int lpj; > > if (freq->flags & CPUFREQ_CONST_LOOPS) > return NOTIFY_OK; > > - if (!per_cpu(l_p_j_ref, cpu)) { > - per_cpu(l_p_j_ref, cpu) = > - per_cpu(cpu_data, cpu).loops_per_jiffy; > - per_cpu(l_p_j_ref_freq, cpu) = freq->old; > + if (!per_cpu(l_p_j_ref, first)) { > + for_each_cpu(cpu, cpus) { > + per_cpu(l_p_j_ref, cpu) = > + per_cpu(cpu_data, cpu).loops_per_jiffy; > + per_cpu(l_p_j_ref_freq, cpu) = freq->old; > + } > + > if (!global_l_p_j_ref) { > global_l_p_j_ref = loops_per_jiffy; > global_l_p_j_ref_freq = freq->old; > @@ -774,10 +779,11 @@ static int cpufreq_callback(struct notifier_block *nb, > loops_per_jiffy = cpufreq_scale(global_l_p_j_ref, > global_l_p_j_ref_freq, > freq->new); > - per_cpu(cpu_data, cpu).loops_per_jiffy = > - cpufreq_scale(per_cpu(l_p_j_ref, cpu), > - per_cpu(l_p_j_ref_freq, cpu), > - freq->new); > + > + lpj = cpufreq_scale(per_cpu(l_p_j_ref, first), > + per_cpu(l_p_j_ref_freq, first), freq->new); > + for_each_cpu(cpu, cpus) > + per_cpu(cpu_data, cpu).loops_per_jiffy = lpj; > } > return NOTIFY_OK; > } > diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c > index 3eb77943ce12..89fb05f90609 100644 > --- a/arch/sparc/kernel/time_64.c > +++ b/arch/sparc/kernel/time_64.c > @@ -653,19 +653,23 @@ static int sparc64_cpufreq_notifier(struct notifier_block *nb, unsigned long val > void *data) > { > struct cpufreq_freqs *freq = data; > - unsigned int cpu = freq->cpu; > - struct freq_table *ft = &per_cpu(sparc64_freq_table, cpu); > + unsigned int cpu; > + struct freq_table *ft; > > - if (!ft->ref_freq) { > - ft->ref_freq = freq->old; > - ft->clock_tick_ref = cpu_data(cpu).clock_tick; > - } > - if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || > - (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { > - cpu_data(cpu).clock_tick = > - cpufreq_scale(ft->clock_tick_ref, > - ft->ref_freq, > - freq->new); > + for_each_cpu(cpu, freq->policy->cpus) { > + ft = &per_cpu(sparc64_freq_table, cpu); > + > + if (!ft->ref_freq) { > + ft->ref_freq = freq->old; > + ft->clock_tick_ref = cpu_data(cpu).clock_tick; > + } > + > + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || > + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { > + cpu_data(cpu).clock_tick = > + cpufreq_scale(ft->clock_tick_ref, ft->ref_freq, > + freq->new); > + } > } > > return 0; > diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c > index cc6df5c6d7b3..650fafa6a4d0 100644 > --- a/arch/x86/kernel/tsc.c > +++ b/arch/x86/kernel/tsc.c > @@ -976,7 +976,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, > if (!(freq->flags & CPUFREQ_CONST_LOOPS)) > mark_tsc_unstable("cpufreq changes"); > > - set_cyc2ns_scale(tsc_khz, freq->cpu, rdtsc()); > + set_cyc2ns_scale(tsc_khz, freq->policy->cpu, rdtsc()); > } > > return 0; > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index a0d1fc80ac5a..55efbc1b0a56 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -6677,10 +6677,8 @@ static void kvm_hyperv_tsc_notifier(void) > } > #endif > > -static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, > - void *data) > +static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu) > { > - struct cpufreq_freqs *freq = data; > struct kvm *kvm; > struct kvm_vcpu *vcpu; > int i, send_ipi = 0; > @@ -6724,17 +6722,12 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va > * > */ > > - if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) > - return 0; > - if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) > - return 0; > - > - smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); > + smp_call_function_single(cpu, tsc_khz_changed, freq, 1); > > spin_lock(&kvm_lock); > list_for_each_entry(kvm, &vm_list, vm_list) { > kvm_for_each_vcpu(i, vcpu, kvm) { > - if (vcpu->cpu != freq->cpu) > + if (vcpu->cpu != cpu) > continue; > kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); > if (vcpu->cpu != smp_processor_id()) > @@ -6756,8 +6749,24 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va > * guest context is entered kvmclock will be updated, > * so the guest will not see stale values. > */ > - smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); > + smp_call_function_single(cpu, tsc_khz_changed, freq, 1); > } > +} > + > +static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, > + void *data) > +{ > + struct cpufreq_freqs *freq = data; > + int cpu; > + > + if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) > + return 0; > + if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) > + return 0; > + > + for_each_cpu(cpu, freq->policy->cpus) > + __kvmclock_cpufreq_notifier(freq, cpu); > + > return 0; > } > > diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c > index 92604afdeec4..3681ec8d19f2 100644 > --- a/drivers/cpufreq/cpufreq.c > +++ b/drivers/cpufreq/cpufreq.c > @@ -340,11 +340,14 @@ static void cpufreq_notify_transition(struct cpufreq_policy *policy, > struct cpufreq_freqs *freqs, > unsigned int state) > { > + int cpu; > + > BUG_ON(irqs_disabled()); > > if (cpufreq_disabled()) > return; > > + freqs->policy = policy; > freqs->flags = cpufreq_driver->flags; > pr_debug("notification %u of frequency transition to %u kHz\n", > state, freqs->new); > @@ -364,10 +367,8 @@ static void cpufreq_notify_transition(struct cpufreq_policy *policy, > } > } > > - for_each_cpu(freqs->cpu, policy->cpus) { > - srcu_notifier_call_chain(&cpufreq_transition_notifier_list, > - CPUFREQ_PRECHANGE, freqs); > - } > + srcu_notifier_call_chain(&cpufreq_transition_notifier_list, > + CPUFREQ_PRECHANGE, freqs); > > adjust_jiffies(CPUFREQ_PRECHANGE, freqs); > break; > @@ -377,11 +378,11 @@ static void cpufreq_notify_transition(struct cpufreq_policy *policy, > pr_debug("FREQ: %u - CPUs: %*pbl\n", freqs->new, > cpumask_pr_args(policy->cpus)); > > - for_each_cpu(freqs->cpu, policy->cpus) { > - trace_cpu_frequency(freqs->new, freqs->cpu); > - srcu_notifier_call_chain(&cpufreq_transition_notifier_list, > - CPUFREQ_POSTCHANGE, freqs); > - } > + for_each_cpu(cpu, policy->cpus) > + trace_cpu_frequency(freqs->new, cpu); > + > + srcu_notifier_call_chain(&cpufreq_transition_notifier_list, > + CPUFREQ_POSTCHANGE, freqs); > > cpufreq_stats_record_transition(policy, freqs->new); > policy->cur = freqs->new; > diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h > index 684caf067003..d01a74fbc4db 100644 > --- a/include/linux/cpufreq.h > +++ b/include/linux/cpufreq.h > @@ -42,13 +42,6 @@ enum cpufreq_table_sorting { > CPUFREQ_TABLE_SORTED_DESCENDING > }; > > -struct cpufreq_freqs { > - unsigned int cpu; /* cpu nr */ > - unsigned int old; > - unsigned int new; > - u8 flags; /* flags of cpufreq_driver, see below. */ > -}; > - > struct cpufreq_cpuinfo { > unsigned int max_freq; > unsigned int min_freq; > @@ -156,6 +149,13 @@ struct cpufreq_policy { > struct thermal_cooling_device *cdev; > }; > > +struct cpufreq_freqs { > + struct cpufreq_policy *policy; > + unsigned int old; > + unsigned int new; > + u8 flags; /* flags of cpufreq_driver, see below. */ > +}; > + > /* Only for ACPI */ > #define CPUFREQ_SHARED_TYPE_NONE (0) /* None */ > #define CPUFREQ_SHARED_TYPE_HW (1) /* HW does needed coordination */ > -- > 2.21.0.rc0.269.g1a574e7a288b >