On Tuesday, November 20, 2012 01:06:16 PM Fabio Baltieri wrote: > From: Rickard Andersson <rickard.andersson@xxxxxxxxxxxxxx> > > This patch fixes a bug that occurred when we had load on a secondary CPU > and the primary CPU was sleeping. Only one sampling timer was spawned > and it was spawned as a deferred timer on the primary CPU, so when a > secondary CPU had a change in load this was not detected by the ondemand > governor. > > This patch make sure that deferred timers are run on all CPUs in the > case of software controlled CPUs that run on the same frequency. While I basically don't have problems with the functionality of this, I have some with the code organization. > Signed-off-by: Rickard Andersson <rickard.andersson@xxxxxxxxxxxxxx> > Signed-off-by: Fabio Baltieri <fabio.baltieri@xxxxxxxxxx> > --- > drivers/cpufreq/cpufreq_ondemand.c | 141 ++++++++++++++++++++++++++++++++----- > 1 file changed, 122 insertions(+), 19 deletions(-) > > diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c > index 396322f..430f614 100644 > --- a/drivers/cpufreq/cpufreq_ondemand.c > +++ b/drivers/cpufreq/cpufreq_ondemand.c > @@ -93,6 +93,7 @@ struct cpu_dbs_info_s { > * when user is changing the governor or limits. > */ > struct mutex timer_mutex; > + ktime_t time_stamp; > }; > static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info); > > @@ -285,7 +286,7 @@ static void update_sampling_rate(unsigned int new_rate) > policy = cpufreq_cpu_get(cpu); > if (!policy) > continue; > - dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); > + dbs_info = &per_cpu(od_cpu_dbs_info, cpu); > cpufreq_cpu_put(policy); > > mutex_lock(&dbs_info->timer_mutex); > @@ -305,7 +306,7 @@ static void update_sampling_rate(unsigned int new_rate) > cancel_delayed_work_sync(&dbs_info->work); > mutex_lock(&dbs_info->timer_mutex); > > - schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, > + schedule_delayed_work_on(cpu, &dbs_info->work, > usecs_to_jiffies(new_rate)); > > } The above two changes don't belong to this patch. Please send a separate patch with them and a matching description in the changelog. > @@ -449,6 +450,16 @@ static struct attribute_group dbs_attr_group = { > > /************************** sysfs end ************************/ > > +static bool dbs_sw_coordinated_cpus(struct cpu_dbs_info_s *dbs_info) > +{ > + struct cpufreq_policy *policy = dbs_info->cur_policy; > + > + if (cpumask_weight(policy->cpus) > 1) > + return true; > + else > + return false; > +} return cpumask_weight(policy->cpus) > 1; pretty please. > + > static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq) > { > if (dbs_tuners_ins.powersave_bias) > @@ -598,20 +609,41 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) > > static void do_dbs_timer(struct work_struct *work) > { > + struct delayed_work *dw = to_delayed_work(work); > struct cpu_dbs_info_s *dbs_info = > container_of(work, struct cpu_dbs_info_s, work.work); > - unsigned int cpu = dbs_info->cpu; > - int sample_type = dbs_info->sample_type; > - > + int sample_type; > int delay; > + bool sample = true; > + > + if (dbs_sw_coordinated_cpus(dbs_info)) { > + ktime_t time_now; > + s64 delta_us; > + > + /* use leader CPU's dbs_info */ > + dbs_info = &per_cpu(od_cpu_dbs_info, dbs_info->cpu); > + mutex_lock(&dbs_info->timer_mutex); > > - mutex_lock(&dbs_info->timer_mutex); > + time_now = ktime_get(); > + delta_us = ktime_us_delta(time_now, dbs_info->time_stamp); > + > + /* Do nothing if we recently have sampled */ > + if (delta_us < (s64)(dbs_tuners_ins.sampling_rate / 2)) > + sample = false; > + else > + dbs_info->time_stamp = time_now; > + } else { > + mutex_lock(&dbs_info->timer_mutex); > + } Please don't handle locking this way. Instead, please move the code you'll run under the lock in both cases into a separate function (it may take "sample" as an argument along with dbs_info) and call it between mutex_lock() and mutex_unlock() in each block. In addition to that, I'd move the whole block executed when dbs_sw_coordinated_cpus(dbs_info) is true into a separate function where dbs_info would be a local variable. This way it wouldn't mix two distinct cases in the same piece of code that's remarkably hard to follow. > + > + sample_type = dbs_info->sample_type; > > /* Common NORMAL_SAMPLE setup */ > dbs_info->sample_type = DBS_NORMAL_SAMPLE; > if (!dbs_tuners_ins.powersave_bias || > sample_type == DBS_NORMAL_SAMPLE) { > - dbs_check_cpu(dbs_info); > + if (sample) > + dbs_check_cpu(dbs_info); > if (dbs_info->freq_lo) { > /* Setup timer for SUB_SAMPLE */ > dbs_info->sample_type = DBS_SUB_SAMPLE; > @@ -627,32 +659,41 @@ static void do_dbs_timer(struct work_struct *work) > delay -= jiffies % delay; > } > } else { > - __cpufreq_driver_target(dbs_info->cur_policy, > - dbs_info->freq_lo, CPUFREQ_RELATION_H); > + if (sample) > + __cpufreq_driver_target(dbs_info->cur_policy, > + dbs_info->freq_lo, > + CPUFREQ_RELATION_H); > delay = dbs_info->freq_lo_jiffies; > } > - schedule_delayed_work_on(cpu, &dbs_info->work, delay); > + schedule_delayed_work_on(smp_processor_id(), dw, delay); We're not supposed to be using smp_processor_id() any more. get_cpu()/put_cpu() should be used instead. > mutex_unlock(&dbs_info->timer_mutex); > } > > -static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) > +static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info, int cpu) > { > /* We want all CPUs to do sampling nearly on same jiffy */ > int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); > + struct cpu_dbs_info_s *dbs_info_local = &per_cpu(od_cpu_dbs_info, cpu); > > if (num_online_cpus() > 1) > delay -= jiffies % delay; > > + cancel_delayed_work_sync(&dbs_info_local->work); > dbs_info->sample_type = DBS_NORMAL_SAMPLE; > - INIT_DEFERRABLE_WORK(&dbs_info->work, do_dbs_timer); > - schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay); > + schedule_delayed_work_on(cpu, &dbs_info_local->work, delay); > } > > -static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) > +static inline void dbs_timer_exit(int cpu) > { > + struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); > cancel_delayed_work_sync(&dbs_info->work); > } > > +static void dbs_timer_exit_per_cpu(struct work_struct *dummy) > +{ > + dbs_timer_exit(smp_processor_id()); > +} > + > /* > * Not all CPUs want IO time to be accounted as busy; this dependson how > * efficient idling at a higher frequency/voltage is. > @@ -676,6 +717,43 @@ static int should_io_be_busy(void) > return 0; > } > > +static int __cpuinit cpu_callback(struct notifier_block *nfb, > + unsigned long action, void *hcpu) > +{ > + unsigned int cpu = (unsigned long)hcpu; > + struct device *cpu_dev; > + struct cpu_dbs_info_s *dbs_info; > + > + dbs_info = &per_cpu(od_cpu_dbs_info, cpu); > + > + /* use leader CPU's dbs_info */ > + if (dbs_sw_coordinated_cpus(dbs_info)) > + dbs_info = &per_cpu(od_cpu_dbs_info, dbs_info->cpu); > + > + cpu_dev = get_cpu_device(cpu); > + if (cpu_dev) { > + switch (action) { > + case CPU_ONLINE: > + case CPU_ONLINE_FROZEN: > + dbs_timer_init(dbs_info, cpu); > + break; > + case CPU_DOWN_PREPARE: > + case CPU_DOWN_PREPARE_FROZEN: > + dbs_timer_exit(cpu); > + break; > + case CPU_DOWN_FAILED: > + case CPU_DOWN_FAILED_FROZEN: > + dbs_timer_init(dbs_info, cpu); Why don't you merge this with the CPU_ONLINE* cases? > + break; > + } > + } > + return NOTIFY_OK; > +} > + > +static struct notifier_block __refdata ondemand_cpu_notifier = { > + .notifier_call = cpu_callback, > +}; > + > static int cpufreq_governor_dbs(struct cpufreq_policy *policy, > unsigned int event) > { > @@ -704,9 +782,13 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, > if (dbs_tuners_ins.ignore_nice) > j_dbs_info->prev_cpu_nice = > kcpustat_cpu(j).cpustat[CPUTIME_NICE]; > + > + mutex_init(&j_dbs_info->timer_mutex); > + INIT_DEFERRABLE_WORK(&j_dbs_info->work, do_dbs_timer); > + > + j_dbs_info->rate_mult = 1; > } > this_dbs_info->cpu = cpu; > - this_dbs_info->rate_mult = 1; > ondemand_powersave_bias_init_cpu(cpu); > /* > * Start the timerschedule work, when this governor > @@ -736,21 +818,42 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, > } > mutex_unlock(&dbs_mutex); > > - mutex_init(&this_dbs_info->timer_mutex); > - dbs_timer_init(this_dbs_info); > + /* If SW coordinated CPUs then register notifier */ > + if (dbs_sw_coordinated_cpus(this_dbs_info)) { > + register_hotcpu_notifier(&ondemand_cpu_notifier); > + > + /* Initiate timer time stamp */ > + this_dbs_info->time_stamp = ktime_get(); > + > + for_each_cpu(j, policy->cpus) { > + struct cpu_dbs_info_s *j_dbs_info; > + > + j_dbs_info = &per_cpu(od_cpu_dbs_info, cpu); > + dbs_timer_init(j_dbs_info, j); > + } > + } else { > + dbs_timer_init(this_dbs_info, cpu); > + } > break; > > case CPUFREQ_GOV_STOP: > - dbs_timer_exit(this_dbs_info); > + dbs_timer_exit(cpu); > > mutex_lock(&dbs_mutex); > mutex_destroy(&this_dbs_info->timer_mutex); > dbs_enable--; > mutex_unlock(&dbs_mutex); > - if (!dbs_enable) > + if (!dbs_enable) { > sysfs_remove_group(cpufreq_global_kobject, > &dbs_attr_group); > > + if (dbs_sw_coordinated_cpus(this_dbs_info)) { > + /* Make sure all pending timers/works are > + * stopped. */ > + schedule_on_each_cpu(dbs_timer_exit_per_cpu); > + unregister_hotcpu_notifier(&ondemand_cpu_notifier); > + } > + } > break; > > case CPUFREQ_GOV_LIMITS: Thanks, Rafael -- I speak only for myself. Rafael J. Wysocki, Intel Open Source Technology Center. -- To unsubscribe from this list: send the line "unsubscribe cpufreq" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html