The old cpu/cpu*/cpufreq/scaling_{governor,driver} will still be usable for some time. A deprecation message is printed once if one of the deprecated sysfs files is used: CPUFREQ: Per core cpufreq sysfs interface is deprecated - show_scaling_available_governors The new interface is placed statically (will never be removed) into the global /sys/devices/system/cpu/cpufreq dir. I found one inconsistency/bug while doing sysfs stress tests: cat cpu/cpu*/cpufreq/scaling_governor could (very rarely) show different governors, while only one should be active. I expect to solve this a global mutex between online/offling (CPU_DOWN_PREPARE_FROZEN notifier event) and store_scaling_governor, around the while loop is needed. Not sure whether this works out, but seeing different governors could happen before and should not be sever. Working this around can get ugly. As soon as the cpu* scaling_governor files vanished this can of course not happen. Another "uglyness" which can ripped out after deprecation time is to detect the store_governor access in the generic store() function and not doing the rwsem_write locking. If this is not done a deadlock will happen between some kind of generic sysfs lock with the cpu online file and the rwsem lock: INFO: task switch_governor:5934 blocked for more than 120 seconds. 2 locks held by switch_governor/5934: Jul 23 17:14:22 linux kernel: [ 481.152132] #0: (&buffer->mutex){+.+.+.}, at: [<ffffffff8112e40d>] sysfs_write_file+0x38/0x119 Jul 23 17:14:22 linux kernel: [ 481.152138] #1: (&per_cpu(cpu_policy_rwsem, cpu)){+++++.}, at: [<ffffffff813e930f>] lock_policy_rwsem_write+0x48/0x79 4 locks held by offline_cpus.sh/6665: Jul 23 17:14:22 linux kernel: [ 481.152244] #0: (&buffer->mutex){+.+.+.}, at: [<ffffffff8112e40d>] sysfs_write_file+0x38/0x119 Jul 23 17:14:22 linux kernel: [ 481.152249] #1: (cpu_add_remove_lock){+.+.+.}, at: [<ffffffff81559567>] cpu_down+0x28/0x82 Jul 23 17:14:22 linux kernel: [ 481.152254] #2: (cpu_hotplug.lock){+.+.+.}, at: [<ffffffff81042d08>] cpu_hotplug_begin+0x22/0x50 Jul 23 17:14:22 linux kernel: [ 481.152259] #3: (&per_cpu(cpu_policy_rwsem, cpu)){+++++.}, at: [<ffffffff813e930f>] lock_policy_rwsem_write+0x48/0x79 What do you think? If this gets accepted I volunteer to update documentation and cpufrequtils... CC: Dave Jones <davej@xxxxxxxxxx> CC: Venkatesh Pallipadi <venkatesh.pallipadi@xxxxxxxxx> Signed-off-by: Thomas Renninger <trenn@xxxxxxx> --- drivers/cpufreq/cpufreq.c | 228 ++++++++++++++++++++++++++++++++++----------- 1 files changed, 175 insertions(+), 53 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index bccdebb..2a0d3a9 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -39,10 +39,6 @@ */ static struct cpufreq_driver *cpufreq_driver; static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data); -#ifdef CONFIG_HOTPLUG_CPU -/* This one keeps track of the previously set governor of a removed CPU */ -static DEFINE_PER_CPU(struct cpufreq_governor *, cpufreq_cpu_governor); -#endif static DEFINE_SPINLOCK(cpufreq_driver_lock); /* @@ -128,6 +124,10 @@ static int __init init_cpufreq_transition_notifier_list(void) } pure_initcall(init_cpufreq_transition_notifier_list); +static struct cpufreq_governor *cpufreq_current_governor = + CPUFREQ_DEFAULT_GOVERNOR; +static int current_driver_policy; + static LIST_HEAD(cpufreq_governor_list); static DEFINE_MUTEX(cpufreq_governor_mutex); @@ -503,48 +503,99 @@ static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, /** * show_scaling_governor - show the current policy for the specified CPU */ -static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf) + +static ssize_t show_scaling_governor(struct kobject *kobj, + struct attribute *attr, char *buf) { - if (policy->policy == CPUFREQ_POLICY_POWERSAVE) - return sprintf(buf, "powersave\n"); - else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) - return sprintf(buf, "performance\n"); - else if (policy->governor) + if (current_driver_policy) { + if (current_driver_policy == CPUFREQ_POLICY_POWERSAVE) + return sprintf(buf, "powersave\n"); + else if (current_driver_policy == CPUFREQ_POLICY_PERFORMANCE) + return sprintf(buf, "performance\n"); + } else if (cpufreq_current_governor) return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", - policy->governor->name); + cpufreq_current_governor->name); return -EINVAL; } - /** * store_scaling_governor - store policy for the specified CPU */ -static ssize_t store_scaling_governor(struct cpufreq_policy *policy, - const char *buf, size_t count) +static ssize_t store_scaling_governor(struct kobject *kobj, + struct attribute *attr, const char *buf, + size_t count) { unsigned int ret = -EINVAL; + int i,j; char str_governor[16]; + struct cpufreq_policy *old_policy; struct cpufreq_policy new_policy; - - ret = cpufreq_get_policy(&new_policy, policy->cpu); - if (ret) - return ret; + struct cpufreq_governor *old_gov = cpufreq_current_governor; + int old_driver_policy = current_driver_policy; + struct cpumask already_handled; + int is_handled = 0; + cpus_clear(already_handled); ret = sscanf(buf, "%15s", str_governor); if (ret != 1) return -EINVAL; - if (cpufreq_parse_governor(str_governor, &new_policy.policy, - &new_policy.governor)) + if (cpufreq_parse_governor(str_governor, ¤t_driver_policy, + &cpufreq_current_governor)) return -EINVAL; - /* Do not use cpufreq_set_policy here or the user_policy.max - will be wrongly overridden */ - ret = __cpufreq_set_policy(policy, &new_policy); + if (old_gov == cpufreq_current_governor && + old_driver_policy == current_driver_policy) + return count; - policy->user_policy.policy = policy->policy; - policy->user_policy.governor = policy->governor; + for_each_present_cpu(i) { + ret = cpufreq_get_policy(&new_policy, i); + if (ret) { + ret = 0; + continue; + } + + /* Do not set managed policies/cpus twice */ + for_each_cpu(j, new_policy.cpus) { + if (cpu_isset(j, already_handled)) + is_handled = 1; + } + if (is_handled) { + dprintk("CPU %d already handled\n", i); + is_handled = 0; + continue; + } + ret = unlikely(lock_policy_rwsem_write(i)); + if (ret) + continue; + + /* race can happen with offlining, handle it */ + if (cpu_is_offline(i)) { + unlock_policy_rwsem_write(i); + continue; + } + + old_policy = cpufreq_cpu_get(i); + if (!old_policy) { + unlock_policy_rwsem_write(i); + continue; + } + + new_policy.governor = cpufreq_current_governor; + new_policy.policy = current_driver_policy; + ret = __cpufreq_set_policy(old_policy, &new_policy); + cpufreq_cpu_put(old_policy); + WARN_ON(ret); + if (ret) { + unlock_policy_rwsem_write(i); + continue; + } + old_policy->user_policy.governor = cpufreq_current_governor; + old_policy->user_policy.policy = current_driver_policy; + unlock_policy_rwsem_write(i); + cpu_set(new_policy.cpu, already_handled); + } if (ret) return ret; else @@ -554,7 +605,8 @@ static ssize_t store_scaling_governor(struct cpufreq_policy *policy, /** * show_scaling_driver - show the cpufreq driver currently loaded */ -static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf) +static ssize_t show_scaling_driver(struct kobject *kobj, + struct attribute *attr, char *buf) { return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name); } @@ -562,7 +614,8 @@ static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf) /** * show_scaling_available_governors - show the available CPUfreq governors */ -static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy, +static ssize_t show_scaling_available_governors(struct kobject *kobj, + struct attribute *attr, char *buf) { ssize_t i = 0; @@ -584,6 +637,61 @@ out: return i; } +/*** delete after deprecation time ***/ + +#define DEPRECATION_MSG(file_name) \ + printk_once(KERN_INFO "CPUFREQ: Per core cpufreq sysfs " \ + "interface is deprecated - " #file_name "\n"); + +static ssize_t show_scaling_governor_old(struct cpufreq_policy *policy, + char *buf) +{ + DEPRECATION_MSG(show_scaling_governor); + return show_scaling_governor(NULL, NULL, buf); +}; + +static ssize_t store_scaling_governor_old(struct cpufreq_policy *policy, + const char *buf, size_t count) +{ + int ret; + + DEPRECATION_MSG(store_scaling_governor); + cpufreq_cpu_put(policy); + ret = store_scaling_governor(NULL, NULL, buf, count); + cpufreq_cpu_get(policy->cpu); + return ret; +} + +static ssize_t show_scaling_driver_old(struct cpufreq_policy *policy, + char *buf) +{ + DEPRECATION_MSG(show_scaling_driver); + return show_scaling_driver(NULL, NULL, buf); +} + +static ssize_t show_scaling_available_governors_old(struct cpufreq_policy *p, + char *buf) +{ + DEPRECATION_MSG(show_scaling_available_governors); + return show_scaling_available_governors(NULL, NULL, buf); +} + +#define define_one_ro_old(object, _name) \ +static struct freq_attr object = \ +__ATTR(_name, 0444, show_##_name##_old, NULL) + +#define define_one_rw_old(object, _name) \ +static struct freq_attr object = \ +__ATTR(_name, 0644, show_##_name##_old, store_##_name##_old) + +define_one_ro_old(scaling_available_governors_old, + scaling_available_governors); +define_one_ro_old(scaling_driver_old, scaling_driver); +define_one_rw_old(scaling_governor_old, scaling_governor); + +/*** delete after deprecation time ***/ + + static ssize_t show_cpus(const struct cpumask *mask, char *buf) { ssize_t i = 0; @@ -635,14 +743,11 @@ define_one_ro0400(cpuinfo_cur_freq); define_one_ro(cpuinfo_min_freq); define_one_ro(cpuinfo_max_freq); define_one_ro(cpuinfo_transition_latency); -define_one_ro(scaling_available_governors); -define_one_ro(scaling_driver); define_one_ro(scaling_cur_freq); define_one_ro(related_cpus); define_one_ro(affected_cpus); define_one_rw(scaling_min_freq); define_one_rw(scaling_max_freq); -define_one_rw(scaling_governor); static struct attribute *default_attrs[] = { &cpuinfo_min_freq.attr, @@ -652,12 +757,24 @@ static struct attribute *default_attrs[] = { &scaling_max_freq.attr, &affected_cpus.attr, &related_cpus.attr, - &scaling_governor.attr, - &scaling_driver.attr, - &scaling_available_governors.attr, + &scaling_governor_old.attr, + &scaling_driver_old.attr, + &scaling_available_governors_old.attr, NULL }; +#define define_one_global_ro(_name) \ +static struct global_attr _name = \ +__ATTR(_name, 0444, show_##_name, NULL) + +#define define_one_global_rw(_name) \ +static struct global_attr _name = \ +__ATTR(_name, 0644, show_##_name, store_##_name) + +define_one_global_ro(scaling_available_governors); +define_one_global_ro(scaling_driver); +define_one_global_rw(scaling_governor); + struct kobject *cpufreq_global_kobject; #define to_policy(k) container_of(k, struct cpufreq_policy, kobj) @@ -693,19 +810,33 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, struct cpufreq_policy *policy = to_policy(kobj); struct freq_attr *fattr = to_attr(attr); ssize_t ret = -EINVAL; + /* + * TBD: remove scal_gov and rest after per cpu governor + * deprecation time + */ + int scal_gov = 0; policy = cpufreq_cpu_get(policy->cpu); if (!policy) goto no_policy; - if (lock_policy_rwsem_write(policy->cpu) < 0) - goto fail; + if(!strncmp(attr->name, "scaling_governor", 16)) { + printk("Scaling gov found\n"); + scal_gov = 1; + } + + if(!scal_gov) { + if(lock_policy_rwsem_write(policy->cpu) < 0) + goto fail; + } if (fattr->store) ret = fattr->store(policy, buf, count); else ret = -EIO; - unlock_policy_rwsem_write(policy->cpu); + if(!scal_gov) + unlock_policy_rwsem_write(policy->cpu); + scal_gov = 0; fail: cpufreq_cpu_put(policy); no_policy: @@ -744,14 +875,6 @@ int cpufreq_add_dev_policy(unsigned int cpu, struct cpufreq_policy *policy, unsigned long flags; unsigned int j; -#ifdef CONFIG_HOTPLUG_CPU - if (per_cpu(cpufreq_cpu_governor, cpu)) { - policy->governor = per_cpu(cpufreq_cpu_governor, cpu); - dprintk("Restoring governor %s for cpu %d\n", - policy->governor->name, cpu); - } -#endif - for_each_cpu(j, policy->cpus) { struct cpufreq_policy *managed_policy; @@ -961,7 +1084,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) INIT_WORK(&policy->update, handle_update); /* Set governor before ->init, so that driver could check it */ - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->governor = cpufreq_current_governor; /* call driver. From then on the cpufreq must be able * to accept all calls to ->verify and ->setpolicy for this CPU */ @@ -1072,10 +1195,6 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) #ifdef CONFIG_SMP -#ifdef CONFIG_HOTPLUG_CPU - per_cpu(cpufreq_cpu_governor, cpu) = data->governor; -#endif - /* if we have other CPUs still registered, we need to unlink them, * or else wait_for_completion below will lock up. Clean the * per_cpu(cpufreq_cpu_data) while holding the lock, and remove @@ -1096,9 +1215,6 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) if (j == cpu) continue; dprintk("removing link for cpu %u\n", j); -#ifdef CONFIG_HOTPLUG_CPU - per_cpu(cpufreq_cpu_governor, j) = data->governor; -#endif cpu_sys_dev = get_cpu_sysdev(j); sysfs_remove_link(&cpu_sys_dev->kobj, "cpufreq"); cpufreq_cpu_put(data); @@ -1967,7 +2083,13 @@ static int __init cpufreq_core_init(void) cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_sysdev_class.kset.kobj); - BUG_ON(!cpufreq_global_kobject); + WARN_ON(!cpufreq_global_kobject); + + WARN_ON(sysfs_create_file(cpufreq_global_kobject, + &scaling_governor.attr)); + WARN_ON(sysfs_create_file(cpufreq_global_kobject, &scaling_driver.attr)); + WARN_ON(sysfs_create_file(cpufreq_global_kobject, + &scaling_available_governors.attr)); return 0; } -- 1.6.0.2 -- To unsubscribe from this list: send the line "unsubscribe cpufreq" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html