On Tue, 8 May 2012 21:48:14 +0530 Amit Daniel Kachhap <amit.kachhap@xxxxxxxxxx> wrote:

> This patch adds support for generic cpu thermal cooling low level
> implementations using frequency scaling up/down based on the registration
> parameters. Different cpu related cooling devices can be registered by the
> user and the binding of these cooling devices to the corresponding
> trip points can be easily done as the registration APIs return the
> cooling device pointer. The users of these APIs are responsible for
> passing clipping frequency. The drivers can also register to receive
> notification about any cooling action called. Even the driver can effect
> the cooling action by modifying the default data such as freq_clip_max if
> needed.
>
>
> ...
>
> +struct cpufreq_cooling_device {
> +        int id;
> +        struct thermal_cooling_device *cool_dev;
> +        struct freq_clip_table *tab_ptr;
> +        unsigned int tab_size;
> +        unsigned int cpufreq_state;
> +        const struct cpumask *allowed_cpus;
> +        struct list_head node;
> +};

It would be nice to document the fields.  Especially id, tab_size,
cpufreq_state and node.  For `node' we should describe the locking for
the list, and describe which list_head anchors this list.

> +static LIST_HEAD(cooling_cpufreq_list);
> +static DEFINE_MUTEX(cooling_cpufreq_lock);
> +static DEFINE_IDR(cpufreq_idr);
> +static DEFINE_PER_CPU(unsigned int, max_policy_freq);
> +static struct freq_clip_table *notify_table;
> +static int notify_state;
> +static BLOCKING_NOTIFIER_HEAD(cputherm_state_notifier_list);
> +
> +static int get_idr(struct idr *idr, struct mutex *lock, int *id)
> +{
> +        int err;
> +again:
> +        if (unlikely(idr_pre_get(idr, GFP_KERNEL) == 0))
> +                return -ENOMEM;
> +
> +        if (lock)
> +                mutex_lock(lock);

The test for NULL `lock' is unneeded.  In fact the `lock' argument could
be removed altogether - just use cooling_cpufreq_lock directly.  (A
sketch of what I mean is further down, after the comment-layout note.)

> +        err = idr_get_new(idr, NULL, id);
> +        if (lock)
> +                mutex_unlock(lock);
> +        if (unlikely(err == -EAGAIN))
> +                goto again;
> +        else if (unlikely(err))
> +                return err;
> +
> +        *id = *id & MAX_ID_MASK;
> +        return 0;
> +}
> +
> +static void release_idr(struct idr *idr, struct mutex *lock, int id)
> +{
> +        if (lock)
> +                mutex_lock(lock);

Ditto.

> +        idr_remove(idr, id);
> +        if (lock)
> +                mutex_unlock(lock);
> +}
> +
>
> ...
>
> +
> +/*Below codes defines functions to be used for cpufreq as cooling device*/
> +static bool is_cpufreq_valid(int cpu)
> +{
> +        struct cpufreq_policy policy;
> +        return !cpufreq_get_policy(&policy, cpu) ? true : false;

Can use

        return !cpufreq_get_policy(&policy, cpu);

> +}
> +
> +static int cpufreq_apply_cooling(struct cpufreq_cooling_device *cpufreq_device,
> +                unsigned long cooling_state)
> +{
> +        unsigned int event, cpuid;
> +        struct freq_clip_table *th_table;
> +
> +        if (cooling_state > cpufreq_device->tab_size)
> +                return -EINVAL;
> +
> +        cpufreq_device->cpufreq_state = cooling_state;
> +
> +        /*cpufreq thermal notifier uses this cpufreq device pointer*/

This code looks like it was written by two people.

        /* One who does this */
        /*And one who does this*/

The first one was right.  Please go through all the comments in all the
patches and get the layout consistent?
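Coming back to the idr helpers, here is roughly what I had in mind - an
untested sketch, assuming cooling_cpufreq_lock is the only lock these
helpers will ever be used with:

        static int get_idr(struct idr *idr, int *id)
        {
                int err;
        again:
                if (unlikely(idr_pre_get(idr, GFP_KERNEL) == 0))
                        return -ENOMEM;

                /* use the global lock directly - no `lock' argument */
                mutex_lock(&cooling_cpufreq_lock);
                err = idr_get_new(idr, NULL, id);
                mutex_unlock(&cooling_cpufreq_lock);
                if (unlikely(err == -EAGAIN))
                        goto again;
                else if (unlikely(err))
                        return err;

                *id = *id & MAX_ID_MASK;
                return 0;
        }

        static void release_idr(struct idr *idr, int id)
        {
                mutex_lock(&cooling_cpufreq_lock);
                idr_remove(idr, id);
                mutex_unlock(&cooling_cpufreq_lock);
        }

The callers in cpufreq_cooling_register() and cpufreq_cooling_unregister()
would then drop the &cooling_cpufreq_lock argument accordingly.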
> +        notify_state = cooling_state;
> +
> +        if (notify_state > 0) {
> +                th_table = &(cpufreq_device->tab_ptr[cooling_state - 1]);
> +                memcpy(notify_table, th_table, sizeof(struct freq_clip_table));
> +                event = CPUFREQ_COOLING_TYPE;
> +                blocking_notifier_call_chain(&cputherm_state_notifier_list,
> +                        event, notify_table);
> +        }
> +
> +        for_each_cpu(cpuid, cpufreq_device->allowed_cpus) {
> +                if (is_cpufreq_valid(cpuid))
> +                        cpufreq_update_policy(cpuid);
> +        }
> +
> +        notify_state = -1;
> +
> +        return 0;
> +}
> +
> +static int cpufreq_thermal_notifier(struct notifier_block *nb,
> +                unsigned long event, void *data)
> +{
> +        struct cpufreq_policy *policy = data;
> +        unsigned long max_freq = 0;
> +
> +        if ((event != CPUFREQ_ADJUST) || (notify_state == -1))

Please document `notify_state', at its definition site.  This reader
doesn't know what "notify_state == -1" *means*.

> +                return 0;
> +
> +        if (notify_state > 0) {
> +                max_freq = notify_table->freq_clip_max;
> +
> +                if (per_cpu(max_policy_freq, policy->cpu) == 0)
> +                        per_cpu(max_policy_freq, policy->cpu) = policy->max;
> +        } else {
> +                if (per_cpu(max_policy_freq, policy->cpu) != 0) {
> +                        max_freq = per_cpu(max_policy_freq, policy->cpu);
> +                        per_cpu(max_policy_freq, policy->cpu) = 0;
> +                } else {
> +                        max_freq = policy->max;
> +                }
> +        }
> +
> +        /* Never exceed user_policy.max*/
> +        if (max_freq > policy->user_policy.max)
> +                max_freq = policy->user_policy.max;
> +
> +        if (policy->max != max_freq)
> +                cpufreq_verify_within_limits(policy, 0, max_freq);
> +
> +        return 0;
> +}
> +
>
> ...
>
> +/*This cooling may be as PASSIVE/ACTIVE type*/
> +static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
> +                unsigned long state)
> +{
> +        int ret = -EINVAL;
> +        struct cpufreq_cooling_device *cpufreq_device;
> +
> +        mutex_lock(&cooling_cpufreq_lock);
> +        list_for_each_entry(cpufreq_device, &cooling_cpufreq_list, node) {
> +                if (cpufreq_device && cpufreq_device->cool_dev == cdev) {
> +                        ret = 0;
> +                        break;
> +                }
> +        }
> +        mutex_unlock(&cooling_cpufreq_lock);
> +
> +        if (!ret)
> +                ret = cpufreq_apply_cooling(cpufreq_device, state);

Now that we've dropped the lock, what prevents *cpufreq_device from
getting freed, or undesirably altered?

> +        return ret;
> +}
> +
> +/* bind cpufreq callbacks to cpufreq cooling device */
> +static struct thermal_cooling_device_ops cpufreq_cooling_ops = {

Can it be made const?

> +        .get_max_state = cpufreq_get_max_state,
> +        .get_cur_state = cpufreq_get_cur_state,
> +        .set_cur_state = cpufreq_set_cur_state,
> +};
> +
> +static struct notifier_block thermal_cpufreq_notifier_block = {
> +        .notifier_call = cpufreq_thermal_notifier,
> +};
> +
> +struct thermal_cooling_device *cpufreq_cooling_register(
> +        struct freq_clip_table *tab_ptr, unsigned int tab_size,
> +        const struct cpumask *mask_val)
> +{
> +        struct thermal_cooling_device *cool_dev;
> +        struct cpufreq_cooling_device *cpufreq_dev = NULL;
> +        unsigned int cpufreq_dev_count = 0;
> +        char dev_name[THERMAL_NAME_LENGTH];
> +        int ret = 0, id = 0, i;
> +
> +        if (tab_ptr == NULL || tab_size == 0)
> +                return ERR_PTR(-EINVAL);
> +
> +        list_for_each_entry(cpufreq_dev, &cooling_cpufreq_list, node)
> +                cpufreq_dev_count++;
> +
> +        cpufreq_dev =
> +                kzalloc(sizeof(struct cpufreq_cooling_device), GFP_KERNEL);

The 80-col contortions are ugly.
Alternatives are

        cpufreq_dev = kzalloc(sizeof(struct cpufreq_cooling_device),
                        GFP_KERNEL);

or, better,

        cpufreq_dev = kzalloc(sizeof(*cpufreq_dev), GFP_KERNEL);

> +        if (!cpufreq_dev)
> +                return ERR_PTR(-ENOMEM);
> +
> +        if (cpufreq_dev_count == 0) {
> +                notify_table = kzalloc(sizeof(struct freq_clip_table),
> +                        GFP_KERNEL);
> +                if (!notify_table) {
> +                        kfree(cpufreq_dev);
> +                        return ERR_PTR(-ENOMEM);
> +                }
> +        }
> +
> +        cpufreq_dev->tab_ptr = tab_ptr;
> +        cpufreq_dev->tab_size = tab_size;
> +        cpufreq_dev->allowed_cpus = mask_val;
> +
> +        /* Initialize all the tab_ptr->mask_val to the passed mask_val */
> +        for (i = 0; i < tab_size; i++)
> +                ((struct freq_clip_table *)&tab_ptr[i])->mask_val = mask_val;
> +
> +        ret = get_idr(&cpufreq_idr, &cooling_cpufreq_lock, &cpufreq_dev->id);

hm, "get_idr" is a poor name.  One would expect it to do a lookup, but
it actually does an installation.  That's a result of the poorly-named
idr_get_new(), I expect.

> +        if (ret) {
> +                kfree(cpufreq_dev);
> +                return ERR_PTR(-EINVAL);
> +        }
> +
> +        sprintf(dev_name, "thermal-cpufreq-%d", cpufreq_dev->id);
> +
> +        cool_dev = thermal_cooling_device_register(dev_name, cpufreq_dev,
> +                        &cpufreq_cooling_ops);
> +        if (!cool_dev) {
> +                release_idr(&cpufreq_idr, &cooling_cpufreq_lock,
> +                        cpufreq_dev->id);
> +                kfree(cpufreq_dev);
> +                return ERR_PTR(-EINVAL);
> +        }
> +        cpufreq_dev->id = id;
> +        cpufreq_dev->cool_dev = cool_dev;
> +        mutex_lock(&cooling_cpufreq_lock);
> +        list_add_tail(&cpufreq_dev->node, &cooling_cpufreq_list);
> +        mutex_unlock(&cooling_cpufreq_lock);
> +
> +        /*Register the notifier for first cpufreq cooling device*/
> +        if (cpufreq_dev_count == 0)
> +                cpufreq_register_notifier(&thermal_cpufreq_notifier_block,
> +                        CPUFREQ_POLICY_NOTIFIER);
> +        return cool_dev;
> +}
> +EXPORT_SYMBOL(cpufreq_cooling_register);
> +
> +void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
> +{
> +        struct cpufreq_cooling_device *cpufreq_dev = NULL;
> +        unsigned int cpufreq_dev_count = 0;
> +
> +        mutex_lock(&cooling_cpufreq_lock);
> +        list_for_each_entry(cpufreq_dev, &cooling_cpufreq_list, node) {
> +                if (cpufreq_dev && cpufreq_dev->cool_dev == cdev)
> +                        break;
> +                cpufreq_dev_count++;
> +        }
> +
> +        if (!cpufreq_dev || cpufreq_dev->cool_dev != cdev) {
> +                mutex_unlock(&cooling_cpufreq_lock);
> +                return;
> +        }
> +
> +        list_del(&cpufreq_dev->node);
> +        mutex_unlock(&cooling_cpufreq_lock);
> +
> +        /*Unregister the notifier for the last cpufreq cooling device*/
> +        if (cpufreq_dev_count == 1) {

But we dropped the lock, so local variable cpufreq_dev_count is now
meaningless.  What prevents a race here?

> +                cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block,
> +                        CPUFREQ_POLICY_NOTIFIER);
> +                kfree(notify_table);
> +        }
> +
> +        thermal_cooling_device_unregister(cpufreq_dev->cool_dev);
> +        release_idr(&cpufreq_idr, &cooling_cpufreq_lock, cpufreq_dev->id);
> +        kfree(cpufreq_dev);
> +}
> +EXPORT_SYMBOL(cpufreq_cooling_unregister);
>
> ...
>
> +struct freq_clip_table {
> +        unsigned int freq_clip_max;
> +        unsigned int polling_interval;
> +        unsigned int temp_level;
> +        const struct cpumask *mask_val;
> +};

hm, what does this thing do.  Needs a nice comment for the uninitiated,
please.  Something which describes the overall roles, responsibilities
and general reasons for existence.
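Something along these lines, perhaps - these are my guesses at the field
semantics (the units in particular are assumptions on my part), so
please correct whatever I have wrong:

        /**
         * struct freq_clip_table - frequency clipping entry for one trip level
         * @freq_clip_max:      maximum allowed cpu frequency (kHz?) while
         *                      this entry is in effect
         * @polling_interval:   polling interval (ms?) to be used while
         *                      clipped at this level
         * @temp_level:         temperature at which this clip entry kicks in
         * @mask_val:           cpumask of the cpus this entry applies to;
         *                      filled in by cpufreq_cooling_register()
         */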
> +int cputherm_register_notifier(struct notifier_block *nb, unsigned int list);
> +int cputherm_unregister_notifier(struct notifier_block *nb, unsigned int list);
> +
> +#ifdef CONFIG_CPU_FREQ
> +struct thermal_cooling_device *cpufreq_cooling_register(
> +        struct freq_clip_table *tab_ptr, unsigned int tab_size,
> +        const struct cpumask *mask_val);
> +
> +void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev);
> +#else /*!CONFIG_CPU_FREQ*/

(more whacky comment layout - see below for the sort of layout I'd
expect in that branch)

>
> ...
>
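For the record, the !CONFIG_CPU_FREQ branch would conventionally look
something like the below.  This is only a sketch - I'm guessing at the
stub bodies, and returning NULL rather than an ERR_PTR() is the author's
call:

        #else /* !CONFIG_CPU_FREQ */

        /* cpufreq cooling is unavailable: provide no-op stubs */
        static inline struct thermal_cooling_device *cpufreq_cooling_register(
                struct freq_clip_table *tab_ptr, unsigned int tab_size,
                const struct cpumask *mask_val)
        {
                return NULL;
        }

        static inline void cpufreq_cooling_unregister(
                struct thermal_cooling_device *cdev)
        {
        }

        #endif /* CONFIG_CPU_FREQ */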