Re: [RFC PATCH] thermal: Add support to report cooling statistics achieved by cooling devices

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hello Amit,

some comments embedded.

On Wed, Jan 18, 2012 at 02:51:07PM +0530, Amit Daniel Kachhap wrote:
> Add a sysfs node code to report effective cooling of all cooling devices
> attached to each trip points of a thermal zone. The cooling data reported
> will be absolute if the higher temperature trip points are arranged first
> otherwise the cooling stats is the cumulative effect of the earlier
> invoked cooling handlers.
> 
> The basic assumption is that cooling devices will bring down the temperature
> in a symmetric manner and those statistics can be stored back and used for
> further tuning of the system.
> 
> Signed-off-by: Amit Daniel Kachhap <amit.kachhap@xxxxxxxxxx>
> ---
>  Documentation/thermal/sysfs-api.txt |   10 ++++
>  drivers/thermal/thermal_sys.c       |   96 +++++++++++++++++++++++++++++++++++
>  include/linux/thermal.h             |    8 +++
>  3 files changed, 114 insertions(+), 0 deletions(-)
> 
> diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt
> index b61e46f..1db9a9d 100644
> --- a/Documentation/thermal/sysfs-api.txt
> +++ b/Documentation/thermal/sysfs-api.txt
> @@ -209,6 +209,13 @@ passive
>  	Valid values: 0 (disabled) or greater than 1000
>  	RW, Optional
>  
> +trip_stats
> +	This attribute presents the effective cooling generated from all the
> +	cooling devices attached to a trip point. The output is a pair of value
> +	for each trip point. Each pair represents
> +	(trip index, cooling temperature difference in millidegree Celsius)
> +	RO, Optional
> +
>  *****************************
>  * Cooling device attributes *
>  *****************************
> @@ -261,6 +268,9 @@ method, the sys I/F structure will be built like this:
>      |---cdev0_trip_point:	1	/* cdev0 can be used for passive */
>      |---cdev1:			--->/sys/class/thermal/cooling_device3
>      |---cdev1_trip_point:	2	/* cdev1 can be used for active[0]*/
> +    |---trip_stats		0 0
> +				1 8000	/*trip 1 causes 8 degree Celsius drop*/
> +				2 3000	/*trip 2 causes 3 degree Celsius drop*/
>  
>  |cooling_device0:
>      |---type:			Processor
> diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
> index dd9a574..47e7b6e 100644
> --- a/drivers/thermal/thermal_sys.c
> +++ b/drivers/thermal/thermal_sys.c
> @@ -92,6 +92,64 @@ static void release_idr(struct idr *idr, struct mutex *lock, int id)
>  	if (lock)
>  		mutex_unlock(lock);
>  }
> +static void update_cooling_stats(struct thermal_zone_device *tz, long cur_temp)
> +{
> +	int count, max_index, cur_interval;
> +	long trip_temp, max_temp = 0, cool_temp;
> +	static int last_trip_level = -1;

I got confused here. Are you sure using a static variable here is safe? I suppose this function
is called for any thermal_zone_device, which in turns, may have different amount of trips, and
different update rate. You may be using last_trip_level as reference for a different tz. Meaning,
you would be screwing the stat buffers silently.

If that is the case, I suggest you to move this to your stat structure.

> +
> +	if (cur_temp >= tz->last_temperature)
> +		return;
> +
> +	/* find the trip according to last temperature */
> +	for (count = 0; count < tz->trips; count++) {
> +		tz->ops->get_trip_temp(tz, count, &trip_temp);
> +		if (tz->last_temperature >= trip_temp) {
> +			if (max_temp < trip_temp) {
> +				max_temp = trip_temp;
> +				max_index = count;
> +			}
> +		}
> +	}
> +
> +	if (!max_temp) {
> +		last_trip_level = -1;
> +		return;
> +	}
> +
> +	cur_interval = tz->stat[max_index].interval_ptr;
> +	cool_temp = tz->last_temperature - cur_temp;
> +
> +	if (last_trip_level != max_index) {
> +		if (++cur_interval == INTERVAL_HISTORY)
> +			cur_interval = 0;
> +		tz->stat[max_index].cool_temp[cur_interval] = cool_temp;
> +		tz->stat[max_index].interval_ptr = cur_interval;
> +		last_trip_level = max_index;
> +	} else {
> +		tz->stat[max_index].cool_temp[cur_interval] += cool_temp;

Why do you need to sum up here? Wouldn't this break your statistics? (I may completely missed your idea for last_trip_level).

> +	}
> +}
> +
> +static int calculate_cooling_temp_avg(struct thermal_zone_device *tz, int trip,
> +					int *avg_cool)
> +{
> +	int result = 0, count = 0, used_data = 0;
> +
> +	if (trip > THERMAL_MAX_TRIPS)

Shouldn't this be checked against tz->trips ? At least you allocate your *stat based on it.

> +		return -EINVAL;
> +
> +	*avg_cool = 0;
> +	for (count = 0; count < INTERVAL_HISTORY; count++) {
> +		if (tz->stat[trip].cool_temp[count] > 0) {
> +			*avg_cool += tz->stat[trip].cool_temp[count];
> +			used_data++;
> +		}
> +	}
> +	if (used_data > 1)
> +		*avg_cool = (*avg_cool)/used_data;

IIRC, the preferred operator style is (*avg_cool) / used_data

> +	return result;

result is used only to hold a 0 here?

> +}
>  
>  /* sys I/F for thermal zone */
>  
> @@ -493,6 +551,26 @@ temp_crit_show(struct device *dev, struct device_attribute *attr,
>  	return sprintf(buf, "%ld\n", temperature);
>  }
>  
> +static ssize_t
> +thermal_cooling_trip_stats_show(struct device *dev,
> +				struct device_attribute *attr,
> +				char *buf)
> +{
> +	struct thermal_zone_device *tz = to_thermal_zone(dev);
> +	int avg_cool = 0, result, trip_index;
> +	ssize_t len = 0;
> +
> +	for (trip_index = 0; trip_index < tz->trips; trip_index++) {
> +		result  = calculate_cooling_temp_avg(tz,
> +						trip_index, &avg_cool);
> +		if (!result)
> +			len += sprintf(buf + len, "%d %d\n",
> +					trip_index, avg_cool);
> +	}
> +	return len;
> +}
> +static DEVICE_ATTR(trip_stats, 0444,
> +		   thermal_cooling_trip_stats_show, NULL);
>  
>  static struct thermal_hwmon_device *
>  thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz)
> @@ -1049,6 +1127,8 @@ void thermal_zone_device_update(struct thermal_zone_device *tz)
>  		goto leave;
>  	}
>  
> +	update_cooling_stats(tz, temp);
> +
>  	for (count = 0; count < tz->trips; count++) {
>  		tz->ops->get_trip_type(tz, count, &trip_type);
>  		tz->ops->get_trip_temp(tz, count, &trip_temp);
> @@ -1181,6 +1261,13 @@ struct thermal_zone_device *thermal_zone_device_register(char *type,
>  		return ERR_PTR(result);
>  	}
>  
> +	/*Allocate variables for cooling stats*/
> +	tz->stat  = devm_kzalloc(&tz->device,
> +				sizeof(struct thermal_cooling_stats) * trips,
> +				GFP_KERNEL);

You never free this memory here.

> +	if (!tz->stat)
> +		goto unregister;
> +
>  	/* sys I/F */
>  	if (type) {
>  		result = device_create_file(&tz->device, &dev_attr_type);
> @@ -1207,6 +1294,12 @@ struct thermal_zone_device *thermal_zone_device_register(char *type,
>  			passive = 1;
>  	}
>  
> +	if (trips > 0) {
> +		result = device_create_file(&tz->device, &dev_attr_trip_stats);
> +		if (result)
> +			goto unregister;

The failing paths after your allocation point must consider freeing the memory you requested.

> +	}
> +
>  	if (!passive)
>  		result = device_create_file(&tz->device,
>  					    &dev_attr_passive);
> @@ -1282,6 +1375,9 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz)
>  	for (count = 0; count < tz->trips; count++)
>  		TRIP_POINT_ATTR_REMOVE(&tz->device, count);
>  
> +	if (tz->trips > 0)
> +		device_remove_file(&tz->device, &dev_attr_trip_stats);
> +

Amit,

I think here it would be a good place to free the memory you allocated for *stat

>  	thermal_remove_hwmon_sysfs(tz);
>  	release_idr(&thermal_tz_idr, &thermal_idr_lock, tz->id);
>  	idr_destroy(&tz->idr);
> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> index 47b4a27..47504c7 100644
> --- a/include/linux/thermal.h
> +++ b/include/linux/thermal.h
> @@ -72,6 +72,13 @@ struct thermal_cooling_device_ops {
>  #define THERMAL_TRIPS_NONE -1
>  #define THERMAL_MAX_TRIPS 12
>  #define THERMAL_NAME_LENGTH 20
> +#define INTERVAL_HISTORY 12
> +
> +struct thermal_cooling_stats {
> +	int cool_temp[INTERVAL_HISTORY];
> +	int interval_ptr;
> +};
> +
>  struct thermal_cooling_device {
>  	int id;
>  	char type[THERMAL_NAME_LENGTH];
> @@ -102,6 +109,7 @@ struct thermal_zone_device {
>  	struct list_head cooling_devices;
>  	struct idr idr;
>  	struct mutex lock;	/* protect cooling devices list */
> +	struct thermal_cooling_stats *stat;
>  	struct list_head node;
>  	struct delayed_work poll_queue;
>  };
> -- 
> 1.7.1
> 
> _______________________________________________
> linux-pm mailing list
> linux-pm@xxxxxxxxxxxxxxxxxxxxxxxxxx
> https://lists.linuxfoundation.org/mailman/listinfo/linux-pm
_______________________________________________
linux-pm mailing list
linux-pm@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linuxfoundation.org/mailman/listinfo/linux-pm


[Index of Archives]     [Linux ACPI]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [CPU Freq]     [Kernel Newbies]     [Fedora Kernel]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux