Re: [PATCH RFC 3/3] hwmon: (coretemp) Report package power and energy consumption

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Dec 19, 2012 at 10:29:36PM -0800, Guenter Roeck wrote:
> Supported by SandyBridge and newer CPUs.
> 
> Signed-off-by: Guenter Roeck <linux@xxxxxxxxxxxx>
> ---
> Tbd:	- labels
> 	- do we want to report other power/energy attributes (cores/dram/gpu) ?
> 	  - if yes - how [ does not fit numbering model ]
> 
This is working quite nicely. After about one day of runtime on an i7-2600:

coretemp-isa-0000
Adapter: ISA adapter
Physical id 0:  +29.0°C  (high = +80.0°C, crit = +98.0°C)
Core 0:         +18.0°C  (high = +80.0°C, crit = +98.0°C)
Core 1:         +23.0°C  (high = +80.0°C, crit = +98.0°C)
Core 2:         +29.0°C  (high = +80.0°C, crit = +98.0°C)
Core 3:         +24.0°C  (high = +80.0°C, crit = +98.0°C)
Pkg 0 power:     3.29 W  (max = 120.00 W, cap = 118.75 W)
Pkg 0 energy:    4.04 MJ

>  Documentation/hwmon/coretemp |    6 ++
>  drivers/hwmon/coretemp.c     |  192 +++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 195 insertions(+), 3 deletions(-)
> 
> diff --git a/Documentation/hwmon/coretemp b/Documentation/hwmon/coretemp
> index fec5a9b..fdbd111 100644
> --- a/Documentation/hwmon/coretemp
> +++ b/Documentation/hwmon/coretemp
> @@ -43,6 +43,12 @@ tempX_crit_alarm - Set when Out-of-spec bit is set, never clears.
>  tempX_label	 - Contains string "Core X", where X is processor
>  		   number. For Package temp, this will be "Physical id Y",
>  		   where Y is the package number.
> +energyX_input	 - Package energy consumption since driver was loaded
> +powerX_cap	 - Package power limit
> +powerX_cap_max	 - Maximum value for powerX_cap
> +powerX_cap_min	 - Minimum value for powerX_cap
> +powerX_input	 - Package power consumption
> +powerX_max	 - Maximum package power
>  
>  On CPU models which support it, TjMax is read from a model-specific register.
>  On other models, it is set to an arbitrary value based on weak heuristics.
> diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
> index 02ac401..63d6cff 100644
> --- a/drivers/hwmon/coretemp.c
> +++ b/drivers/hwmon/coretemp.c
> @@ -54,7 +54,7 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
>  #define NUM_REAL_CORES		32	/* Number of Real cores per cpu */
>  #define CORETEMP_NAME_LENGTH	17	/* String Length of attrs */
>  #define MAX_CORE_ATTRS		4	/* Maximum no of basic attrs */
> -#define TOTAL_ATTRS		(MAX_CORE_ATTRS + 1)
> +#define TOTAL_ATTRS		(MAX_CORE_ATTRS + 9)
>  #define MAX_CORE_DATA		(NUM_REAL_CORES + BASE_SYSFS_ATTR_NO)
>  
>  #define TO_PHYS_ID(cpu)		(cpu_data(cpu).phys_proc_id)
> @@ -79,6 +79,15 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
>   * @is_pkg_data: If this is true, the core_data holds pkgtemp data.
>   *		Otherwise, core_data holds coretemp data.
>   * @valid: If this is true, the current temperature is valid.
> + * @has_rapl:		true if the CPU supports RAPL (power measurement)
> + * @rapl_power_units:	Units of power as reported by the chip
> + * @rapl_energy_units:	Units of energy as reported by the chip
> + * @rapl_energy_raw:	Most recent energy measurement (raw)
> + * @rapl_energy:	cumulative energy (mJ)
> + * @rapl_power:		current power usage (mW)
> + * @rapl_power_max:	maximum power (mW) as reported by the chip
> + * @rapl_power_cap_min:	minimum power limit (mW) as reported by the chip
> + * @rapl_power_cap_max:	maximum power limit (mW) as reported by the chip
>   */
>  struct core_data {
>  	int temp;
> @@ -94,6 +103,17 @@ struct core_data {
>  	struct sensor_device_attribute sd_attrs[TOTAL_ATTRS];
>  	char attr_name[TOTAL_ATTRS][CORETEMP_NAME_LENGTH];
>  	struct mutex update_lock;
> +	/* power values */
> +	bool has_rapl;
> +	u32 rapl_power_units;
> +	u32 rapl_energy_units;
> +	u32 rapl_energy_raw;
> +	u64 rapl_energy;
> +	u32 rapl_power;
> +	u32 rapl_power_max;
> +	u32 rapl_power_cap_min;
> +	u32 rapl_power_cap_max;
> +	struct delayed_work rapl_wq;
>  };
>  
>  /* Platform Data per Physical CPU */
> @@ -132,6 +152,26 @@ static ssize_t show_label(struct device *dev,
>  	return sprintf(buf, "Core %u\n", tdata->cpu_core_id);
>  }
>  
> +static ssize_t show_power_label(struct device *dev,
> +				struct device_attribute *devattr, char *buf)
> +{
> +	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
> +	struct platform_data *pdata = dev_get_drvdata(dev);
> +	struct core_data *tdata = pdata->core_data[attr->index];
> +
> +	return sprintf(buf, "Pkg %u power\n", tdata->cpu_core_id);
> +}
> +
> +static ssize_t show_energy_label(struct device *dev,
> +				 struct device_attribute *devattr, char *buf)
> +{
> +	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
> +	struct platform_data *pdata = dev_get_drvdata(dev);
> +	struct core_data *tdata = pdata->core_data[attr->index];
> +
> +	return sprintf(buf, "Pkg %u energy\n", tdata->cpu_core_id);
> +}
> +
>  static ssize_t show_crit_alarm(struct device *dev,
>  				struct device_attribute *devattr, char *buf)
>  {
> @@ -190,6 +230,73 @@ static ssize_t show_temp(struct device *dev,
>  	return tdata->valid ? sprintf(buf, "%d\n", tdata->temp) : -EAGAIN;
>  }
>  
> +static ssize_t show_power(struct device *dev,
> +			  struct device_attribute *devattr, char *buf)
> +{
> +	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
> +	struct platform_data *pdata = dev_get_drvdata(dev);
> +
> +	return sprintf(buf, "%u\n",
> +		       pdata->core_data[attr->index]->rapl_power * 1000);
> +}
> +
> +static ssize_t show_power_max(struct device *dev,
> +			      struct device_attribute *devattr, char *buf)
> +{
> +	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
> +	struct platform_data *pdata = dev_get_drvdata(dev);
> +	struct core_data *tdata = pdata->core_data[attr->index];
> +
> +	return sprintf(buf, "%u\n", tdata->rapl_power_max * 1000);
> +}
> +
> +static ssize_t show_power_cap(struct device *dev,
> +			      struct device_attribute *devattr, char *buf)
> +{
> +	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
> +	struct platform_data *pdata = dev_get_drvdata(dev);
> +	struct core_data *tdata = pdata->core_data[attr->index];
> +	u32 cap1, cap2, eax, edx;
> +	u64 cap;
> +
> +	rdmsr_on_cpu(tdata->cpu, MSR_PKG_POWER_LIMIT, &eax, &edx);
> +	cap1 = (eax & 0x8000) ? (eax & 0x7fff) : 0;
> +	cap2 = (edx & 0x8000) ? (edx & 0x7fff) : 0;
> +	cap = max(cap1, cap2) * 1000000LL / (1 << tdata->rapl_power_units);
> +
> +	return sprintf(buf, "%llu\n", cap);
> +}
> +
> +static ssize_t show_power_cap_min(struct device *dev,
> +				  struct device_attribute *devattr, char *buf)
> +{
> +	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
> +	struct platform_data *pdata = dev_get_drvdata(dev);
> +	struct core_data *tdata = pdata->core_data[attr->index];
> +
> +	return sprintf(buf, "%u\n", tdata->rapl_power_cap_min * 1000);
> +}
> +
> +static ssize_t show_power_cap_max(struct device *dev,
> +				  struct device_attribute *devattr, char *buf)
> +{
> +	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
> +	struct platform_data *pdata = dev_get_drvdata(dev);
> +	struct core_data *tdata = pdata->core_data[attr->index];
> +
> +	return sprintf(buf, "%u\n", tdata->rapl_power_cap_max * 1000);
> +}
> +
> +static ssize_t show_energy(struct device *dev,
> +			   struct device_attribute *devattr, char *buf)
> +{
> +	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
> +	struct platform_data *pdata = dev_get_drvdata(dev);
> +	struct core_data *tdata = pdata->core_data[attr->index];
> +
> +	return sprintf(buf, "%llu\n", tdata->rapl_energy * 1000ULL);
> +}
> +
>  struct tjmax {
>  	char const *id;
>  	int tjmax;
> @@ -374,11 +481,17 @@ static int __cpuinit create_core_attrs(struct core_data *tdata,
>  	static ssize_t (*const rd_ptr[TOTAL_ATTRS]) (struct device *dev,
>  			struct device_attribute *devattr, char *buf) = {
>  			show_label, show_crit_alarm, show_temp, show_tjmax,
> -			show_ttarget };
> +			show_ttarget, show_power_label, show_power,
> +			show_power_max, show_power_cap, show_power_cap_min,
> +			show_power_cap_max, show_energy_label, show_energy };
>  	static const char *const names[TOTAL_ATTRS] = {
>  					"temp%d_label", "temp%d_crit_alarm",
>  					"temp%d_input", "temp%d_crit",
> -					"temp%d_max" };
> +					"temp%d_max", "power%d_label",
> +					"power%d_input", "power%d_max",
> +					"power%d_cap", "power%d_cap_min",
> +					"power%d_cap_max", "energy%d_label",
> +					"energy%d_input" };
>  
>  	for (i = 0; i < tdata->attr_size; i++) {
>  		snprintf(tdata->attr_name[i], CORETEMP_NAME_LENGTH, names[i],
> @@ -454,6 +567,70 @@ static struct core_data __cpuinit *init_core_data(unsigned int cpu,
>  	return tdata;
>  }
>  
> +static u32 coretemp_delta_wrap(u32 new, u32 old)
> +{
> +	if (new > old)
> +		return new - old;
> +	return 0x100000000LL + new - old;
> +}
> +
> +static void coretemp_rapl_work(struct work_struct *work)
> +{
> +	struct core_data *tdata = container_of(work, struct core_data,
> +					       rapl_wq.work);
> +	u32 eax, edx;
> +	u32 delta;
> +	u32 power;
> +
> +	rdmsr_on_cpu(tdata->cpu, MSR_PKG_ENERGY_STATUS, &eax, &edx);
> +	delta = coretemp_delta_wrap(eax, tdata->rapl_energy_raw);
> +	tdata->rapl_energy_raw = eax;
> +
> +	power = delta * 1000LL / (1 << tdata->rapl_energy_units);
> +	tdata->rapl_power = power;
> +	tdata->rapl_energy += power;
> +
> +	pr_info("cpu %d: energy=%llu mJ power=%u mW\n",
> +		tdata->cpu,
> +		tdata->rapl_energy, tdata->rapl_power);
> +
> +	schedule_delayed_work(&tdata->rapl_wq, HZ);
> +}
> +
> +static void coretemp_init_rapl(struct platform_device *pdev,
> +			       int cpu, struct core_data *tdata)
> +{
> +	u32 eax, edx;
> +	int err;
> +
> +	/* Test if we can access rapl registers */
> +	err = rdmsr_safe_on_cpu(cpu, MSR_RAPL_POWER_UNIT, &eax, &edx);
> +	if (err)
> +		return;
> +
> +	tdata->rapl_power_units = eax & 0x000f;
> +	tdata->rapl_energy_units = (eax >> 8) & 0x001f;
> +
> +	err = rdmsr_safe_on_cpu(cpu, MSR_PKG_POWER_INFO, &eax, &edx);
> +	if (err)
> +		return;
> +
> +	tdata->rapl_power_cap_min =
> +	  ((eax >> 16) & 0x7fff) * 1000 / (1 << tdata->rapl_power_units);
> +
> +	tdata->rapl_power_cap_max = tdata->rapl_power_max =
> +	  (edx & 0x7fff) * 1000 / (1 << tdata->rapl_power_units);
> +
> +	rdmsr_on_cpu(tdata->cpu, MSR_PKG_ENERGY_STATUS, &eax, &edx);
> +	tdata->rapl_energy_raw = eax;
> +	tdata->rapl_energy = eax * 1000LL / (1 << tdata->rapl_energy_units);
> +
> +	INIT_DELAYED_WORK(&tdata->rapl_wq, coretemp_rapl_work);
> +
> +	tdata->has_rapl = true;
> +	tdata->attr_size += 8;
> +}
> +
>  static int __cpuinit create_core_data(struct platform_device *pdev,
>  				unsigned int cpu, bool pkg_flag)
>  {
> @@ -511,6 +688,9 @@ static int __cpuinit create_core_data(struct platform_device *pdev,
>  		}
>  	}
>  
> +	if (pkg_flag)
> +		coretemp_init_rapl(pdev, cpu, tdata);
> +
>  	pdata->core_data[attr_no] = tdata;
>  
>  	/* Create sysfs interfaces */
> @@ -518,6 +698,9 @@ static int __cpuinit create_core_data(struct platform_device *pdev,
>  	if (err)
>  		goto exit_free;
>  
> +	if (tdata->has_rapl)
> +		schedule_delayed_work(&tdata->rapl_wq, HZ);
> +
>  	return 0;
>  exit_free:
>  	pdata->core_data[attr_no] = NULL;
> @@ -544,6 +727,9 @@ static void coretemp_remove_core(struct platform_data *pdata,
>  	int i;
>  	struct core_data *tdata = pdata->core_data[indx];
>  
> +	if (tdata->has_rapl)
> +		cancel_delayed_work(&tdata->rapl_wq);
> +
>  	/* Remove the sysfs attributes */
>  	for (i = 0; i < tdata->attr_size; i++)
>  		device_remove_file(dev, &tdata->sd_attrs[i].dev_attr);
> -- 
> 1.7.9.7
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]     [Linux Resources]

  Powered by Linux