Re: [PATCH v3] hwmon: (k10temp) Display up to eight sets of CCD temperatures

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Sorry for the noise - please don't bother with this version.
I'll send v4 in a minute, with improved support for older EPYC CPUs.

Guenter

On Thu, Jan 23, 2020 at 02:04:22PM -0800, Guenter Roeck wrote:
> In HWiNFO, we see support for Tccd1, Tccd3, Tccd5, and Tccd7 temperature
> sensors on Zen2 based Threadripper CPUs. Checking register maps on
> Threadripper 3970X confirms SMN register addresses and values for those
> sensors.
> 
> Register values observed in an idle system:
> 
> 0x059950: 00000000 00000abc 00000000 00000ad8
> 0x059960: 00000000 00000ade 00000000 00000ae4
> 
> Under load:
> 
> 0x059950: 00000000 00000c02 00000000 00000c14
> 0x059960: 00000000 00000c30 00000000 00000c22
> 
> More analysis shows that EPYC CPUs support up to 8 CCD temperature
> sensors.
> 
> On top of that, in thm_10_0_sh_mask.h in the Linux kernel, we find
> definitions for THM_DIE{1-3}_TEMP__VALID_MASK, set to 0x00000800, as well
> as matching SMN addresses. This lets us conclude that bit 11 of the
> respective registers is a valid bit. With this assumption, the temperature
> offset is now 49 degrees C. This conveniently matches the documented
> temperature offset for Tdie, again suggesting that above registers indeed
> report temperatures sensor values. Assume that bit 11 is indeed a valid
> bit, and add support for the additional sensors.
> 
> With this patch applied, output from 3970X (idle) looks as follows:
> 
> k10temp-pci-00c3
> Adapter: PCI adapter
> Tdie:         +55.9°C
> Tctl:         +55.9°C
> Tccd1:        +39.8°C
> Tccd3:        +43.8°C
> Tccd5:        +43.8°C
> Tccd7:        +44.8°C
> 
> Tested-by: Michael Larabel <michael@xxxxxxxxxxxx>
> Signed-off-by: Guenter Roeck <linux@xxxxxxxxxxxx>
> ---
> v3: Increased number of supported CCD sensors to 8.
>     Dropped note that functionality on high-end CPUs is not known,
>     since that is no longer correct.
> 
> v2: Added additional sensors to k10temp_info[].
> 
>  drivers/hwmon/k10temp.c | 70 ++++++++++++++++++-----------------------
>  1 file changed, 31 insertions(+), 39 deletions(-)
> 
> diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
> index 5e3f43594084..1634cb6394f0 100644
> --- a/drivers/hwmon/k10temp.c
> +++ b/drivers/hwmon/k10temp.c
> @@ -7,7 +7,7 @@
>   * Copyright (c) 2020 Guenter Roeck <linux@xxxxxxxxxxxx>
>   *
>   * Implementation notes:
> - * - CCD1 and CCD2 register address information as well as the calculation to
> + * - CCD register address information as well as the calculation to
>   *   convert raw register values is from https://github.com/ocerman/zenpower.
>   *   The information is not confirmed from chip datasheets, but experiments
>   *   suggest that it provides reasonable temperature values.
> @@ -18,11 +18,6 @@
>   *   normalized to report 1A/LSB for core current and and 0.25A/LSB for SoC
>   *   current. Reported values can be adjusted using the sensors configuration
>   *   file.
> - * - It is unknown if the mechanism to read CCD1/CCD2 temperature as well as
> - *   current and voltage information works on higher-end Ryzen CPUs.
> - *   Information reported by Windows tools suggests that additional sensors
> - *   (both temperature and voltage/current) are supported, but their register
> - *   location is currently unknown.
>   */
>  
>  #include <linux/bitops.h>
> @@ -80,8 +75,10 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
>  
>  /* F17h M01h Access througn SMN */
>  #define F17H_M01H_REPORTED_TEMP_CTRL_OFFSET	0x00059800
> -#define F17H_M70H_CCD1_TEMP			0x00059954
> -#define F17H_M70H_CCD2_TEMP			0x00059958
> +
> +#define F17H_M70H_CCD_TEMP(x)			(0x00059954 + ((x) * 4))
> +#define F17H_M70H_CCD_TEMP_VALID		BIT(11)
> +#define F17H_M70H_CCD_TEMP_MASK			GENMASK(10, 0)
>  
>  #define F17H_M01H_SVI				0x0005A000
>  #define F17H_M01H_SVI_TEL_PLANE0		(F17H_M01H_SVI + 0xc)
> @@ -100,8 +97,7 @@ struct k10temp_data {
>  	int temp_offset;
>  	u32 temp_adjust_mask;
>  	bool show_tdie;
> -	bool show_tccd1;
> -	bool show_tccd2;
> +	u32 show_tccd;
>  	u32 svi_addr[2];
>  	bool show_current;
>  	int cfactor[2];
> @@ -188,6 +184,12 @@ const char *k10temp_temp_label[] = {
>  	"Tctl",
>  	"Tccd1",
>  	"Tccd2",
> +	"Tccd3",
> +	"Tccd4",
> +	"Tccd5",
> +	"Tccd6",
> +	"Tccd7",
> +	"Tccd8",
>  };
>  
>  const char *k10temp_in_label[] = {
> @@ -277,15 +279,10 @@ static int k10temp_read_temp(struct device *dev, u32 attr, int channel,
>  			if (*val < 0)
>  				*val = 0;
>  			break;
> -		case 2:		/* Tccd1 */
> -			amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
> -				     F17H_M70H_CCD1_TEMP, &regval);
> -			*val = (regval & 0xfff) * 125 - 305000;
> -			break;
> -		case 3:		/* Tccd2 */
> +		case 2 ... 9:		/* Tccd{1-8} */
>  			amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
> -				     F17H_M70H_CCD2_TEMP, &regval);
> -			*val = (regval & 0xfff) * 125 - 305000;
> +				     F17H_M70H_CCD_TEMP(channel - 2), &regval);
> +			*val = (regval & F17H_M70H_CCD_TEMP_MASK) * 125 - 49000;
>  			break;
>  		default:
>  			return -EOPNOTSUPP;
> @@ -343,12 +340,8 @@ static umode_t k10temp_is_visible(const void *_data,
>  				if (!data->show_tdie)
>  					return 0;
>  				break;
> -			case 2:		/* Tccd1 */
> -				if (!data->show_tccd1)
> -					return 0;
> -				break;
> -			case 3:		/* Tccd2 */
> -				if (!data->show_tccd2)
> +			case 2 ... 9:		/* Tccd{1-8} */
> +				if (!(data->show_tccd & BIT(channel - 2)))
>  					return 0;
>  				break;
>  			default:
> @@ -382,12 +375,8 @@ static umode_t k10temp_is_visible(const void *_data,
>  			case 0:		/* Tdie */
>  			case 1:		/* Tctl */
>  				break;
> -			case 2:		/* Tccd1 */
> -				if (!data->show_tccd1)
> -					return 0;
> -				break;
> -			case 3:		/* Tccd2 */
> -				if (!data->show_tccd2)
> +			case 2 ... 9:		/* Tccd{1-8} */
> +				if (!(data->show_tccd & BIT(channel - 2)))
>  					return 0;
>  				break;
>  			default:
> @@ -520,6 +509,12 @@ static const struct hwmon_channel_info *k10temp_info[] = {
>  			   HWMON_T_LABEL,
>  			   HWMON_T_INPUT | HWMON_T_LABEL,
>  			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
> +			   HWMON_T_INPUT | HWMON_T_LABEL,
>  			   HWMON_T_INPUT | HWMON_T_LABEL),
>  	HWMON_CHANNEL_INFO(in,
>  			   HWMON_I_INPUT | HWMON_I_LABEL,
> @@ -595,15 +590,12 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  			data->cfactor[1] = CFACTOR_ISOC;
>  			data->svi_addr[0] = F17H_M01H_SVI_TEL_PLANE1;
>  			data->svi_addr[1] = F17H_M01H_SVI_TEL_PLANE0;
> -			amd_smn_read(amd_pci_dev_to_node_id(pdev),
> -				     F17H_M70H_CCD1_TEMP, &regval);
> -			if (regval & 0xfff)
> -				data->show_tccd1 = true;
> -
> -			amd_smn_read(amd_pci_dev_to_node_id(pdev),
> -				     F17H_M70H_CCD2_TEMP, &regval);
> -			if (regval & 0xfff)
> -				data->show_tccd2 = true;
> +			for (i = 0; i < 8; i++) {
> +				amd_smn_read(amd_pci_dev_to_node_id(pdev),
> +					     F17H_M70H_CCD_TEMP(i), &regval);
> +				if (regval & F17H_M70H_CCD_TEMP_VALID)
> +					data->show_tccd |= BIT(i);
> +			}
>  			break;
>  		}
>  	} else {
> -- 
> 2.17.1
> 



[Index of Archives]     [LM Sensors]     [Linux Sound]     [ALSA Users]     [ALSA Devel]     [Linux Audio Users]     [Linux Media]     [Kernel]     [Gimp]     [Yosemite News]     [Linux Media]

  Powered by Linux