Re: [PATCH v2 1/3] thermal: intel: intel_tcc: Add model checks for temperature registers

"Rafael J. Wysocki" <rafael@xxxxxxxxxx> · Tue, 30 Apr 2024 21:07:39 +0200

On Thu, Apr 25, 2024 at 7:06 PM Ricardo Neri
<ricardo.neri-calderon@xxxxxxxxxxxxxxx> wrote:
>
> The register MSR_TEMPERATURE_TARGET is not architectural. Its fields may be
> defined differently for each processor model. TCC_OFFSET is an example of
> such a case.
>
> Despite being specified as architectural, the registers IA32_[PACKAGE]_
> THERM_STATUS have become model-specific: in recent processors, the
> digital temperature readout uses bits [23:16] whereas the Intel Software
> Developer's manual specifies bits [22:16].
>
> Create an array of processor models and their bitmasks for TCC_OFFSET and
> the digital temperature readout fields. Do not include recent processors.
> Instead, use the bitmasks of these recent processors as default.
>
> Use these model-specific bitmasks when reading TCC_OFFSET or the
> temperature sensors.
>
> Initialize a model-specific data structure during subsys_initcall() to
> have it ready when thermal drivers are loaded.
>
> Expose the new interface intel_tcc_get_offset_mask(). The
> intel_tcc_cooling driver will use it.
>
> Signed-off-by: Ricardo Neri <ricardo.neri-calderon@xxxxxxxxxxxxxxx>
> ---
> Cc: Daniel Lezcano <daniel.lezcano@xxxxxxxxxx>
> Cc: Lukasz Luba <lukasz.luba@xxxxxxx>
> Cc: Srinivas Pandruvada <srinivas.pandruvada@xxxxxxxxxxxxxxx>
> Cc: linux-hwmon@xxxxxxxxxxxxxxx
> Cc: linux-pm@xxxxxxxxxxxxxxx
> Cc: linux-kernel@xxxxxxxxxxxxxxx
> Cc: stable@xxxxxxxxxxxxxxx # v6.7+
> ---
> Changes since v1:
>  * Renamed TCC_FAM6_MODEL_TEMP_MASKS as TCC_MODEL_TEMP_MASKS. (Rui)
>  * Renamed get_tcc_offset_mask() as intel_tcc_get_offset_mask(). (Rui)
>  * Do not export intel_tcc_get_temp_mask() as it is no longer used
>    outside intel_tcc.c
>  * Dropped stub functions for digital temperature readout and TCC
>    offset. They are not needed as users select CONFIG_INTEL_TCC.
> ---
>  drivers/thermal/intel/intel_tcc.c | 177 +++++++++++++++++++++++++++++-
>  include/linux/intel_tcc.h         |   1 +
>  2 files changed, 173 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/thermal/intel/intel_tcc.c b/drivers/thermal/intel/intel_tcc.c
> index 5e8b7f34b395..9943c43c06df 100644
> --- a/drivers/thermal/intel/intel_tcc.c
> +++ b/drivers/thermal/intel/intel_tcc.c
> @@ -6,8 +6,170 @@
>
>  #include <linux/errno.h>
>  #include <linux/intel_tcc.h>
> +#include <asm/cpu_device_id.h>
> +#include <asm/intel-family.h>
>  #include <asm/msr.h>
>
> +/**
> + * struct temp_masks - Bitmasks for temperature readings
> + * @tcc_offset:                        TCC offset in MSR_TEMPERATURE_TARGET
> + * @digital_readout:           Digital readout in MSR_IA32_THERM_STATUS
> + * @pkg_digital_readout:       Digital readout in MSR_IA32_PACKAGE_THERM_STATUS
> + *
> + * Bitmasks to extract the fields of the MSR_TEMPERATURE_TARGET and
> + * IA32_[PACKAGE]_THERM_STATUS registers for different processor models.
> + *
> + * The bitmask of TjMax is not included in this structure. It is always 0xff.
> + */
> +struct temp_masks {
> +       u32 tcc_offset;
> +       u32 digital_readout;
> +       u32 pkg_digital_readout;
> +};
> +
> +#define TCC_MODEL_TEMP_MASKS(model, _tcc_offset, _digital_readout,     \
> +                            _pkg_digital_readout)                      \
> +       static const struct temp_masks temp_##model __initconst = {     \
> +               .tcc_offset = _tcc_offset,                              \
> +               .digital_readout = _digital_readout,                    \
> +               .pkg_digital_readout = _pkg_digital_readout             \
> +       }
> +
> +TCC_MODEL_TEMP_MASKS(nehalem, 0, 0x7f, 0x7f);
> +TCC_MODEL_TEMP_MASKS(haswell_x, 0xf, 0x7f, 0x7f);
> +TCC_MODEL_TEMP_MASKS(broadwell, 0x3f, 0x7f, 0x7f);
> +TCC_MODEL_TEMP_MASKS(goldmont, 0x7f, 0x7f, 0x7f);
> +TCC_MODEL_TEMP_MASKS(tigerlake, 0x3f, 0xff, 0xff);
> +TCC_MODEL_TEMP_MASKS(sapphirerapids, 0x3f, 0x7f, 0xff);
> +
> +/* Use these masks for processors not included in @intel_tcc_cpu_ids. */
> +static struct temp_masks intel_tcc_temp_masks __ro_after_init = {
> +       .tcc_offset = 0x7f,
> +       .digital_readout = 0xff,
> +       .pkg_digital_readout = 0xff,
> +};
> +
> +static const struct x86_cpu_id intel_tcc_cpu_ids[] __initconst = {
> +       X86_MATCH_INTEL_FAM6_MODEL(CORE_YONAH,          &temp_nehalem),
> +       X86_MATCH_INTEL_FAM6_MODEL(CORE2_MEROM,         &temp_nehalem),
> +       X86_MATCH_INTEL_FAM6_MODEL(CORE2_MEROM_L,       &temp_nehalem),
> +       X86_MATCH_INTEL_FAM6_MODEL(CORE2_PENRYN,        &temp_nehalem),
> +       X86_MATCH_INTEL_FAM6_MODEL(CORE2_DUNNINGTON,    &temp_nehalem),
> +       X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,             &temp_nehalem),
> +       X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G,           &temp_nehalem),
> +       X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,          &temp_nehalem),
> +       X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,          &temp_nehalem),
> +       X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,            &temp_nehalem),
> +       X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,         &temp_nehalem),
> +       X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,         &temp_nehalem),
> +       X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,         &temp_nehalem),
> +       X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,       &temp_nehalem),
> +       X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,           &temp_nehalem),
> +       X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,         &temp_haswell_x),
> +       X86_MATCH_INTEL_FAM6_MODEL(HASWELL,             &temp_nehalem),
> +       X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,           &temp_haswell_x),
> +       X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,           &temp_nehalem),
> +       X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,           &temp_nehalem),
> +       X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,           &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,         &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,         &temp_haswell_x),
> +       X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,         &temp_haswell_x),
> +       X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,           &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,             &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,           &temp_haswell_x),
> +       X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,          &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,            &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE,           &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L,         &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L,        &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,           &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,           &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,             &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,           &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI,        &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,          &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,         &temp_tigerlake),
> +       X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,           &temp_tigerlake),
> +       X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,    &temp_sapphirerapids),
> +       X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X,     &temp_sapphirerapids),
> +       X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD,           &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           &temp_tigerlake),
> +       X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         &temp_tigerlake),
> +       X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,          &temp_tigerlake),
> +       X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P,        &temp_tigerlake),
> +       X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S,        &temp_tigerlake),
> +       X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL,        &temp_nehalem),
> +       X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID,    &temp_nehalem),
> +       X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL,       &temp_nehalem),
> +       X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL_MID,   &temp_nehalem),
> +       X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT,     &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D,   &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT,        &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT_MID,    &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT_NP,     &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,       &temp_goldmont),
> +       X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D,     &temp_goldmont),
> +       X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,  &temp_goldmont),
> +       X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,      &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT,        &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L,      &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT,      &temp_tigerlake),
> +       X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,        &temp_broadwell),
> +       X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,        &temp_broadwell),
> +       {}
> +};
> +
> +static int __init intel_tcc_init(void)
> +{
> +       const struct x86_cpu_id *id;
> +
> +       id = x86_match_cpu(intel_tcc_cpu_ids);
> +       if (id)
> +               memcpy(&intel_tcc_temp_masks, (const void *)id->driver_data,
> +                      sizeof(intel_tcc_temp_masks));
> +
> +       return 0;
> +}
> +/*
> + * Use subsys_initcall to ensure temperature bitmasks are initialized before
> + * the drivers that use this library.
> + */
> +subsys_initcall(intel_tcc_init);
> +
> +/**
> + * intel_tcc_get_offset_mask() - Returns the bitmask to read TCC offset
> + *
> + * Get the model-specific bitmask to extract TCC_OFFSET from the MSR
> + * TEMPERATURE_TARGET register. If the mask is 0, it means the processor does
> + * not support TCC offset.
> + *
> + * Return: The model-specific bitmask for TCC offset.
> + */
> +u32 intel_tcc_get_offset_mask(void)
> +{
> +       return intel_tcc_temp_masks.tcc_offset;
> +}
> +EXPORT_SYMBOL_NS(intel_tcc_get_offset_mask, INTEL_TCC);
> +
> +/**
> + * get_temp_mask() - Returns the model-specific bitmask for temperature
> + *
> + * @pkg: true: Package Thermal Sensor. false: Core Thermal Sensor.
> + *
> + * Get the model-specific bitmask to extract the temperature reading from the
> + * MSR_IA32_[PACKAGE]_THERM_STATUS register.
> + *
> + * Callers must check if the thermal status registers are supported.
> + *
> + * Return: The model-specific bitmask for temperature reading
> + */
> +static u32 get_temp_mask(bool pkg)
> +{
> +       return pkg ? intel_tcc_temp_masks.pkg_digital_readout :
> +              intel_tcc_temp_masks.digital_readout;
> +}
> +
>  /**
>   * intel_tcc_get_tjmax() - returns the default TCC activation Temperature
>   * @cpu: cpu that the MSR should be run on, nagative value means any cpu.
> @@ -56,7 +218,7 @@ int intel_tcc_get_offset(int cpu)
>         if (err)
>                 return err;
>
> -       return (low >> 24) & 0x3f;
> +       return (low >> 24) & intel_tcc_temp_masks.tcc_offset;
>  }
>  EXPORT_SYMBOL_NS_GPL(intel_tcc_get_offset, INTEL_TCC);
>
> @@ -76,7 +238,10 @@ int intel_tcc_set_offset(int cpu, int offset)
>         u32 low, high;
>         int err;
>
> -       if (offset < 0 || offset > 0x3f)
> +       if (!intel_tcc_temp_masks.tcc_offset)
> +               return -ENODEV;
> +
> +       if (offset < 0 || offset > intel_tcc_temp_masks.tcc_offset)
>                 return -EINVAL;
>
>         if (cpu < 0)
> @@ -90,7 +255,7 @@ int intel_tcc_set_offset(int cpu, int offset)
>         if (low & BIT(31))
>                 return -EPERM;
>
> -       low &= ~(0x3f << 24);
> +       low &= ~(intel_tcc_temp_masks.tcc_offset << 24);
>         low |= offset << 24;
>
>         if (cpu < 0)
> @@ -113,8 +278,8 @@ EXPORT_SYMBOL_NS_GPL(intel_tcc_set_offset, INTEL_TCC);
>   */
>  int intel_tcc_get_temp(int cpu, int *temp, bool pkg)
>  {
> -       u32 low, high;
>         u32 msr = pkg ? MSR_IA32_PACKAGE_THERM_STATUS : MSR_IA32_THERM_STATUS;
> +       u32 low, high, mask;
>         int tjmax, err;
>
>         tjmax = intel_tcc_get_tjmax(cpu);
> @@ -132,7 +297,9 @@ int intel_tcc_get_temp(int cpu, int *temp, bool pkg)
>         if (!(low & BIT(31)))
>                 return -ENODATA;
>
> -       *temp = tjmax - ((low >> 16) & 0x7f);
> +       mask = get_temp_mask(pkg);
> +
> +       *temp = tjmax - ((low >> 16) & mask);
>
>         return 0;
>  }
> diff --git a/include/linux/intel_tcc.h b/include/linux/intel_tcc.h
> index 8ff8eabb4a98..fa788817acfc 100644
> --- a/include/linux/intel_tcc.h
> +++ b/include/linux/intel_tcc.h
> @@ -14,5 +14,6 @@ int intel_tcc_get_tjmax(int cpu);
>  int intel_tcc_get_offset(int cpu);
>  int intel_tcc_set_offset(int cpu, int offset);
>  int intel_tcc_get_temp(int cpu, int *temp, bool pkg);
> +u32 intel_tcc_get_offset_mask(void);
>
>  #endif /* __INTEL_TCC_H__ */
> --

This clashes with Tony Luck's rework of Intel CPU model defines
(see https://lore.kernel.org/lkml/20240430164913.73473-1-tony.luck@xxxxxxxxx/),
so I'd rather defer it until commit 2eda374e883a ("x86/mm: Switch to
new Intel CPU model defines") reaches the mainline and make it use the
new CPU model defines from the start.