On Tue, 2025-02-11 at 13:38 -0800, Sohil Mehta wrote: > On 2/11/2025 12:58 PM, Dave Hansen wrote: > > On 2/11/25 11:43, Sohil Mehta wrote: > > > + /* > > > + * Return without adjustment if the Family isn't 6. > > > + * The rest of the function assumes Family 6. > > > + */ > > > + if (c->x86 != 6) > > > + return tjmax; > > > > Shouldn't we be converting this over to the vfm matches? > > > > For drivers/, I mainly focused on fixes instead of cleanups. > > Converting drivers over to VFM checks is significant work. There are > a > lot of such comparisons and switch cases (probably more than 50) > across > drivers/cpufreq/ and drivers/hwmon/. > > Some of the functions might need significant refactoring and > rewrites. I > think someone with expertise in that particular driver should > probably > do it. I did start with it initially but it is beyond my bandwidth at > the moment. > I agree. adjust_tjmax() contains a list of quirks based on PCI- ID/x86_vendor_id/x86_model/x86_stepping. The common problem is that all the quirks are for Fam6 processors but the family id is not checked. So the fix is sufficient. In fact, I think it is better to move the check to the very beginning of adjust_tjmax(). Plus that, I do think we can have more cleanups on top 1. rename adjust_tjmax() to adjust_tjmax_for_fam6() 2. move all model specific quirks altogether and avoid model checks in the main functions. 3. for processors newer than fam6, the driver should fail to probe rather than using a hardcoded value when reading MSR_IA32_TEMPERATURE_TARGET fails. maybe I can start with something like below. --- drivers/hwmon/coretemp.c | 98 +++++++++++++++++++++++----------------- 1 file changed, 57 insertions(+), 41 deletions(-) diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 1aa67a2b5f18..fc2cf607aa36 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -99,6 +99,7 @@ struct platform_data { struct device_attribute name_attr; }; +/* Beginning of Model specific quirks */ struct tjmax_pci { unsigned int device; int tjmax; @@ -147,12 +148,11 @@ static const struct tjmax_model tjmax_model_table[] = { */ }; -static bool is_pkg_temp_data(struct temp_data *tdata) -{ - return tdata->index < 0; -} - -static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) +/* + * Adjust tjmax value for early Fam6 CPUs with unreadable MSR_IA32_TEMPERATURE_TARGET + * NOTE: the calculated value may not be correct. + */ +static int adjust_tjmax_for_fam6(struct cpuinfo_x86 *c, u32 id, struct device *dev) { /* The 100C is default for both mobile and non mobile CPUs */ @@ -163,8 +163,16 @@ static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) u32 eax, edx; int i; u16 devfn = PCI_DEVFN(0, 0); - struct pci_dev *host_bridge = pci_get_domain_bus_and_slot(0, 0, devfn); + struct pci_dev *host_bridge; + + /* + * Return without adjustment if the Family isn't 6. + * The rest of the function assumes Family 6. + */ + if (c->x86 != 6) + return tjmax; + host_bridge = pci_get_domain_bus_and_slot(0, 0, devfn); /* * Explicit tjmax table entries override heuristics. * First try PCI host bridge IDs, followed by model ID strings @@ -185,12 +193,6 @@ static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) return tjmax_table[i].tjmax; } - /* - * Return without adjustment if the Family isn't 6. - * The rest of the function assumes Family 6. - */ - if (c->x86 != 6) - return tjmax; for (i = 0; i < ARRAY_SIZE(tjmax_model_table); i++) { const struct tjmax_model *tm = &tjmax_model_table[i]; @@ -280,6 +282,37 @@ static bool cpu_has_tjmax(struct cpuinfo_x86 *c) model != 0x36); } +static bool cpu_has_ttarget(struct temp_data *tdata) +{ + struct cpuinfo_x86 *c = &cpu_data(tdata->cpu); + + /* + * The target temperature is available on older CPUs but not in the + * MSR_IA32_TEMPERATURE_TARGET register. Atoms don't have the register + * at all. + */ + if (c->x86 > 15 || (c->x86 == 6 && c->x86_model > 0xe && c->x86_model != 0x1c)) + return true; + return false; +} + +static bool cpu_has_broken_ucode(unsigned int cpu) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu); + + /* + * Check if we have problem with errata AE18 of Core processors: + * Readings might stop update when processor visited too deep sleep, + * fixed for stepping D0 (6EC). + */ + if (c->x86 == 6 && c->x86_model == 0xe && c->x86_stepping < 0xc && c->microcode < 0x39) { + pr_err("Errata AE18 not fixed, update BIOS or microcode of the CPU!\n"); + return true; + } + return false; +} +/* End of Model specific quirks */ + static int get_tjmax(struct temp_data *tdata, struct device *dev) { struct cpuinfo_x86 *c = &cpu_data(tdata->cpu); @@ -312,9 +345,8 @@ static int get_tjmax(struct temp_data *tdata, struct device *dev) } else { /* * An assumption is made for early CPUs and unreadable MSR. - * NOTE: the calculated value may not be correct. */ - tdata->tjmax = adjust_tjmax(c, tdata->cpu, dev); + tdata->tjmax = adjust_tjmax_for_fam6(c, tdata->cpu, dev); } return tdata->tjmax; } @@ -324,6 +356,8 @@ static int get_ttarget(struct temp_data *tdata, struct device *dev) u32 eax, edx; int tjmax, ttarget_offset, ret; + if (!cpu_has_ttarget(tdata)) + return -ENODEV; /* * ttarget is valid only if tjmax can be retrieved from * MSR_IA32_TEMPERATURE_TARGET @@ -348,6 +382,11 @@ static int max_zones __read_mostly; /* Array of zone pointers. Serialized by cpu hotplug lock */ static struct platform_device **zone_devices; +static bool is_pkg_temp_data(struct temp_data *tdata) +{ + return tdata->index < 0; +} + static ssize_t show_label(struct device *dev, struct device_attribute *devattr, char *buf) { @@ -460,23 +499,6 @@ static int create_core_attrs(struct temp_data *tdata, struct device *dev) return sysfs_create_group(&dev->kobj, &tdata->attr_group); } - -static int chk_ucode_version(unsigned int cpu) -{ - struct cpuinfo_x86 *c = &cpu_data(cpu); - - /* - * Check if we have problem with errata AE18 of Core processors: - * Readings might stop update when processor visited too deep sleep, - * fixed for stepping D0 (6EC). - */ - if (c->x86 == 6 && c->x86_model == 0xe && c->x86_stepping < 0xc && c->microcode < 0x39) { - pr_err("Errata AE18 not fixed, update BIOS or microcode of the CPU!\n"); - return -ENODEV; - } - return 0; -} - static struct platform_device *coretemp_get_pdev(unsigned int cpu) { int id = topology_logical_die_id(cpu); @@ -585,14 +607,8 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu, /* Make sure tdata->tjmax is a valid indicator for dynamic/static tjmax */ get_tjmax(tdata, &pdev->dev); - /* - * The target temperature is available on older CPUs but not in the - * MSR_IA32_TEMPERATURE_TARGET register. Atoms don't have the register - * at all. - */ - if (c->x86 > 15 || (c->x86 == 6 && c->x86_model > 0xe && c->x86_model != 0x1c)) - if (get_ttarget(tdata, &pdev->dev) >= 0) - tdata->attr_size++; + if (get_ttarget(tdata, &pdev->dev) >= 0) + tdata->attr_size++; /* Create sysfs interfaces */ err = create_core_attrs(tdata, pdata->hwmon_dev); @@ -696,7 +712,7 @@ static int coretemp_cpu_online(unsigned int cpu) struct device *hwmon; /* Check the microcode version of the CPU */ - if (chk_ucode_version(cpu)) + if (cpu_has_broken_ucode(cpu)) return -EINVAL; /* -- 2.43.0