On Sat, 2009-05-23 at 18:59 +0200, Rudolf Marek wrote: > Hi, > > Yes it works for most CPUs, but for unlucky users we might get complaints from > them - and false alarms. > > I'm talking about: > > http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/41322.pdf > > Errata #319 > > So far all CPUs have the issue. > > Rudolf > Here is a patch that prints a report whenever an inaccurate temperature measurement is detected, so that the inaccurate monitoring can be fixed or suppressed. [PATCH] x86: hwmon/k8temp.c Add support for AMD 10H and 11H Some AMD 10H CPUs report inaccurate temperature measurements: http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/41322.pdf Errata #319 So add a report message to help fix/suppress the inaccurate monitoring. Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput at gmail.com> --- drivers/hwmon/k8temp.c | 38 ++++++++++++++++++++++++++++++++++++-- 1 files changed, 36 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/k8temp.c b/drivers/hwmon/k8temp.c index 1fe9951..1e9594a 100644 --- a/drivers/hwmon/k8temp.c +++ b/drivers/hwmon/k8temp.c @@ -1,5 +1,6 @@ /* * k8temp.c - Linux kernel module for hardware monitoring + * for AMD K8 and derivates * * Copyright (C) 2006 Rudolf Marek <r.marek at assembler.cz> * @@ -33,7 +34,7 @@ #include <linux/mutex.h> #include <asm/processor.h> -#define TEMP_FROM_REG(val) (((((val) >> 16) & 0xff) - 49) * 1000) +#define REG_TCTL 0xa4 #define REG_TEMP 0xe4 #define SEL_PLACE 0x40 #define SEL_CORE 0x04 @@ -52,6 +53,20 @@ struct k8temp_data { u32 temp_offset; }; +static const char report[] = "Please send below message to: " + "LKML <linux-kernel at vger.kernel.org>, " + "Rudolf Marek <r.marek at assembler.cz>, " + "Jaswinder Singh Rajput <jaswinder at kernel.org>" + "\n\"Invalid temperature:"; + +static unsigned long temp_from_reg(unsigned long val) +{ + if (boot_cpu_data.x86 > 0xf) + return ((val) >> 21) * 125; + else + return ((((val) >> 16) & 0xff) - 49) * 1000; +} + static struct k8temp_data 
*k8temp_update_device(struct device *dev) { struct k8temp_data *data = dev_get_drvdata(dev); @@ -62,6 +77,11 @@ static struct k8temp_data *k8temp_update_device(struct device *dev) if (!data->valid || time_after(jiffies, data->last_updated + HZ)) { + if (boot_cpu_data.x86 > 0xf) { + pci_read_config_dword(pdev, REG_TCTL, + &data->temp[0][0]); + goto update_done; + } pci_read_config_byte(pdev, REG_TEMP, &tmp); tmp &= ~(SEL_PLACE | SEL_CORE); /* Select sensor 0, core0 */ pci_write_config_byte(pdev, REG_TEMP, tmp); @@ -89,6 +109,7 @@ static struct k8temp_data *k8temp_update_device(struct device *dev) } } +update_done: data->last_updated = jiffies; data->valid = 1; } @@ -123,7 +144,14 @@ static ssize_t show_temp(struct device *dev, if (data->swap_core_select) core = core ? 0 : 1; - temp = TEMP_FROM_REG(data->temp[core][place]) + data->temp_offset; + temp = temp_from_reg(data->temp[core][place]) + data->temp_offset; + + /* Some AMD 10H cpus reports Inaccurate Temperature Measurement */ + if ((temp < 21000) || (temp > 90000)) + WARN_ONCE(1, "%s reg 0x%x temp %d for %d:%d:%d:%d\"\n", + report, data->temp[core][place], temp, + boot_cpu_data.x86_vendor, boot_cpu_data.x86, + boot_cpu_data.x86_model, boot_cpu_data.x86_mask); return sprintf(buf, "%d\n", temp); } @@ -138,6 +166,8 @@ static DEVICE_ATTR(name, S_IRUGO, show_name, NULL); static struct pci_device_id k8temp_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_11H_NB_MISC) }, { 0 }, }; @@ -157,6 +187,9 @@ static int __devinit k8temp_probe(struct pci_dev *pdev, goto exit; } + if (boot_cpu_data.x86 > 0xf) + goto probe_done; + model = boot_cpu_data.x86_model; stepping = boot_cpu_data.x86_mask; @@ -226,6 +259,7 @@ static int __devinit k8temp_probe(struct pci_dev *pdev, data->sensorsp &= ~SEL_CORE; } +probe_done: data->name = "k8temp"; mutex_init(&data->update_lock); 
dev_set_drvdata(&pdev->dev, data); -- 1.6.0.6