On Mon, Jul 1, 2013 at 4:30 PM, Andy Lutomirski <luto@xxxxxxxxxxxxxx> wrote: > Sandy Bridge Xeon and Extreme chips have integrated memory controllers > with (rather limited) onboard SMBUS masters. This driver gives access > to the bus. Ugh! I think that this is related to erratum BT109 in the Xeon E5 spec update. (But then the erratum description is wrong.) Any Intel engineers want to comment? The workaround is going to be unpleasant. I wonder if there's some clean way to temporarily disable package c-states. Otherwise the driver may have to execute in a workqueue on the target package and spin or otherwise disable idle to keep the package awake. --Andy > > Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxxxxxx> > --- > > This is buggy as noted in the series description. Help would be much > appreciated! > > drivers/i2c/busses/Kconfig | 14 ++ > drivers/i2c/busses/Makefile | 1 + > drivers/i2c/busses/i2c-imc.c | 462 +++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 477 insertions(+) > create mode 100644 drivers/i2c/busses/i2c-imc.c > > diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig > index adfee98..91fd632 100644 > --- a/drivers/i2c/busses/Kconfig > +++ b/drivers/i2c/busses/Kconfig > @@ -133,6 +133,20 @@ config I2C_ISMT > This driver can also be built as a module. If so, the module will be > called i2c-ismt. > > +config I2C_IMC > + tristate "Intel iMC (LGA 2011) SMBus Controller" > + depends on PCI && X86 > + help > + If you say yes to this option, support will be included for the Intel > + Integrated Memory Controller SMBus host controller interface. This > + controller is found on LGA 2011 Xeons and Core i7 Extremes. > + > + It is possible, although unlikely, that the use of this driver will > + interfere with your platform's RAM thermal management. > + > + This driver can also be built as a module. If so, the module will be > + called i2c-imc. 
> + > config I2C_PIIX4 > tristate "Intel PIIX4 and compatible (ATI/AMD/Serverworks/Broadcom/SMSC)" > depends on PCI > diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile > index 8f4fc23..de46683 100644 > --- a/drivers/i2c/busses/Makefile > +++ b/drivers/i2c/busses/Makefile > @@ -15,6 +15,7 @@ obj-$(CONFIG_I2C_AMD8111) += i2c-amd8111.o > obj-$(CONFIG_I2C_I801) += i2c-i801.o > obj-$(CONFIG_I2C_ISCH) += i2c-isch.o > obj-$(CONFIG_I2C_ISMT) += i2c-ismt.o > +obj-$(CONFIG_I2C_IMC) += i2c-imc.o > obj-$(CONFIG_I2C_NFORCE2) += i2c-nforce2.o > obj-$(CONFIG_I2C_NFORCE2_S4985) += i2c-nforce2-s4985.o > obj-$(CONFIG_I2C_PIIX4) += i2c-piix4.o > diff --git a/drivers/i2c/busses/i2c-imc.c b/drivers/i2c/busses/i2c-imc.c > new file mode 100644 > index 0000000..47ec903 > --- /dev/null > +++ b/drivers/i2c/busses/i2c-imc.c > @@ -0,0 +1,462 @@ > +/* > + Copyright (c) 2013 Andrew Lutomirski <luto@xxxxxxxxxxxxxx> > + > + This program is free software; you can redistribute it and/or modify > + it under the terms of the GNU General Public License version 2 > + as published by the Free Software Foundation. > + > + This program is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + GNU General Public License for more details. > + > + You should have received a copy of the GNU General Public License > + along with this program; if not, write to the Free Software > + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
> +*/ > + > +#include <linux/module.h> > +#include <linux/pci.h> > +#include <linux/kernel.h> > +#include <linux/delay.h> > +#include <linux/i2c.h> > + > +/* > + * The datasheet can be found here, for example: > + * http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e5-1600-2600-vol-2-datasheet.pdf > + */ > + > +/* Register offsets (in PCI configuration space) */ > +#define SMBSTAT(i) (0x180 + 0x10*i) > +#define SMBCMD(i) (0x184 + 0x10*i) > +#define SMBCNTL(i) (0x188 + 0x10*i) > +#define SMB_TSOD_POLL_RATE_CNTR(i) (0x18C + 0x10*i) > +#define SMB_TSOD_POLL_RATE (0x1A8) > + > +/* SMBSTAT fields */ > +#define SMBSTAT_RDO (1U << 31) /* Read Data Valid */ > +#define SMBSTAT_WOD (1U << 30) /* Write Operation Done */ > +#define SMBSTAT_SBE (1U << 29) /* SMBus Error */ > +#define SMBSTAT_SMB_BUSY (1U << 28) /* SMBus Busy State */ > +/* 26:24 is the last automatically polled TSOD address */ > +#define SMBSTAT_RDATA_MASK 0xffff /* result of a read */ > + > +/* SMBCMD fields */ > +#define SMBCMD_TRIGGER (1U << 31) /* CMD Trigger */ > +#define SMBCMD_PNTR_SEL (1U << 30) /* HW polls TSOD with pointer */ > +#define SMBCMD_WORD_ACCESS (1U << 29) /* word (vs byte) access */ > +#define SMBCMD_TYPE_MASK (3U << 27) /* Mask for access type */ > +#define SMBCMD_TYPE_READ (0U << 27) /* Read */ > +#define SMBCMD_TYPE_WRITE (1U << 27) /* Write */ > +#define SMBCMD_TYPE_PNTR_WRITE (3U << 27) /* Write to pointer */ > +#define SMBCMD_SA_MASK (7U << 24) /* Slave Address high bits */ > +#define SMBCMD_SA_SHIFT 24 > +#define SMBCMD_BA_MASK 0xff0000 /* Bus Txn address */ > +#define SMBCMD_BA_SHIFT 16 > +#define SMBCMD_WDATA_MASK 0xffff /* data to write */ > + > +/* SMBCNTL fields */ > +#define SMBCNTL_DTI_MASK 0xf0000000 /* Slave Address low bits */ > +#define SMBCNTL_DTI_SHIFT 28 /* Slave Address low bits */ > +#define SMBCNTL_CKOVRD (1U << 27) /* # Clock Override */ > +#define SMBCNTL_DIS_WRT (1U << 26) /* Disable Write (sadly) */ > +#define SMBCNTL_SOFT_RST (1U << 10) 
/* Soft Reset */ > +#define SMBCNTL_TSOD_POLL_EN (1U << 8) /* TSOD Polling Enable */ > +/* Bits 0-3 and 4-6 indicate TSOD presence in various slots */ > + > +/* System Address Controller, PCI dev 13 fn 6, 8086.3cf5 */ > +#define SAD_CONTROL 0xf4 > + > +/* > + * The clock is around 100kHz, and transactions are nine cycles per byte > + * plus a few start/stop cycles, plus whatever clock stretching is involved. > + * This is a guess at the polling interval. > + */ > + > +#define TXN_LEN_US (20 * 10) > + > +#define PCI_DEVICE_ID_INTEL_SBRIDGE_BR 0x3cf5 /* 13.6 */ > +#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA 0x3ca8 /* 15.0 */ > + > +struct imc_channel { > + struct i2c_adapter adapter; > + struct mutex mutex; > + bool can_write, suspended; > +}; > + > +struct imc_priv { > + struct pci_dev *pci_dev; > + struct imc_channel channels[2]; > +}; > + > +static void imc_channel_release(struct imc_priv *priv, int chan) > +{ > + /* Return to HW control. */ > + u32 cntl; > + pci_read_config_dword(priv->pci_dev, SMBCNTL(chan), &cntl); > + cntl |= SMBCNTL_TSOD_POLL_EN; > + pci_write_config_dword(priv->pci_dev, SMBCNTL(chan), cntl); > +} > + > +static int imc_channel_claim(struct imc_priv *priv, int chan) > +{ > + /* > + * The docs are a bit confused here. We're supposed to disable TSOD > + * polling, then wait for busy to be cleared, then set > + * SMBCNTL_TSOD_POLL_EN to zero to switch to software control. But > + * SMBCNTL_TSOD_POLL_EN is the only documented way to turn off polling. > + */ > + > + u32 cntl, stat; > + int i; > + > + if (priv->channels[chan].suspended) > + return -EIO; > + > + pci_read_config_dword(priv->pci_dev, SMBCNTL(chan), &cntl); > + cntl &= ~SMBCNTL_TSOD_POLL_EN; > + pci_write_config_dword(priv->pci_dev, SMBCNTL(chan), cntl); > + > + for (i = 0; i < 20; i++) { > + pci_read_config_dword(priv->pci_dev, SMBSTAT(chan), &stat); > + if (!(stat & SMBSTAT_SMB_BUSY)) > + return 0; /* The channel is ours. 
*/ > + usleep_range(TXN_LEN_US, 3*TXN_LEN_US); > + } > + > + /* We failed to take control of the channel. Return to HW control. */ > + imc_channel_release(priv, chan); > + return -EBUSY; > +} > + > +/* > + * The iMC supports five access types. The terminology is rather > + * inconsistent. These are the types: > + * > + * "Write to pointer register SMBus": I2C_SMBUS_WRITE, I2C_SMBUS_BYTE > + * > + * Read byte/word: I2C_SMBUS_READ, I2C_SMBUS_{BYTE|WORD}_DATA > + * > + * Write byte/word: I2C_SMBUS_WRITE, I2C_SMBUS_{BYTE|WORD}_DATA > + * > + * The pointer write operation is AFAICT completely useless for > + * software control, for two reasons. First, HW periodically polls any > + * TSODs on the bus, so it will corrupt the pointer in between SW > + * transactions. More importantly, the matching "read byte"/"receive > + * byte" (the address-less single-byte read) is not available for SW > + * control. Therefore, this driver doesn't implement pointer writes. > + * > + * There is no PEC support. > + */ > + > +static u32 imc_func(struct i2c_adapter *adapter) > +{ > + int chan; > + struct imc_channel *ch; > + struct imc_priv *priv = i2c_get_adapdata(adapter); > + > + chan = (adapter == &priv->channels[0].adapter ? 0 : 1); > + ch = &priv->channels[chan]; > + > + if (ch->can_write) > + return I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA; > + else > + return I2C_FUNC_SMBUS_READ_BYTE_DATA | > + I2C_FUNC_SMBUS_READ_WORD_DATA; > +} > + > +static s32 imc_smbus_xfer(struct i2c_adapter *adap, u16 addr, > + unsigned short flags, char read_write, u8 command, > + int size, union i2c_smbus_data *data) > +{ > + int i, ret, chan; > + u32 tmp, cmdbits = 0, cntlbits = 0, stat; > + struct imc_channel *ch; > + struct imc_priv *priv = i2c_get_adapdata(adap); > + > + chan = (adap == &priv->channels[0].adapter ? 
0 : 1); > + ch = &priv->channels[chan]; > + > + if (addr > 0x7f) > + return -EOPNOTSUPP; /* No large address support */ > + if (flags) > + return -EOPNOTSUPP; /* No PEC */ > + > + cmdbits |= ((u32)addr & 0x7) << SMBCMD_SA_SHIFT; > + cntlbits |= ((u32)addr >> 3) << SMBCNTL_DTI_SHIFT; > + > + switch (size) { > + case I2C_SMBUS_BYTE_DATA: > + cmdbits |= ((u32)command) << SMBCMD_BA_SHIFT; > + if (read_write == I2C_SMBUS_READ) > + cmdbits |= SMBCMD_TYPE_READ; > + else > + cmdbits |= SMBCMD_TYPE_WRITE | data->byte; > + break; > + case I2C_SMBUS_WORD_DATA: > + cmdbits |= ((u32)command) << SMBCMD_BA_SHIFT; > + cmdbits |= SMBCMD_WORD_ACCESS; > + if (read_write == I2C_SMBUS_READ) > + cmdbits |= SMBCMD_TYPE_READ; > + else > + cmdbits |= SMBCMD_TYPE_WRITE | cpu_to_be16(data->word); > + break; > + default: > + return -EOPNOTSUPP; > + } > + > + mutex_lock(&ch->mutex); > + > + ret = imc_channel_claim(priv, chan); > + if (ret) > + goto out_unlock; > + > + pci_read_config_dword(priv->pci_dev, SMBCNTL(chan), &tmp); > + tmp &= ~SMBCNTL_DTI_MASK; > + tmp |= cntlbits; > + pci_write_config_dword(priv->pci_dev, SMBCNTL(chan), tmp); > + > + /* > + * This clears SMBCMD_PNTR_SEL. We leave it cleared so that we don't > + * need to think about keeping the TSOD pointer state consistent with > + * the hardware's expectation. This probably has some minuscule > + * power cost, as TSOD polls will take 9 extra cycles. > + */ > + cmdbits |= SMBCMD_TRIGGER; > + pci_write_config_dword(priv->pci_dev, SMBCMD(chan), cmdbits); > + > + for (i = 0; ; i++) { > + pci_read_config_dword(priv->pci_dev, SMBSTAT(chan), &stat); > + if (!(stat & SMBSTAT_SMB_BUSY)) > + break; > + if (i < 50) { > + usleep_range(TXN_LEN_US, 3*TXN_LEN_US); > + continue; > + } > + > + /* Timeout. TODO: Reset the controller. */ > + ret = -EIO; > + dev_err(&priv->pci_dev->dev, "controller is wedged\n"); > + goto out_release; > + } > + > + if (stat & SMBSTAT_SBE) { > + /* > + * Clear the error to re-enable TSOD polling. 
The docs say > + * that, as long as SBE is set, TSOD polling won't happen. > + * The docs also say that writing zero to this bit (which is > + * the only writable bit in the whole register) will clear > + * the error. Empirically, writing 0 does not clear SBE, but > + * it's probably still good to do the write in compliance with > + * the spec. (TSOD polling still happens and seems to > + * clear SBE on its own.) > + */ > + pci_write_config_dword(priv->pci_dev, SMBSTAT(chan), 0); > + ret = -ENXIO; > + goto out_release; > + } > + > + if (read_write == I2C_SMBUS_READ) { > + if (stat & SMBSTAT_RDO) { > + /* > + * Note: the controller will often (always?) get > + * WOD here. This is probably a bug. > + */ > + if (size == I2C_SMBUS_WORD_DATA) > + data->word = > + be16_to_cpu(stat & SMBSTAT_RDATA_MASK); > + else > + data->byte = stat & 0xFF; > + ret = 0; > + } else { > + dev_err(&priv->pci_dev->dev, > + "Unexpected read status 0x%08X\n", stat); > + ret = -EIO; > + } > + } else { > + if ((stat & (SMBSTAT_RDO | SMBSTAT_WOD)) == SMBSTAT_WOD) { > + ret = 0; > + } else { > + dev_err(&priv->pci_dev->dev, > + "Unexpected write status 0x%08X\n", stat); > + ret = -EIO; > + } > + } > + > +out_release: > + imc_channel_release(priv, chan); > + > +out_unlock: > + mutex_unlock(&ch->mutex); > + > + return ret; > +} > + > +static const struct i2c_algorithm imc_smbus_algorithm = { > + .smbus_xfer = imc_smbus_xfer, > + .functionality = imc_func, > +}; > + > +/* i2c_new_device? */ > + > +static int imc_init_channel(struct imc_priv *priv, int i, int socket) > +{ > + int err; > + u32 val; > + struct imc_channel *ch = &priv->channels[i]; > + > + i2c_set_adapdata(&ch->adapter, priv); > + ch->adapter.owner = THIS_MODULE; > + ch->adapter.class = I2C_CLASS_SPD; /* Don't probe for sensors. 
*/ > + ch->adapter.algo = &imc_smbus_algorithm; > + ch->adapter.dev.parent = &priv->pci_dev->dev; > + > + pci_read_config_dword(priv->pci_dev, SMBCNTL(i), &val); > + ch->can_write = !(val & SMBCNTL_DIS_WRT); > + > + /* > + * If an old-style driver like eeprom is already loaded, then > + * imc_smbus_xfer will be called immediately. We need to be > + * ready. > + */ > + mutex_init(&ch->mutex); > + > + snprintf(ch->adapter.name, sizeof(ch->adapter.name), > + "iMC SMBUS, socket %d (bus %x) channel %d", > + socket, priv->pci_dev->bus->number, i); > + err = i2c_add_adapter(&ch->adapter); > + if (err) { > + mutex_destroy(&ch->mutex); > + return err; > + } > + > + return 0; > +} > + > +static void imc_free_channel(struct imc_priv *priv, int i) > +{ > + struct imc_channel *ch = &priv->channels[i]; > + > + mutex_lock(&ch->mutex); > + i2c_del_adapter(&ch->adapter); > + mutex_unlock(&ch->mutex); > + mutex_destroy(&ch->mutex); > +} > + > +static int imc_probe(struct pci_dev *dev, const struct pci_device_id *id) > +{ > + int i, err; > + struct imc_priv *priv; > + struct pci_dev *sad; /* System Address Decoder */ > + u32 sad_control; > + > + /* Paranoia: the datasheet says this is always at 15.0 */ > + if (dev->devfn != PCI_DEVFN(15, 0)) > + return -ENODEV; > + > + /* > + * The socket number is hidden away on a different PCI device. > + * There's another copy at devfn 11.0 offset 0x40, and an even > + * less convincing copy at 5.0 0x140. The actual APICID register > + * is "not used ... and is still implemented in hardware because > + * of FUD". > + * > + * In principle we could double-check that the socket matches > + * the numa_node from SRAT, but this is probably not worth it. 
> + */ > + sad = pci_get_slot(dev->bus, PCI_DEVFN(13, 6)); > + if (!sad) > + return -ENODEV; > + if (sad->vendor != PCI_VENDOR_ID_INTEL || > + sad->device != PCI_DEVICE_ID_INTEL_SBRIDGE_BR) { > + pci_dev_put(sad); > + return -ENODEV; > + } > + pci_read_config_dword(sad, SAD_CONTROL, &sad_control); > + pci_dev_put(sad); > + > + priv = kzalloc(sizeof(*priv), GFP_KERNEL); > + if (!priv) > + return -ENOMEM; > + priv->pci_dev = dev; > + > + pci_set_drvdata(dev, priv); > + > + for (i = 0; i < 2; i++) { > + int j; > + err = imc_init_channel(priv, i, sad_control & 0x7); > + if (err) { > + for (j = 0; j < i; j++) > + imc_free_channel(priv, j); > + goto exit_free; > + } > + } > + > + return 0; > + > +exit_free: > + kfree(priv); > + return err; > +} > + > +static void imc_remove(struct pci_dev *dev) > +{ > + int i; > + struct imc_priv *priv = pci_get_drvdata(dev); > + > + for (i = 0; i < 2; i++) > + imc_free_channel(priv, i); > + > + kfree(priv); > +} > + > +static int imc_suspend(struct pci_dev *dev, pm_message_t mesg) > +{ > + int i; > + struct imc_priv *priv = pci_get_drvdata(dev); > + > + /* BIOS is in charge. 
We should finish any pending transaction */ > + for (i = 0; i < 2; i++) { > + mutex_lock(&priv->channels[i].mutex); > + priv->channels[i].suspended = true; > + mutex_unlock(&priv->channels[i].mutex); > + } > + > + return 0; > +} > + > +static int imc_resume(struct pci_dev *dev) > +{ > + int i; > + struct imc_priv *priv = pci_get_drvdata(dev); > + > + for (i = 0; i < 2; i++) { > + mutex_lock(&priv->channels[i].mutex); > + priv->channels[i].suspended = false; > + mutex_unlock(&priv->channels[i].mutex); > + } > + > + return 0; > +} > + > +static DEFINE_PCI_DEVICE_TABLE(imc_ids) = { > + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA) }, > + { 0, } > +}; > + > +MODULE_DEVICE_TABLE(pci, imc_ids); > + > +static struct pci_driver imc_pci_driver = { > + .name = "imc_smbus", > + .id_table = imc_ids, > + .probe = imc_probe, > + .remove = imc_remove, > + .suspend = imc_suspend, > + .resume = imc_resume, > +}; > + > +module_pci_driver(imc_pci_driver); > + > +MODULE_AUTHOR("Andrew Lutomirski <luto@xxxxxxxxxxxxxx>"); > +MODULE_DESCRIPTION("iMC SMBus driver"); > +MODULE_LICENSE("GPL"); > -- > 1.8.1.4 > -- Andy Lutomirski AMA Capital Management, LLC -- To unsubscribe from this list: send the line "unsubscribe linux-i2c" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html