On 12/16/2019 3:27 PM, Guenter Roeck wrote:
On Mon, Dec 16, 2019 at 02:17:34PM -0800, Jae Hyun Yoo wrote:
[...]
+static int get_dimm_temp(struct peci_dimmtemp *priv, int dimm_no)
+{
+ int dimm_order = dimm_no % priv->gen_info->dimm_idx_max;
+ int chan_rank = dimm_no / priv->gen_info->dimm_idx_max;
+ struct peci_rd_pci_cfg_local_msg rp_msg;
+ u8 cfg_data[4];
+ int ret;
+
+ if (!peci_sensor_need_update(&priv->temp[dimm_no]))
+ return 0;
+
+ ret = read_ddr_dimm_temp_config(priv, chan_rank, cfg_data);
+ if (ret)
+ return ret;
+
+ priv->temp[dimm_no].value = cfg_data[dimm_order] * 1000;
+
+ switch (priv->gen_info->model) {
+ case INTEL_FAM6_SKYLAKE_X:
+ rp_msg.addr = priv->mgr->client->addr;
+ rp_msg.bus = 2;
+ /*
+ * Device 10, Function 2: IMC 0 channel 0 -> rank 0
+ * Device 10, Function 6: IMC 0 channel 1 -> rank 1
+ * Device 11, Function 2: IMC 0 channel 2 -> rank 2
+ * Device 12, Function 2: IMC 1 channel 0 -> rank 3
+ * Device 12, Function 6: IMC 1 channel 1 -> rank 4
+ * Device 13, Function 2: IMC 1 channel 2 -> rank 5
+ */
+ rp_msg.device = 10 + chan_rank / 3 * 2 +
+ (chan_rank % 3 == 2 ? 1 : 0);
+ rp_msg.function = chan_rank % 3 == 1 ? 6 : 2;
+ rp_msg.reg = 0x120 + dimm_order * 4;
+ rp_msg.rx_len = 4;
+
+ ret = peci_command(priv->mgr->client->adapter,
+ PECI_CMD_RD_PCI_CFG_LOCAL, &rp_msg);
+ if (rp_msg.cc != PECI_DEV_CC_SUCCESS)
+ ret = -EAGAIN;
+ if (ret)
+ return ret;
+
+ priv->temp_max[dimm_no] = rp_msg.pci_config[1] * 1000;
+ priv->temp_crit[dimm_no] = rp_msg.pci_config[2] * 1000;
+ break;
+ case INTEL_FAM6_SKYLAKE_XD:
+ rp_msg.addr = priv->mgr->client->addr;
+ rp_msg.bus = 2;
+ /*
+ * Device 10, Function 2: IMC 0 channel 0 -> rank 0
+ * Device 10, Function 6: IMC 0 channel 1 -> rank 1
+ * Device 12, Function 2: IMC 1 channel 0 -> rank 2
+ * Device 12, Function 6: IMC 1 channel 1 -> rank 3
+ */
+ rp_msg.device = 10 + chan_rank / 2 * 2;
+ rp_msg.function = (chan_rank % 2) ? 6 : 2;
+ rp_msg.reg = 0x120 + dimm_order * 4;
+ rp_msg.rx_len = 4;
+
+ ret = peci_command(priv->mgr->client->adapter,
+ PECI_CMD_RD_PCI_CFG_LOCAL, &rp_msg);
+ if (rp_msg.cc != PECI_DEV_CC_SUCCESS)
+ ret = -EAGAIN;
+ if (ret)
+ return ret;
+
+ priv->temp_max[dimm_no] = rp_msg.pci_config[1] * 1000;
+ priv->temp_crit[dimm_no] = rp_msg.pci_config[2] * 1000;
+ break;
+ case INTEL_FAM6_HASWELL_X:
+ case INTEL_FAM6_BROADWELL_X:
+ rp_msg.addr = priv->mgr->client->addr;
+ rp_msg.bus = 1;
+ /*
+ * Device 20, Function 0: IMC 0 channel 0 -> rank 0
+ * Device 20, Function 1: IMC 0 channel 1 -> rank 1
+ * Device 21, Function 0: IMC 0 channel 2 -> rank 2
+ * Device 21, Function 1: IMC 0 channel 3 -> rank 3
+ * Device 23, Function 0: IMC 1 channel 0 -> rank 4
+ * Device 23, Function 1: IMC 1 channel 1 -> rank 5
+ * Device 24, Function 0: IMC 1 channel 2 -> rank 6
+ * Device 24, Function 1: IMC 1 channel 3 -> rank 7
+ */
+ rp_msg.device = 20 + chan_rank / 2 + chan_rank / 4;
+ rp_msg.function = chan_rank % 2;
+ rp_msg.reg = 0x120 + dimm_order * 4;
+ rp_msg.rx_len = 4;
+
+ ret = peci_command(priv->mgr->client->adapter,
+ PECI_CMD_RD_PCI_CFG_LOCAL, &rp_msg);
+ if (rp_msg.cc != PECI_DEV_CC_SUCCESS)
+ ret = -EAGAIN;
+ if (ret)
+ return ret;
+
+ priv->temp_max[dimm_no] = rp_msg.pci_config[1] * 1000;
+ priv->temp_crit[dimm_no] = rp_msg.pci_config[2] * 1000;
+ break;
+ default:
+ return -EOPNOTSUPP;
It looks like the sensors are created even on unsupported platforms,
which would generate error messages whenever someone tries to read
the attributes.
There should be some code early on checking this, and the driver
should not even instantiate if the CPU model is not supported.
Actually, this 'default' case will not be happened because this driver
will be registered only when the CPU model is supported. The CPU model
checking code is in 'intel-peci-client.c' which is [11/14] of this
patch set.
That again assumes that both drivers will be modified in sync in the future.
We can not make that assumption.
As you said, both drivers must be modified in sync in the future because
each Intel CPU family uses different way of reading DIMM temperature.
In case if supported CPU checking code updated without making sync with
it, this driver will return the error.
... and in that situation the driver should not instantiate in the
first place. Its probe function should return -ENODEV.
Got the point. I'll add the checking code even in this driver module
too.
Thanks a lot!
-Jae