On Wed, 27 Apr 2022 at 14:34, Guenter Roeck <linux@xxxxxxxxxxxx> wrote: > > On Wed, Apr 27, 2022 at 09:04:43AM -0500, Eddie James wrote: > > Instead of registering the hwmon device at probe time, use the > > existing "occ_active" sysfs file to control when the driver polls > > the OCC for sensor data and registers with hwmon. The reason for > > this change is that the SBE, which is the device by which the > > driver communicates with the OCC, cannot handle communications > > during certain system state transitions, resulting in > > unrecoverable system errors. > > > > Signed-off-by: Eddie James <eajames@xxxxxxxxxxxxx> > > Applied to hwmon-next. Will this change break existing userspace? As I understand it existing systems rely on the driver probing for the occ without extra interaction. >From your commit message, it sounds like we should inhibit SBE communication during the sensitive period. This would stop any devices, not just the OCC, from generating unwanted traffic. > > Guenter > > > --- > > Changes since v1: > > - Update commit message to for clarity. > > > > drivers/hwmon/occ/common.c | 100 +++++++++++++++++++-------- > > drivers/hwmon/occ/common.h | 5 +- > > drivers/hwmon/occ/p8_i2c.c | 2 +- > > drivers/hwmon/occ/p9_sbe.c | 2 +- > > drivers/hwmon/occ/sysfs.c | 137 ++++++++++++++++++++++--------------- > > 5 files changed, 156 insertions(+), 90 deletions(-) > > > > diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c > > index f00cd59f1d19..d78f4bebc718 100644 > > --- a/drivers/hwmon/occ/common.c > > +++ b/drivers/hwmon/occ/common.c > > @@ -1149,44 +1149,75 @@ static void occ_parse_poll_response(struct occ *occ) > > sizeof(*header), size + sizeof(*header)); > > } > > > > -int occ_setup(struct occ *occ, const char *name) > > +int occ_active(struct occ *occ, bool active) > > { > > - int rc; > > - > > - mutex_init(&occ->lock); > > - occ->groups[0] = &occ->group; > > + int rc = mutex_lock_interruptible(&occ->lock); > > > > - /* no need to lock */ > > - rc = occ_poll(occ); > > - if (rc == -ESHUTDOWN) { > > - dev_info(occ->bus_dev, "host is not ready\n"); > > - return rc; > > - } else if (rc < 0) { > > - dev_err(occ->bus_dev, > > - "failed to get OCC poll response=%02x: %d\n", > > - occ->resp.return_status, rc); > > + if (rc) > > return rc; > > - } > > > > - occ->next_update = jiffies + OCC_UPDATE_FREQUENCY; > > - occ_parse_poll_response(occ); > > + if (active) { > > + if (occ->active) { > > + rc = -EALREADY; > > + goto unlock; > > + } > > > > - rc = occ_setup_sensor_attrs(occ); > > - if (rc) { > > - dev_err(occ->bus_dev, "failed to setup sensor attrs: %d\n", > > - rc); > > - return rc; > > - } > > + occ->error_count = 0; > > + occ->last_safe = 0; > > > > - occ->hwmon = devm_hwmon_device_register_with_groups(occ->bus_dev, name, > > - occ, occ->groups); > > - if (IS_ERR(occ->hwmon)) { > > - rc = PTR_ERR(occ->hwmon); > > - dev_err(occ->bus_dev, "failed to register hwmon device: %d\n", > > - rc); > > - return rc; > > + rc = occ_poll(occ); > > + if (rc < 0) { > > + dev_err(occ->bus_dev, > > + "failed to get OCC poll response=%02x: %d\n", > > + occ->resp.return_status, rc); > > + goto unlock; > > + } > > + > > + occ->active = true; > > + occ->next_update = jiffies + OCC_UPDATE_FREQUENCY; > > + occ_parse_poll_response(occ); > > + > > + rc = occ_setup_sensor_attrs(occ); > > + if (rc) { > > + dev_err(occ->bus_dev, > > + "failed to setup sensor attrs: %d\n", rc); > > + goto unlock; > > + } > > + > > + occ->hwmon = hwmon_device_register_with_groups(occ->bus_dev, > > + "occ", occ, > > + occ->groups); > > + if (IS_ERR(occ->hwmon)) { > > + rc = PTR_ERR(occ->hwmon); > > + occ->hwmon = NULL; > > + dev_err(occ->bus_dev, > > + "failed to register hwmon device: %d\n", rc); > > + goto unlock; > > + } > > + } else { > > + if (!occ->active) { > > + rc = -EALREADY; > > + goto unlock; > > + } > > + > > + if (occ->hwmon) > > + hwmon_device_unregister(occ->hwmon); > > + occ->active = false; > > + occ->hwmon = NULL; > > } > > > > +unlock: > > + mutex_unlock(&occ->lock); > > + return rc; > > +} > > + > > +int occ_setup(struct occ *occ) > > +{ > > + int rc; > > + > > + mutex_init(&occ->lock); > > + occ->groups[0] = &occ->group; > > + > > rc = occ_setup_sysfs(occ); > > if (rc) > > dev_err(occ->bus_dev, "failed to setup sysfs: %d\n", rc); > > @@ -1195,6 +1226,15 @@ int occ_setup(struct occ *occ, const char *name) > > } > > EXPORT_SYMBOL_GPL(occ_setup); > > > > +void occ_shutdown(struct occ *occ) > > +{ > > + occ_shutdown_sysfs(occ); > > + > > + if (occ->hwmon) > > + hwmon_device_unregister(occ->hwmon); > > +} > > +EXPORT_SYMBOL_GPL(occ_shutdown); > > + > > MODULE_AUTHOR("Eddie James <eajames@xxxxxxxxxxxxx>"); > > MODULE_DESCRIPTION("Common OCC hwmon code"); > > MODULE_LICENSE("GPL"); > > diff --git a/drivers/hwmon/occ/common.h b/drivers/hwmon/occ/common.h > > index 2dd4a4d240c0..64d5ec7e169b 100644 > > --- a/drivers/hwmon/occ/common.h > > +++ b/drivers/hwmon/occ/common.h > > @@ -106,6 +106,7 @@ struct occ { > > struct attribute_group group; > > const struct attribute_group *groups[2]; > > > > + bool active; > > int error; /* final transfer error after retry */ > > int last_error; /* latest transfer error */ > > unsigned int error_count; /* number of xfr errors observed */ > > @@ -123,9 +124,11 @@ struct occ { > > u8 prev_mode; > > }; > > > > -int occ_setup(struct occ *occ, const char *name); > > +int occ_active(struct occ *occ, bool active); > > +int occ_setup(struct occ *occ); > > int occ_setup_sysfs(struct occ *occ); > > void occ_shutdown(struct occ *occ); > > +void occ_shutdown_sysfs(struct occ *occ); > > void occ_sysfs_poll_done(struct occ *occ); > > int occ_update_response(struct occ *occ); > > > > diff --git a/drivers/hwmon/occ/p8_i2c.c b/drivers/hwmon/occ/p8_i2c.c > > index 9e61e1fb5142..da39ea28df31 100644 > > --- a/drivers/hwmon/occ/p8_i2c.c > > +++ b/drivers/hwmon/occ/p8_i2c.c > > @@ -223,7 +223,7 @@ static int p8_i2c_occ_probe(struct i2c_client *client) > > occ->poll_cmd_data = 0x10; /* P8 OCC poll data */ > > occ->send_cmd = p8_i2c_occ_send_cmd; > > > > - return occ_setup(occ, "p8_occ"); > > + return occ_setup(occ); > > } > > > > static int p8_i2c_occ_remove(struct i2c_client *client) > > diff --git a/drivers/hwmon/occ/p9_sbe.c b/drivers/hwmon/occ/p9_sbe.c > > index 49b13cc01073..42fc7b97bb34 100644 > > --- a/drivers/hwmon/occ/p9_sbe.c > > +++ b/drivers/hwmon/occ/p9_sbe.c > > @@ -145,7 +145,7 @@ static int p9_sbe_occ_probe(struct platform_device *pdev) > > occ->poll_cmd_data = 0x20; /* P9 OCC poll data */ > > occ->send_cmd = p9_sbe_occ_send_cmd; > > > > - rc = occ_setup(occ, "p9_occ"); > > + rc = occ_setup(occ); > > if (rc == -ESHUTDOWN) > > rc = -ENODEV; /* Host is shutdown, don't spew errors */ > > > > diff --git a/drivers/hwmon/occ/sysfs.c b/drivers/hwmon/occ/sysfs.c > > index b2f788a77746..2317301fc1e9 100644 > > --- a/drivers/hwmon/occ/sysfs.c > > +++ b/drivers/hwmon/occ/sysfs.c > > @@ -6,13 +6,13 @@ > > #include <linux/export.h> > > #include <linux/hwmon-sysfs.h> > > #include <linux/kernel.h> > > +#include <linux/kstrtox.h> > > #include <linux/sysfs.h> > > > > #include "common.h" > > > > /* OCC status register */ > > #define OCC_STAT_MASTER BIT(7) > > -#define OCC_STAT_ACTIVE BIT(0) > > > > /* OCC extended status register */ > > #define OCC_EXT_STAT_DVFS_OT BIT(7) > > @@ -22,6 +22,25 @@ > > #define OCC_EXT_STAT_DVFS_VDD BIT(3) > > #define OCC_EXT_STAT_GPU_THROTTLE GENMASK(2, 0) > > > > +static ssize_t occ_active_store(struct device *dev, > > + struct device_attribute *attr, > > + const char *buf, size_t count) > > +{ > > + int rc; > > + bool active; > > + struct occ *occ = dev_get_drvdata(dev); > > + > > + rc = kstrtobool(buf, &active); > > + if (rc) > > + return rc; > > + > > + rc = occ_active(occ, active); > > + if (rc) > > + return rc; > > + > > + return count; > > +} > > + > > static ssize_t occ_sysfs_show(struct device *dev, > > struct device_attribute *attr, char *buf) > > { > > @@ -31,54 +50,64 @@ static ssize_t occ_sysfs_show(struct device *dev, > > struct occ_poll_response_header *header; > > struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr); > > > > - rc = occ_update_response(occ); > > - if (rc) > > - return rc; > > + if (occ->active) { > > + rc = occ_update_response(occ); > > + if (rc) > > + return rc; > > > > - header = (struct occ_poll_response_header *)occ->resp.data; > > - > > - switch (sattr->index) { > > - case 0: > > - val = !!(header->status & OCC_STAT_MASTER); > > - break; > > - case 1: > > - val = !!(header->status & OCC_STAT_ACTIVE); > > - break; > > - case 2: > > - val = !!(header->ext_status & OCC_EXT_STAT_DVFS_OT); > > - break; > > - case 3: > > - val = !!(header->ext_status & OCC_EXT_STAT_DVFS_POWER); > > - break; > > - case 4: > > - val = !!(header->ext_status & OCC_EXT_STAT_MEM_THROTTLE); > > - break; > > - case 5: > > - val = !!(header->ext_status & OCC_EXT_STAT_QUICK_DROP); > > - break; > > - case 6: > > - val = header->occ_state; > > - break; > > - case 7: > > - if (header->status & OCC_STAT_MASTER) > > - val = hweight8(header->occs_present); > > - else > > + header = (struct occ_poll_response_header *)occ->resp.data; > > + > > + switch (sattr->index) { > > + case 0: > > + val = !!(header->status & OCC_STAT_MASTER); > > + break; > > + case 1: > > val = 1; > > - break; > > - case 8: > > - val = header->ips_status; > > - break; > > - case 9: > > - val = header->mode; > > - break; > > - case 10: > > - val = !!(header->ext_status & OCC_EXT_STAT_DVFS_VDD); > > - break; > > - case 11: > > - val = header->ext_status & OCC_EXT_STAT_GPU_THROTTLE; > > - break; > > - default: > > - return -EINVAL; > > + break; > > + case 2: > > + val = !!(header->ext_status & OCC_EXT_STAT_DVFS_OT); > > + break; > > + case 3: > > + val = !!(header->ext_status & OCC_EXT_STAT_DVFS_POWER); > > + break; > > + case 4: > > + val = !!(header->ext_status & > > + OCC_EXT_STAT_MEM_THROTTLE); > > + break; > > + case 5: > > + val = !!(header->ext_status & OCC_EXT_STAT_QUICK_DROP); > > + break; > > + case 6: > > + val = header->occ_state; > > + break; > > + case 7: > > + if (header->status & OCC_STAT_MASTER) > > + val = hweight8(header->occs_present); > > + else > > + val = 1; > > + break; > > + case 8: > > + val = header->ips_status; > > + break; > > + case 9: > > + val = header->mode; > > + break; > > + case 10: > > + val = !!(header->ext_status & OCC_EXT_STAT_DVFS_VDD); > > + break; > > + case 11: > > + val = header->ext_status & OCC_EXT_STAT_GPU_THROTTLE; > > + break; > > + default: > > + return -EINVAL; > > + } > > + } else { > > + if (sattr->index == 1) > > + val = 0; > > + else if (sattr->index <= 11) > > + val = -ENODATA; > > + else > > + return -EINVAL; > > } > > > > return sysfs_emit(buf, "%d\n", val); > > @@ -95,7 +124,8 @@ static ssize_t occ_error_show(struct device *dev, > > } > > > > static SENSOR_DEVICE_ATTR(occ_master, 0444, occ_sysfs_show, NULL, 0); > > -static SENSOR_DEVICE_ATTR(occ_active, 0444, occ_sysfs_show, NULL, 1); > > +static SENSOR_DEVICE_ATTR(occ_active, 0644, occ_sysfs_show, occ_active_store, > > + 1); > > static SENSOR_DEVICE_ATTR(occ_dvfs_overtemp, 0444, occ_sysfs_show, NULL, 2); > > static SENSOR_DEVICE_ATTR(occ_dvfs_power, 0444, occ_sysfs_show, NULL, 3); > > static SENSOR_DEVICE_ATTR(occ_mem_throttle, 0444, occ_sysfs_show, NULL, 4); > > @@ -139,7 +169,7 @@ void occ_sysfs_poll_done(struct occ *occ) > > * On the first poll response, we haven't yet created the sysfs > > * attributes, so don't make any notify calls. > > */ > > - if (!occ->hwmon) > > + if (!occ->active) > > goto done; > > > > if ((header->status & OCC_STAT_MASTER) != > > @@ -148,12 +178,6 @@ void occ_sysfs_poll_done(struct occ *occ) > > sysfs_notify(&occ->bus_dev->kobj, NULL, name); > > } > > > > - if ((header->status & OCC_STAT_ACTIVE) != > > - (occ->prev_stat & OCC_STAT_ACTIVE)) { > > - name = sensor_dev_attr_occ_active.dev_attr.attr.name; > > - sysfs_notify(&occ->bus_dev->kobj, NULL, name); > > - } > > - > > if ((header->ext_status & OCC_EXT_STAT_DVFS_OT) != > > (occ->prev_ext_stat & OCC_EXT_STAT_DVFS_OT)) { > > name = sensor_dev_attr_occ_dvfs_overtemp.dev_attr.attr.name; > > @@ -227,8 +251,7 @@ int occ_setup_sysfs(struct occ *occ) > > return sysfs_create_group(&occ->bus_dev->kobj, &occ_sysfs); > > } > > > > -void occ_shutdown(struct occ *occ) > > +void occ_shutdown_sysfs(struct occ *occ) > > { > > sysfs_remove_group(&occ->bus_dev->kobj, &occ_sysfs); > > } > > -EXPORT_SYMBOL_GPL(occ_shutdown);