On Tue, 14 Jun 2022 20:29:38 +0800 Kai Ye via Linux-accelerators <linux-accelerators@xxxxxxxxxxxxxxxx> wrote: > UACCE add the hardware error isolation API. Users can configure > the error frequency threshold by this vfs node. This API interface > certainly supports the configuration of user protocol strategy. Then > parse it inside the device driver. UACCE only reports the device > isolate state. When the error frequency is exceeded, the device > will be isolated. The isolation strategy should be defined in each > driver module. > > Signed-off-by: Kai Ye <yekai13@xxxxxxxxxx> > Reviewed-by: Zhou Wang <wangzhou1@xxxxxxxxxxxxx> > --- > drivers/misc/uacce/uacce.c | 37 +++++++++++++++++++++++++++++++++++++ > include/linux/uacce.h | 16 +++++++++++++--- > 2 files changed, 50 insertions(+), 3 deletions(-) > > diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c > index b6219c6bfb48..525623215132 100644 > --- a/drivers/misc/uacce/uacce.c > +++ b/drivers/misc/uacce/uacce.c > @@ -346,12 +346,47 @@ static ssize_t region_dus_size_show(struct device *dev, > uacce->qf_pg_num[UACCE_QFRT_DUS] << PAGE_SHIFT); > } > > +static ssize_t isolate_show(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + struct uacce_device *uacce = to_uacce_device(dev); > + > + return sysfs_emit(buf, "%d\n", uacce->ops->get_isolate_state(uacce)); > +} > + > +static ssize_t isolate_strategy_show(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + struct uacce_device *uacce = to_uacce_device(dev); > + > + return sysfs_emit(buf, "%s\n", uacce->isolate_strategy); > +} > + > +static ssize_t isolate_strategy_store(struct device *dev, > + struct device_attribute *attr, > + const char *buf, size_t count) > +{ > + struct uacce_device *uacce = to_uacce_device(dev); > + int ret; > + > + if (!buf || sizeof(buf) > UACCE_MAX_ISOLATE_STRATEGY_LEN) > + return -EINVAL; > + > + memcpy(uacce->isolate_strategy, buf, strlen(buf)); What if it's not a valid strategy 
for the driver? We shouldn't store this until we know it's valid. > + > + ret = uacce->ops->isolate_strategy_write(uacce, buf); Having copied the buf into uacce, why pass it as well? My preference would be to pass buf and length and not do the memcpy in here. Leave that choice to the driver. If this were a single value, it would be better stored as an integer than as a string. Obviously that means you need an isolate_strategy_read() as well (that also solves the comment above about not storing what was written until we know it was valid). Thanks, Jonathan > + > + return ret ? ret : count; > +} > + > static DEVICE_ATTR_RO(api); > static DEVICE_ATTR_RO(flags); > static DEVICE_ATTR_RO(available_instances); > static DEVICE_ATTR_RO(algorithms); > static DEVICE_ATTR_RO(region_mmio_size); > static DEVICE_ATTR_RO(region_dus_size); > +static DEVICE_ATTR_RO(isolate); > +static DEVICE_ATTR_RW(isolate_strategy); > > static struct attribute *uacce_dev_attrs[] = { > &dev_attr_api.attr, > @@ -360,6 +395,8 @@ static struct attribute *uacce_dev_attrs[] = { > &dev_attr_algorithms.attr, > &dev_attr_region_mmio_size.attr, > &dev_attr_region_dus_size.attr, > + &dev_attr_isolate.attr, > + &dev_attr_isolate_strategy.attr, > NULL, > }; > > diff --git a/include/linux/uacce.h b/include/linux/uacce.h > index 48e319f40275..0f7668bfa645 100644 > --- a/include/linux/uacce.h > +++ b/include/linux/uacce.h > @@ -8,6 +8,7 @@ > #define UACCE_NAME "uacce" > #define UACCE_MAX_REGION 2 > #define UACCE_MAX_NAME_SIZE 64 > +#define UACCE_MAX_ISOLATE_STRATEGY_LEN 256 > > struct uacce_queue; > struct uacce_device; > @@ -30,6 +31,8 @@ struct uacce_qfile_region { > * @is_q_updated: check whether the task is finished > * @mmap: mmap addresses of queue to user space > * @ioctl: ioctl for user space users of the queue > + * @get_isolate_state: get the device state after set the isolate strategy > + * @isolate_strategy_store: stored the isolate strategy to the device > */ > struct uacce_ops { > int 
(*get_available_instances)(struct uacce_device *uacce); > @@ -43,6 +46,8 @@ struct uacce_ops { > struct uacce_qfile_region *qfr); > long (*ioctl)(struct uacce_queue *q, unsigned int cmd, > unsigned long arg); > + enum uacce_dev_state (*get_isolate_state)(struct uacce_device *uacce); > + int (*isolate_strategy_write)(struct uacce_device *uacce, const char *buf); > }; > > /** > @@ -57,6 +62,12 @@ struct uacce_interface { > const struct uacce_ops *ops; > }; > > +enum uacce_dev_state { > + UACCE_DEV_ERR = -1, > + UACCE_DEV_NORMAL, > + UACCE_DEV_ISOLATE, > +}; > + > enum uacce_q_state { > UACCE_Q_ZOMBIE = 0, > UACCE_Q_INIT, > @@ -117,6 +128,7 @@ struct uacce_device { > struct list_head queues; > struct mutex queues_lock; > struct inode *inode; > + char isolate_strategy[UACCE_MAX_ISOLATE_STRATEGY_LEN]; > }; > > #if IS_ENABLED(CONFIG_UACCE) > @@ -125,7 +137,7 @@ struct uacce_device *uacce_alloc(struct device *parent, > struct uacce_interface *interface); > int uacce_register(struct uacce_device *uacce); > void uacce_remove(struct uacce_device *uacce); > - > +struct uacce_device *dev_to_uacce(struct device *dev); > #else /* CONFIG_UACCE */ > > static inline > @@ -140,8 +152,6 @@ static inline int uacce_register(struct uacce_device *uacce) > return -EINVAL; > } > > -static inline void uacce_remove(struct uacce_device *uacce) {} > - > #endif /* CONFIG_UACCE */ > > #endif /* _LINUX_UACCE_H */