On Sun, Mar 24, 2019 at 10:24 PM Wu Hao <hao.wu@xxxxxxxxx> wrote: Hi Hao, > > This patch adds support for global error reporting for FPGA > Management Engine (FME), it introduces sysfs interfaces to > report different error detected by the hardware, and allow > user to clear errors or inject error for testing purpose. > > Signed-off-by: Luwei Kang <luwei.kang@xxxxxxxxx> > Signed-off-by: Ananda Ravuri <ananda.ravuri@xxxxxxxxx> > Signed-off-by: Xu Yilun <yilun.xu@xxxxxxxxx> > Signed-off-by: Wu Hao <hao.wu@xxxxxxxxx> > --- > Documentation/ABI/testing/sysfs-platform-dfl-fme | 58 ++++ > drivers/fpga/Makefile | 2 +- > drivers/fpga/dfl-fme-error.c | 390 +++++++++++++++++++++++ > drivers/fpga/dfl-fme-main.c | 4 + > drivers/fpga/dfl-fme.h | 2 + > drivers/fpga/dfl.h | 2 + > 6 files changed, 457 insertions(+), 1 deletion(-) > create mode 100644 drivers/fpga/dfl-fme-error.c > > diff --git a/Documentation/ABI/testing/sysfs-platform-dfl-fme b/Documentation/ABI/testing/sysfs-platform-dfl-fme > index 4b6448f..38f9cdd 100644 > --- a/Documentation/ABI/testing/sysfs-platform-dfl-fme > +++ b/Documentation/ABI/testing/sysfs-platform-dfl-fme > @@ -156,3 +156,61 @@ KernelVersion: 5.2 > Contact: Wu Hao <hao.wu@xxxxxxxxx> > Description: Read-only. Read this file to get power limit for fpga, it > is only valid for integrated solution. > + > +What: /sys/bus/platform/devices/dfl-fme.0/errors/fme-errors/errors > +Date: March 2019 > +KernelVersion: 5.2 > +Contact: Wu Hao <hao.wu@xxxxxxxxx> > +Description: Read-only. Read this file to get errors detected by hardware. > + > +What: /sys/bus/platform/devices/dfl-fme.0/errors/fme-errors/first_error > +Date: March 2019 > +KernelVersion: 5.2 > +Contact: Wu Hao <hao.wu@xxxxxxxxx> > +Description: Read-only. Read this file to get the first error detected by > + hardware. > + > +What: /sys/bus/platform/devices/dfl-fme.0/errors/fme-errors/next_error > +Date: March 2019 > +KernelVersion: 5.2 > +Contact: Wu Hao <hao.wu@xxxxxxxxx> > +Description: Read-only. Read this file to get the second error detected by > + hardware. > + > +What: /sys/bus/platform/devices/dfl-fme.0/errors/fme-errors/clear > +Date: March 2019 > +KernelVersion: 5.2 > +Contact: Wu Hao <hao.wu@xxxxxxxxx> > +Description: Write-only. Write error code to this file to clear errors. If > + the input error code doesn't match, it returns -EBUSY. As with the afu errors patch, seems like -EINVAL would be better. > + > +What: /sys/bus/platform/devices/dfl-fme.0/errors/pcie0_errors > +Date: March 2019 > +KernelVersion: 5.2 > +Contact: Wu Hao <hao.wu@xxxxxxxxx> > +Description: Read-only. It returns errors detected on pcie0 link. > + > +What: /sys/bus/platform/devices/dfl-fme.0/errors/pcie1_errors > +Date: March 2019 > +KernelVersion: 5.2 > +Contact: Wu Hao <hao.wu@xxxxxxxxx> > +Description: Read-only. It returns errors detected on pcie1 link. > + > +What: /sys/bus/platform/devices/dfl-fme.0/errors/nonfatal_errors > +Date: March 2019 > +KernelVersion: 5.2 > +Contact: Wu Hao <hao.wu@xxxxxxxxx> > +Description: Read-only. It returns non-fatal errors detected. > + > +What: /sys/bus/platform/devices/dfl-fme.0/errors/catfatal_errors > +Date: March 2019 > +KernelVersion: 5.2 > +Contact: Wu Hao <hao.wu@xxxxxxxxx> > +Description: Read-only. It returns catastrophic and fatal errors detected. > + > +What: /sys/bus/platform/devices/dfl-fme.0/errors/inject_error > +Date: March 2019 > +KernelVersion: 5.2 > +Contact: Wu Hao <hao.wu@xxxxxxxxx> > +Description: Read-Write. Write this file to inject errors for testing > + purpose. Read this file to check errors injected. > diff --git a/drivers/fpga/Makefile b/drivers/fpga/Makefile > index f1f0af7..1a9fa3d 100644 > --- a/drivers/fpga/Makefile > +++ b/drivers/fpga/Makefile > @@ -38,7 +38,7 @@ obj-$(CONFIG_FPGA_DFL_FME_BRIDGE) += dfl-fme-br.o > obj-$(CONFIG_FPGA_DFL_FME_REGION) += dfl-fme-region.o > obj-$(CONFIG_FPGA_DFL_AFU) += dfl-afu.o > > -dfl-fme-objs := dfl-fme-main.o dfl-fme-pr.o > +dfl-fme-objs := dfl-fme-main.o dfl-fme-pr.o dfl-fme-error.o > dfl-afu-objs := dfl-afu-main.o dfl-afu-region.o dfl-afu-dma-region.o > dfl-afu-objs += dfl-afu-error.o > > diff --git a/drivers/fpga/dfl-fme-error.c b/drivers/fpga/dfl-fme-error.c > new file mode 100644 > index 0000000..f2bd5f8 > --- /dev/null > +++ b/drivers/fpga/dfl-fme-error.c > @@ -0,0 +1,390 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Driver for FPGA Management Engine Error Management > + * > + * Copyright 2019 Intel Corporation, Inc. > + * > + * Authors: > + * Kang Luwei <luwei.kang@xxxxxxxxx> > + * Xiao Guangrong <guangrong.xiao@xxxxxxxxxxxxxxx> > + * Wu Hao <hao.wu@xxxxxxxxx> > + * Joseph Grecco <joe.grecco@xxxxxxxxx> > + * Enno Luebbers <enno.luebbers@xxxxxxxxx> > + * Tim Whisonant <tim.whisonant@xxxxxxxxx> > + * Ananda Ravuri <ananda.ravuri@xxxxxxxxx> > + * Mitchel, Henry <henry.mitchel@xxxxxxxxx> > + */ > + > +#include <linux/uaccess.h> > + > +#include "dfl.h" > +#include "dfl-fme.h" > + > +#define FME_ERROR_MASK 0x8 > +#define FME_ERROR 0x10 > +#define MBP_ERROR BIT_ULL(6) > +#define PCIE0_ERROR_MASK 0x18 > +#define PCIE0_ERROR 0x20 > +#define PCIE1_ERROR_MASK 0x28 > +#define PCIE1_ERROR 0x30 > +#define FME_FIRST_ERROR 0x38 > +#define FME_NEXT_ERROR 0x40 > +#define RAS_NONFAT_ERROR_MASK 0x48 > +#define RAS_NONFAT_ERROR 0x50 > +#define RAS_CATFAT_ERROR_MASK 0x58 > +#define RAS_CATFAT_ERROR 0x60 > +#define RAS_ERROR_INJECT 0x68 > +#define INJECT_ERROR_MASK GENMASK_ULL(2, 0) > + > +static ssize_t errors_show(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + struct device *err_dev = dev->parent; > + void __iomem *base; > + > + base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR); > + > + return scnprintf(buf, PAGE_SIZE, "0x%llx\n", > + (unsigned long long)readq(base + FME_ERROR)); > +} > +static DEVICE_ATTR_RO(errors); > + > +static ssize_t first_error_show(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + struct device *err_dev = dev->parent; > + void __iomem *base; > + > + base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR); > + > + return scnprintf(buf, PAGE_SIZE, "0x%llx\n", > + (unsigned long long)readq(base + FME_FIRST_ERROR)); > +} > +static DEVICE_ATTR_RO(first_error); > + > +static ssize_t next_error_show(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + struct device *err_dev = dev->parent; > + void __iomem *base; > + > + base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR); > + > + return scnprintf(buf, PAGE_SIZE, "0x%llx\n", > + (unsigned long long)readq(base + FME_NEXT_ERROR)); > +} > +static DEVICE_ATTR_RO(next_error); > + > +static ssize_t clear_store(struct device *dev, struct device_attribute *attr, > + const char *buf, size_t count) > +{ > + struct dfl_feature_platform_data *pdata = dev_get_platdata(dev->parent); > + struct device *err_dev = dev->parent; > + struct dfl_feature *feature; > + void __iomem *base; > + u64 v, val; > + int ret; > + > + ret = kstrtou64(buf, 0, &val); > + if (ret) > + return ret; from kstrtox.c: * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. Your sysfs doc could be updated to explain these return codes. > + > + feature = dfl_get_feature_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR); > + base = feature->ioaddr; > + > + mutex_lock(&pdata->lock); > + writeq(GENMASK_ULL(63, 0), base + FME_ERROR_MASK); > + > + v = readq(base + FME_ERROR); > + if (val != v) { > + ret = -EINVAL; Oh wait, that's what I thought it should be ;) so the doc just needs to change. > + goto done; It would be easy to avoid using 'goto' here. > + } > + > + writeq(v, base + FME_ERROR); > + v = readq(base + FME_FIRST_ERROR); > + writeq(v, base + FME_FIRST_ERROR); > + v = readq(base + FME_NEXT_ERROR); > + writeq(v, base + FME_NEXT_ERROR); > + > +done: > + /* Workaround: disable MBP_ERROR if feature revision is 0 */ > + writeq(dfl_feature_revision(feature->ioaddr) ? 0ULL : MBP_ERROR, > + base + FME_ERROR_MASK); > + mutex_unlock(&pdata->lock); > + return ret ? ret : count; > +} > +static DEVICE_ATTR_WO(clear); > + > +static ssize_t revision_show(struct device *dev, struct device_attribute *attr, > + char *buf) > +{ > + struct device *err_dev = dev->parent; > + void __iomem *base; > + > + base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR); > + > + return scnprintf(buf, PAGE_SIZE, "%u\n", dfl_feature_revision(base)); > +} > +static DEVICE_ATTR_RO(revision); The revision attr to be documented in the sysfs doc. > + > +static ssize_t pcie0_errors_show(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + struct device *err_dev = dev->parent; > + void __iomem *base; > + > + base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR); > + > + return scnprintf(buf, PAGE_SIZE, "0x%llx\n", > + (unsigned long long)readq(base + PCIE0_ERROR)); > +} > + > +static ssize_t pcie0_errors_store(struct device *dev, The sysfs doc shows this as read-only. > + struct device_attribute *attr, > + const char *buf, size_t count) > +{ > + struct dfl_feature_platform_data *pdata = dev_get_platdata(dev->parent); > + struct device *err_dev = dev->parent; > + void __iomem *base; > + u64 v, val; > + int ret; > + > + ret = kstrtou64(buf, 0, &val); > + if (ret) > + return ret; > + > + base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR); > + > + mutex_lock(&pdata->lock); > + writeq(GENMASK_ULL(63, 0), base + PCIE0_ERROR_MASK); > + > + v = readq(base + PCIE0_ERROR); > + if (val != v) > + ret = -EBUSY; > + else > + writeq(v, base + PCIE0_ERROR); > + > + writeq(0ULL, base + PCIE0_ERROR_MASK); > + mutex_unlock(&pdata->lock); > + return ret ? ret : count; > +} > +static DEVICE_ATTR_RW(pcie0_errors); > + > +static ssize_t pcie1_errors_show(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + struct device *err_dev = dev->parent; > + void __iomem *base; > + > + base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR); > + > + return scnprintf(buf, PAGE_SIZE, "0x%llx\n", > + (unsigned long long)readq(base + PCIE1_ERROR)); > +} > + > +static ssize_t pcie1_errors_store(struct device *dev, > + struct device_attribute *attr, > + const char *buf, size_t count) Same here, documentation needs to show as R/W and explain what happens when written. > +{ > + struct dfl_feature_platform_data *pdata = dev_get_platdata(dev->parent); > + struct device *err_dev = dev->parent; > + void __iomem *base; > + u64 v, val; > + int ret; > + > + ret = kstrtou64(buf, 0, &val); > + if (ret) > + return ret; > + > + base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR); > + > + mutex_lock(&pdata->lock); > + writeq(GENMASK_ULL(63, 0), base + PCIE1_ERROR_MASK); > + > + v = readq(base + PCIE1_ERROR); > + if (val != v) > + ret = -EBUSY; > + else > + writeq(v, base + PCIE1_ERROR); > + > + writeq(0ULL, base + PCIE1_ERROR_MASK); > + mutex_unlock(&pdata->lock); > + return ret ? ret : count; > +} > +static DEVICE_ATTR_RW(pcie1_errors); > + > +static ssize_t nonfatal_errors_show(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + struct device *err_dev = dev->parent; > + void __iomem *base; > + > + base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR); > + > + return scnprintf(buf, PAGE_SIZE, "0x%llx\n", > + (unsigned long long)readq(base + RAS_NONFAT_ERROR)); > +} > +static DEVICE_ATTR_RO(nonfatal_errors); > + > +static ssize_t catfatal_errors_show(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + struct device *err_dev = dev->parent; > + void __iomem *base; > + > + base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR); > + > + return scnprintf(buf, PAGE_SIZE, "0x%llx\n", > + (unsigned long long)readq(base + RAS_CATFAT_ERROR)); > +} > +static DEVICE_ATTR_RO(catfatal_errors); > + > +static ssize_t inject_error_show(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + struct device *err_dev = dev->parent; > + void __iomem *base; > + u64 v; > + > + base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR); > + > + v = readq(base + RAS_ERROR_INJECT); > + > + return scnprintf(buf, PAGE_SIZE, "0x%llx\n", > + (unsigned long long)FIELD_GET(INJECT_ERROR_MASK, v)); > +} > + > +static ssize_t inject_error_store(struct device *dev, > + struct device_attribute *attr, > + const char *buf, size_t count) > +{ > + struct dfl_feature_platform_data *pdata = dev_get_platdata(dev->parent); > + struct device *err_dev = dev->parent; > + void __iomem *base; > + u8 inject_error; > + int ret; > + u64 v; > + > + ret = kstrtou8(buf, 0, &inject_error); > + if (ret) > + return ret; > + > + if (inject_error & ~INJECT_ERROR_MASK) > + return -EINVAL; > + > + base = dfl_get_feature_ioaddr_by_id(err_dev, FME_FEATURE_ID_GLOBAL_ERR); > + > + mutex_lock(&pdata->lock); > + v = readq(base + RAS_ERROR_INJECT); > + v &= ~INJECT_ERROR_MASK; > + v |= FIELD_PREP(INJECT_ERROR_MASK, inject_error); > + writeq(v, base + RAS_ERROR_INJECT); > + mutex_unlock(&pdata->lock); > + > + return count; > +} > +static DEVICE_ATTR_RW(inject_error); > + > +static struct attribute *fme_errors_attrs[] = { > + &dev_attr_errors.attr, > + &dev_attr_first_error.attr, > + &dev_attr_next_error.attr, > + &dev_attr_clear.attr, > + NULL, > +}; > + > +static struct attribute_group fme_errors_attr_group = { > + .attrs = fme_errors_attrs, > + .name = "fme-errors", > +}; > + > +static struct attribute *errors_attrs[] = { > + &dev_attr_revision.attr, > + &dev_attr_pcie0_errors.attr, > + &dev_attr_pcie1_errors.attr, > + &dev_attr_nonfatal_errors.attr, > + &dev_attr_catfatal_errors.attr, > + &dev_attr_inject_error.attr, > + NULL, > +}; > + > +static struct attribute_group errors_attr_group = { > + .attrs = errors_attrs, > +}; > + > +static const struct attribute_group *error_groups[] = { > + &fme_errors_attr_group, > + &errors_attr_group, > + NULL > +}; > + > +static void fme_error_enable(struct dfl_feature *feature) > +{ > + void __iomem *base = feature->ioaddr; > + > + /* Workaround: disable MBP_ERROR if revision is 0 */ > + writeq(dfl_feature_revision(feature->ioaddr) ? 0ULL : MBP_ERROR, > + base + FME_ERROR_MASK); > + writeq(0ULL, base + PCIE0_ERROR_MASK); > + writeq(0ULL, base + PCIE1_ERROR_MASK); > + writeq(0ULL, base + RAS_NONFAT_ERROR_MASK); > + writeq(0ULL, base + RAS_CATFAT_ERROR_MASK); > +} > + > +static void err_dev_release(struct device *dev) > +{ > + kfree(dev); > +} > + > +static int fme_global_err_init(struct platform_device *pdev, > + struct dfl_feature *feature) > +{ > + struct device *dev; > + int ret = 0; > + > + dev = kzalloc(sizeof(*dev), GFP_KERNEL); > + if (!dev) > + return -ENOMEM; > + > + dev->parent = &pdev->dev; > + dev->release = err_dev_release; > + dev_set_name(dev, "errors"); > + > + fme_error_enable(feature); > + > + ret = device_register(dev); > + if (ret) { > + put_device(dev); > + return ret; > + } > + > + ret = sysfs_create_groups(&dev->kobj, error_groups); > + if (ret) { > + device_unregister(dev); > + return ret; > + } > + > + feature->priv = dev; > + > + return ret; > +} > + > +static void fme_global_err_uinit(struct platform_device *pdev, > + struct dfl_feature *feature) > +{ > + struct device *dev = feature->priv; > + > + sysfs_remove_groups(&dev->kobj, error_groups); > + device_unregister(dev); > +} > + > +const struct dfl_feature_id fme_global_err_id_table[] = { > + {.id = FME_FEATURE_ID_GLOBAL_ERR,}, > + {0,} > +}; > + > +const struct dfl_feature_ops fme_global_err_ops = { > + .init = fme_global_err_init, > + .uinit = fme_global_err_uinit, > +}; > diff --git a/drivers/fpga/dfl-fme-main.c b/drivers/fpga/dfl-fme-main.c > index dafa6580..76cb112 100644 > --- a/drivers/fpga/dfl-fme-main.c > +++ b/drivers/fpga/dfl-fme-main.c > @@ -686,6 +686,10 @@ static struct dfl_feature_driver fme_feature_drvs[] = { > .ops = &fme_power_mgmt_ops, > }, > { > + .id_table = fme_global_err_id_table, > + .ops = &fme_global_err_ops, > + }, > + { > .ops = NULL, > }, > }; > diff --git a/drivers/fpga/dfl-fme.h b/drivers/fpga/dfl-fme.h > index 7a021c4..5fbe3f5 100644 > --- a/drivers/fpga/dfl-fme.h > +++ b/drivers/fpga/dfl-fme.h > @@ -37,5 +37,7 @@ struct dfl_fme { > > extern const struct dfl_feature_ops fme_pr_mgmt_ops; > extern const struct dfl_feature_id fme_pr_mgmt_id_table[]; > +extern const struct dfl_feature_ops fme_global_err_ops; > +extern const struct dfl_feature_id fme_global_err_id_table[]; > > #endif /* __DFL_FME_H */ > diff --git a/drivers/fpga/dfl.h b/drivers/fpga/dfl.h > index fbc57f0..6c32080 100644 > --- a/drivers/fpga/dfl.h > +++ b/drivers/fpga/dfl.h > @@ -197,12 +197,14 @@ struct dfl_feature_driver { > * feature dev (platform device)'s reources. > * @ioaddr: mapped mmio resource address. > * @ops: ops of this sub feature. > + * @priv: priv data of this feature. > */ > struct dfl_feature { > u64 id; > int resource_index; > void __iomem *ioaddr; > const struct dfl_feature_ops *ops; > + void *priv; > }; > > #define DEV_STATUS_IN_USE 0 > -- > 2.7.4 > Thanks, Alan