On Fri, Nov 22, 2024 at 03:36:25PM +0530, Shubhrajyoti Datta wrote: > The Versal NET edac listens to the notifications from NMC(Network > management controller) on rpmsg. The driver registers on the error_edac > channel. Send a RAS event trace upon a notification. On reading > the notification the user space application can decide on the response. > A sysfs reset entry is created for the same that sends an acknowledgment > back to the NMC. For reporting events register to the RAS framework. For > memory mc events are used other events use non-standard events. > > Signed-off-by: Shubhrajyoti Datta <shubhrajyoti.datta@xxxxxxx> > --- > > .../ABI/testing/sysfs-driver-versalnet-edac | 11 + > drivers/edac/Kconfig | 9 + > drivers/edac/Makefile | 1 + > drivers/edac/versalnet_rpmsg_edac.c | 1321 +++++++++++++++++ > 4 files changed, 1342 insertions(+) > create mode 100644 Documentation/ABI/testing/sysfs-driver-versalnet-edac > create mode 100644 drivers/edac/versalnet_rpmsg_edac.c > > diff --git a/Documentation/ABI/testing/sysfs-driver-versalnet-edac b/Documentation/ABI/testing/sysfs-driver-versalnet-edac > new file mode 100644 > index 000000000000..598a6c6cef39 > --- /dev/null > +++ b/Documentation/ABI/testing/sysfs-driver-versalnet-edac > @@ -0,0 +1,11 @@ > +What: /sys/devices/system/edac/.../reset > +Date: Nov 2024 > +Contact: shubhrajyoti.datta@xxxxxxx > +Description: > + Writing 1/2 to this file will send a reset request to the NMC (Network > + Management Controller). 1 will request a SRST (Soft reset) and 2 will > + request a POR ( Power-On Reset). > + > + For example:: > + > + # echo 1 > /sys/devices/system/edac/.../reset ABI docs are always separate patches. Anyway, sysfs entry to reset memory is a NAK. NAK. 
> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig > index 81af6c344d6b..c3e0644aed0d 100644 > --- a/drivers/edac/Kconfig > +++ b/drivers/edac/Kconfig > @@ -564,5 +564,14 @@ config EDAC_VERSAL > Support injecting both correctable and uncorrectable errors > for debugging purposes. > > +config EDAC_VERSALNET > + tristate "AMD Versal NET EDAC" No dependency on ARCH? Is this for OF platforms? > + depends on CDX_CONTROLLER > + help > + Support for error detection and correction on the AMD Versal NET DDR > + memory controller. > + > + The memory controller supports single bit error correction, double bit > + error detection. Report various errors to the userspace. > > endif # EDAC > diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile > index faf310eec4a6..7b1a8525c021 100644 > --- a/drivers/edac/Makefile > +++ b/drivers/edac/Makefile > @@ -88,3 +88,4 @@ obj-$(CONFIG_EDAC_DMC520) += dmc520_edac.o > obj-$(CONFIG_EDAC_NPCM) += npcm_edac.o > obj-$(CONFIG_EDAC_ZYNQMP) += zynqmp_edac.o > obj-$(CONFIG_EDAC_VERSAL) += versal_edac.o > +obj-$(CONFIG_EDAC_VERSALNET) += versalnet_rpmsg_edac.o ... > +}; > + > +/* The driver should have only one instance */ > +static int probe_once; Oh, no, this is just poor coding and design style. Such stuff is a big warning sign. How, really, how could an OF platform have two instances? ... 
> +static int mc_probe(struct platform_device *pdev) > +{ > + struct edac_mc_layer layers[2]; > + u32 num_chans, rank, dwidth; > + struct mem_ctl_info *mci; > + struct edac_priv *priv; > + int rc; > + > + rc = device_property_read_u32(&pdev->dev, "amd,rank", &rank); > + if (rc < 0) { > + dev_err(&pdev->dev, "unable to read rank property"); > + return rc; > + } > + > + rc = device_property_read_u32(&pdev->dev, "amd,num-chans", &num_chans); > + if (rc < 0) { > + dev_err(&pdev->dev, "unable to read num-chans property"); > + return rc; > + } > + > + rc = device_property_read_u32(&pdev->dev, "amd,dwidth", &dwidth); > + if (rc < 0) { > + dev_err(&pdev->dev, "unable to read dwidth property"); > + return rc; > + } > + > + mutex_lock(&vnet_edac_lock); > + if (probe_once) { Not possible. Drop. Fix your DTS (and the binding) instead. > + rc = -ENODEV; > + goto free_lock; > + } > + > + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; > + layers[0].size = rank; > + layers[0].is_virt_csrow = true; > + layers[1].type = EDAC_MC_LAYER_CHANNEL; > + layers[1].size = num_chans; > + layers[1].is_virt_csrow = false; > + > + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, > + sizeof(struct edac_priv)); > + if (!mci) { > + edac_printk(KERN_ERR, EDAC_MC, > + "Failed memory allocation for mc instance\n"); > + rc = -ENOMEM; > + goto free_lock; > + } > + > + priv = mci->pvt_info; > + priv->dwidth = dwidth; > + > + mc_init(mci, pdev); > + > + rc = edac_mc_add_mc(mci); > + if (rc) { > + edac_printk(KERN_ERR, EDAC_MC, > + "Failed to register with EDAC core\n"); > + goto free_edac_mc; > + } > + > + amd_rpmsg_id_table[0].driver_data = (kernel_ulong_t)mci; > + INIT_WORK(&priv->work, amd_rpmsg_post_probe_work); > + rc = register_rpmsg_driver(&amd_rpmsg_driver); > + if (rc) { > + edac_printk(KERN_ERR, EDAC_MC, > + "Failed to register RPMsg driver: %d\n", rc); > + goto del_edac_mc; > + } > + > + rc = device_create_file(&mci->dev, &dev_attr_reset); > + if (rc < 0) > + goto unregister; > + > + probe_once 
= 1; > + mutex_unlock(&vnet_edac_lock); > + > + return 0; > + > +unregister: > + unregister_rpmsg_driver(&amd_rpmsg_driver); > +del_edac_mc: > + edac_mc_del_mc(&pdev->dev); > +free_edac_mc: > + edac_mc_free(mci); > +free_lock: > + mutex_unlock(&vnet_edac_lock); > + > + return rc; > +} > + > +static void mc_remove(struct platform_device *pdev) > +{ > + struct mem_ctl_info *mci = platform_get_drvdata(pdev); > + > + mutex_lock(&vnet_edac_lock); > + probe_once = 0; > + unregister_rpmsg_driver(&amd_rpmsg_driver); > + edac_mc_del_mc(&pdev->dev); > + edac_mc_free(mci); > + mutex_unlock(&vnet_edac_lock); > +} > + > +static const struct of_device_id amd_edac_match[] = { > + { .compatible = "amd,versalnet-edac", }, > + { > + /* end of table */ Really? No, drop. > + } > +}; > +MODULE_DEVICE_TABLE(of, amd_edac_match); > + > +static struct platform_driver amd_ddr_edac_mc_driver = { > + .driver = { > + .name = "amd-ddrmc-edac", > + .of_match_table = amd_edac_match, > + }, > + .probe = mc_probe, > + .remove = mc_remove, > +}; > + > +module_platform_driver(amd_ddr_edac_mc_driver); > + > +MODULE_AUTHOR("AMD Inc"); > +MODULE_DESCRIPTION("AMD DDRMC ECC driver"); > +MODULE_LICENSE("GPL"); > -- > 2.17.1 >