Re: [PATCH v4 2/2] soc: amazon: al-pos-edac: Introduce Amazon's Annapurna Labs POS EDAC driver

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 03 Oct 2019 12:32:41 +0100,
Talel Shenhar <talel@xxxxxxxxxx> wrote:
> 
> Amazon's Annapurna Labs SoCs include a Point Of Serialization error
> logging unit that reports an error in case of a write error (e.g. an
> attempt to write to a read-only register).
> This error shall be reported to the EDAC subsystem as an uncorrectable error.
> 
> Signed-off-by: Talel Shenhar <talel@xxxxxxxxxx>
> ---
>  MAINTAINERS                |   7 ++
>  drivers/edac/Kconfig       |   6 ++
>  drivers/edac/Makefile      |   1 +
>  drivers/edac/al_pos_edac.c | 173 +++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 187 insertions(+)
>  create mode 100644 drivers/edac/al_pos_edac.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index e7a47b5..f5ce446 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -751,6 +751,13 @@ F:	drivers/tty/serial/altera_jtaguart.c
>  F:	include/linux/altera_uart.h
>  F:	include/linux/altera_jtaguart.h
>  
> +AMAZON ANNAPURNA LABS POS EDAC DRIVER
> +M:	Talel Shenhar <talel@xxxxxxxxxx>
> +M:	Talel Shenhar <talelshenhar@xxxxxxxxx>
> +S:	Maintained
> +F:	Documentation/devicetree/bindings/edac/amazon,al-pos-edac.yaml
> +F:	drivers/edac/al-pos-edac.c
> +
>  AMAZON ANNAPURNA LABS THERMAL MMIO DRIVER
>  M:	Talel Shenhar <talel@xxxxxxxxxx>
>  S:	Maintained
> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
> index 200c04c..bb5805f 100644
> --- a/drivers/edac/Kconfig
> +++ b/drivers/edac/Kconfig
> @@ -100,6 +100,12 @@ config EDAC_AMD64_ERROR_INJECTION
>  	  In addition, there are two control files, inject_read and inject_write,
>  	  which trigger the DRAM ECC Read and Write respectively.
>  
> +config EDAC_AL_POS
> +	tristate "Amazon's Annapurna Labs POS EDAC driver"
> +	depends on (ARCH_ALPINE || COMPILE_TEST)
> +	help
> +	  Include support for the SoC POS EDAC error capability.
> +
>  config EDAC_AMD76X
>  	tristate "AMD 76x (760, 762, 768)"
>  	depends on PCI && X86_32
> diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
> index 165ca65e..3571936 100644
> --- a/drivers/edac/Makefile
> +++ b/drivers/edac/Makefile
> @@ -22,6 +22,7 @@ obj-$(CONFIG_EDAC_GHES)			+= ghes_edac.o
>  edac_mce_amd-y				:= mce_amd.o
>  obj-$(CONFIG_EDAC_DECODE_MCE)		+= edac_mce_amd.o
>  
> +obj-$(CONFIG_EDAC_AL_POS)		+= al_pos_edac.o
>  obj-$(CONFIG_EDAC_AMD76X)		+= amd76x_edac.o
>  obj-$(CONFIG_EDAC_CPC925)		+= cpc925_edac.o
>  obj-$(CONFIG_EDAC_I5000)		+= i5000_edac.o
> diff --git a/drivers/edac/al_pos_edac.c b/drivers/edac/al_pos_edac.c
> new file mode 100644
> index 00000000..bd6cd87
> --- /dev/null
> +++ b/drivers/edac/al_pos_edac.c
> @@ -0,0 +1,173 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
> + */
> +#include <linux/bitfield.h>
> +#include <linux/edac.h>
> +#include <linux/of_irq.h>
> +#include "edac_module.h"
> +
> +#define DRV_NAME "al_pos_edac"
> +#define AL_POS_EDAC_MSG_MAX 256
> +
> +/* Registers Offset */
> +#define AL_POS_ERROR_LOG_1	0x0
> +#define AL_POS_ERROR_LOG_0	0x4
> +
> +/* Registers Fields */
> +#define AL_POS_ERROR_LOG_1_VALID	BIT(31)
> +#define AL_POS_ERROR_LOG_1_BRESP	GENMASK(18, 17)
> +#define AL_POS_ERROR_LOG_1_REQUEST_ID	GENMASK(16, 8)
> +#define AL_POS_ERROR_LOG_1_ADDR_HIGH	GENMASK(7, 0)
> +
> +#define AL_POS_ERROR_LOG_0_ADDR_LOW	GENMASK(31, 0)
> +
> +struct al_pos_edac {
> +	struct edac_device_ctl_info *edac_dev;
> +	void __iomem *mmio_base;
> +	int irq;
> +};
> +
> +static int al_pos_handle(struct al_pos_edac *al_pos)
> +{
> +	u32 log0, log1;
> +	u64 addr;
> +	u16 request_id;
> +	u8 bresp;
> +	char msg[AL_POS_EDAC_MSG_MAX];
> +
> +	log1 = readl(al_pos->mmio_base + AL_POS_ERROR_LOG_1);

I already commented on the misuse of strict accesses. Unless you can
explain and document *why* you need the extra ordering, please use
relaxed accesses.

> +	if (!FIELD_GET(AL_POS_ERROR_LOG_1_VALID, log1))
> +		return 0;
> +
> +	log0 = readl(al_pos->mmio_base + AL_POS_ERROR_LOG_0);
> +	writel(0, al_pos->mmio_base + AL_POS_ERROR_LOG_1);
> +
> +	addr = FIELD_GET(AL_POS_ERROR_LOG_0_ADDR_LOW, log0);
> +	addr |= (((u64)FIELD_GET(AL_POS_ERROR_LOG_1_ADDR_HIGH, log1)) << 32);
> +	request_id = FIELD_GET(AL_POS_ERROR_LOG_1_REQUEST_ID, log1);
> +	bresp = FIELD_GET(AL_POS_ERROR_LOG_1_BRESP, log1);
> +
> +	snprintf(msg, sizeof(msg),
> +		 "addr=0x%llx request_id=0x%x bresp=0x%x\n",
> +		 addr, request_id, bresp);
> +
> +	edac_device_handle_ue(al_pos->edac_dev, 0, 0, msg);
> +
> +	return 1;
> +}
> +
> +static void al_pos_edac_check(struct edac_device_ctl_info *edac_dev)
> +{
> +	struct al_pos_edac *al_pos = edac_dev->pvt_info;
> +
> +	al_pos_handle(al_pos);
> +}
> +
> +static irqreturn_t al_pos_irq_handler(int irq, void *info)
> +{
> +	struct platform_device *pdev = info;
> +	struct al_pos_edac *al_pos = platform_get_drvdata(pdev);
> +
> +	if (al_pos_handle(al_pos))
> +		return IRQ_HANDLED;
> +	return IRQ_NONE;
> +}
> +
> +static int al_pos_probe(struct platform_device *pdev)
> +{
> +	struct edac_device_ctl_info *edac_dev;
> +	struct al_pos_edac *al_pos;
> +	int ret;
> +
> +	edac_dev = edac_device_alloc_ctl_info(sizeof(*al_pos), DRV_NAME, 1,
> +					      DRV_NAME, 1, 0, NULL, 0,
> +					      edac_device_alloc_index());
> +	if (!edac_dev)
> +		return -ENOMEM;
> +
> +	al_pos = edac_dev->pvt_info;
> +	al_pos->edac_dev = edac_dev;
> +	platform_set_drvdata(pdev, al_pos);
> +
> +	al_pos->mmio_base = devm_platform_ioremap_resource(pdev, 0);
> +	if (IS_ERR(al_pos->mmio_base)) {
> +		dev_err(&pdev->dev, "failed to ioremap memory (%ld)\n",
> +			PTR_ERR(al_pos->mmio_base));
> +		return PTR_ERR(al_pos->mmio_base);
> +	}
> +
> +	al_pos->irq = platform_get_irq(pdev, 0);
> +	if (al_pos->irq <= 0)
> +		edac_dev->edac_check = al_pos_edac_check;
> +
> +	edac_dev->dev = &pdev->dev;
> +	edac_dev->mod_name = DRV_NAME;
> +	edac_dev->dev_name = dev_name(&pdev->dev);
> +	edac_dev->ctl_name = "POS";
> +
> +	ret = edac_device_add_device(edac_dev);
> +	if (ret) {
> +		dev_err(&pdev->dev, "Failed to add edac device\n");
> +		goto err_free_edac;
> +	}
> +
> +	if (al_pos->irq > 0) {
> +		ret = devm_request_irq(&pdev->dev,
> +				       al_pos->irq,
> +				       al_pos_irq_handler,
> +				       0,
> +				       pdev->name,
> +				       pdev);
> +		if (ret != 0) {
> +			dev_err(&pdev->dev,
> +				"failed to register to irq %d (%d)\n",
> +				al_pos->irq, ret);
> +			goto err_remove_edac;

Would it be worth continuing without interrupts? After all, the
interrupt seems to be an optional part of the device...

Thanks,

	M.

-- 
Jazz is not dead, it just smells funny.



[Index of Archives]     [Device Tree Compiler]     [Device Tree Spec]     [Linux Driver Backports]     [Video for Linux]     [Linux USB Devel]     [Linux PCI Devel]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Yosemite Backpacking]


  Powered by Linux