RE: [PATCH v3 1/5] ACPI/RAS/AEST: Initial AEST driver

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hello, some comments below.

> Subject: [PATCH v3 1/5] ACPI/RAS/AEST: Initial AEST driver
> 
> Add support for parsing the ARM Error Source Table and basic handling of
> errors reported through both memory mapped and system register interfaces.
> 
> Assume system register interfaces are only registered with private
> peripheral interrupts (PPIs); otherwise there is no guarantee the
> core handling the error is the core which took the error and has the
> syndrome info in its system registers.
> 
> In kernel-first mode, all configuration is controlled by the kernel, including
> the CE threshold (ce_threshold) and interrupt enable/disable.
> 
> All detected errors will be processed as follows:
>   - CE, DE: use a workqueue to log these hardware errors.
>   - UER, UEO: log it and call memory_failure in a workqueue.
>   - UC, UEU: panic in irq context.
> 
> Signed-off-by: Tyler Baicar <baicar@xxxxxxxxxxxxxxxxxxxxxx>
> Signed-off-by: Ruidong Tian <tianruidong@xxxxxxxxxxxxxxxxx>
> ---
>  MAINTAINERS                  |  10 +
>  arch/arm64/include/asm/ras.h |  95 ++++
>  drivers/acpi/arm64/Kconfig   |  11 +
>  drivers/acpi/arm64/Makefile  |   1 +
>  drivers/acpi/arm64/aest.c    | 335 ++++++++++++
>  drivers/acpi/arm64/init.c    |   2 +
>  drivers/acpi/arm64/init.h    |   1 +
>  drivers/ras/Kconfig          |   1 +
>  drivers/ras/Makefile         |   1 +
>  drivers/ras/aest/Kconfig     |  17 +
>  drivers/ras/aest/Makefile    |   5 +
>  drivers/ras/aest/aest-core.c | 976 +++++++++++++++++++++++++++++++++++
>  drivers/ras/aest/aest.h      | 323 ++++++++++++
>  include/linux/acpi_aest.h    |  68 +++
>  include/linux/cpuhotplug.h   |   1 +
>  include/linux/ras.h          |   8 +
>  16 files changed, 1855 insertions(+)
>  create mode 100644 arch/arm64/include/asm/ras.h
>  create mode 100644 drivers/acpi/arm64/aest.c
>  create mode 100644 drivers/ras/aest/Kconfig
>  create mode 100644 drivers/ras/aest/Makefile
>  create mode 100644 drivers/ras/aest/aest-core.c
>  create mode 100644 drivers/ras/aest/aest.h
>  create mode 100644 include/linux/acpi_aest.h
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 637ddd44245f..d757f9339627 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -330,6 +330,16 @@ S:	Maintained
>  F:	drivers/acpi/arm64
>  F:	include/linux/acpi_iort.h
> 
> +ACPI AEST
> +M:	Ruidong Tian <tianruidong@xxxxxxxxxxxxxxxxx>
> +L:	linux-acpi@xxxxxxxxxxxxxxx
> +L:	linux-arm-kernel@xxxxxxxxxxxxxxxxxxx
> +S:	Supported
> +F:	arch/arm64/include/asm/ras.h
> +F:	drivers/acpi/arm64/aest.c
> +F:	drivers/ras/aest/
> +F:	include/linux/acpi_aest.h
> +
>  ACPI FOR RISC-V (ACPI/riscv)
>  M:	Sunil V L <sunilvl@xxxxxxxxxxxxxxxx>
>  L:	linux-acpi@xxxxxxxxxxxxxxx
> diff --git a/arch/arm64/include/asm/ras.h b/arch/arm64/include/asm/ras.h
> new file mode 100644
> index 000000000000..7676add8a0ed
> --- /dev/null
> +++ b/arch/arm64/include/asm/ras.h
> @@ -0,0 +1,95 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __ASM_RAS_H
> +#define __ASM_RAS_H
> +
> +#include <linux/types.h>
> +#include <linux/bits.h>
> +
> +/* ERR<n>FR */
> +#define ERR_FR_CE                      GENMASK_ULL(54, 53)
> +#define ERR_FR_RP                      BIT(15)
> +#define ERR_FR_CEC                     GENMASK_ULL(14, 12)
> +
> +#define ERR_FR_RP_SINGLE_COUNTER       0
> +#define ERR_FR_RP_DOUBLE_COUNTER       1
> +
> +#define ERR_FR_CEC_0B_COUNTER          0
> +#define ERR_FR_CEC_8B_COUNTER          BIT(1)
> +#define ERR_FR_CEC_16B_COUNTER         BIT(2)
> +
> +/* ERR<n>STATUS */
> +#define ERR_STATUS_AV		BIT(31)
> +#define ERR_STATUS_V		BIT(30)
> +#define ERR_STATUS_UE		BIT(29)
> +#define ERR_STATUS_ER		BIT(28)
> +#define ERR_STATUS_OF		BIT(27)
> +#define ERR_STATUS_MV		BIT(26)
> +#define ERR_STATUS_CE		(BIT(25) | BIT(24))
> +#define ERR_STATUS_DE		BIT(23)
> +#define ERR_STATUS_PN		BIT(22)
> +#define ERR_STATUS_UET		(BIT(21) | BIT(20))
> +#define ERR_STATUS_CI		BIT(19)
> +#define ERR_STATUS_IERR		GENMASK_ULL(15, 8)
> +#define ERR_STATUS_SERR		GENMASK_ULL(7, 0)
> +
> +/* These bits are write-one-to-clear */
> +#define ERR_STATUS_W1TC		(ERR_STATUS_AV | ERR_STATUS_V | ERR_STATUS_UE | \
> +				ERR_STATUS_ER | ERR_STATUS_OF | ERR_STATUS_MV | \
> +				ERR_STATUS_CE | ERR_STATUS_DE | ERR_STATUS_PN | \
> +				ERR_STATUS_UET | ERR_STATUS_CI)
> +
> +#define ERR_STATUS_UET_UC	0
> +#define ERR_STATUS_UET_UEU	1
> +#define ERR_STATUS_UET_UEO	2
> +#define ERR_STATUS_UET_UER	3
> +
> +/* ERR<n>CTLR */
> +#define ERR_CTLR_CFI		BIT(8)
> +#define ERR_CTLR_FI		BIT(3)
> +#define ERR_CTLR_UI		BIT(2)
> +
> +/* ERR<n>ADDR */
> +#define ERR_ADDR_AI		BIT(61)
> +#define ERR_ADDR_PADDR		GENMASK_ULL(55, 0)
> +
> +/* ERR<n>MISC0 */
> +
> +/* ERR<n>FR.CEC == 0b010, ERR<n>FR.RP == 0  */
> +#define ERR_MISC0_8B_OF		BIT(39)
> +#define ERR_MISC0_8B_CEC	GENMASK_ULL(38, 32)
> +
> +/* ERR<n>FR.CEC == 0b100, ERR<n>FR.RP == 0  */
> +#define ERR_MISC0_16B_OF	BIT(47)
> +#define ERR_MISC0_16B_CEC	GENMASK_ULL(46, 32)
> +
> +#define ERR_MISC0_CEC_SHIFT	31
> +
> +#define ERR_8B_CEC_MAX		(ERR_MISC0_8B_CEC >> ERR_MISC0_CEC_SHIFT)
> +#define ERR_16B_CEC_MAX		(ERR_MISC0_16B_CEC >> ERR_MISC0_CEC_SHIFT)
> +
> +/* ERR<n>FR.CEC == 0b100, ERR<n>FR.RP == 1  */
> +#define ERR_MISC0_16B_OFO	BIT(63)
> +#define ERR_MISC0_16B_CECO	GENMASK_ULL(62, 48)
> +#define ERR_MISC0_16B_OFR	BIT(47)
> +#define ERR_MISC0_16B_CECR	GENMASK_ULL(46, 32)
> +
> +/* ERRDEVARCH */
> +#define ERRDEVARCH_REV		GENMASK(19, 16)
> +
> +enum ras_ce_threshold {
> +	RAS_CE_THRESHOLD_0B,
> +	RAS_CE_THRESHOLD_8B,
> +	RAS_CE_THRESHOLD_16B,
> +	RAS_CE_THRESHOLD_32B,
> +	UNKNOWN,
> +};
> +
> +struct ras_ext_regs {
> +	u64 err_fr;
> +	u64 err_ctlr;
> +	u64 err_status;
> +	u64 err_addr;
> +	u64 err_misc[4];
> +};
> +
> +#endif	/* __ASM_RAS_H */
> diff --git a/drivers/acpi/arm64/Kconfig b/drivers/acpi/arm64/Kconfig
> index b3ed6212244c..c8eb6de95733 100644
> --- a/drivers/acpi/arm64/Kconfig
> +++ b/drivers/acpi/arm64/Kconfig
> @@ -21,3 +21,14 @@ config ACPI_AGDI
> 
>  config ACPI_APMT
>  	bool
> +
> +config ACPI_AEST
> +	bool "ARM Error Source Table Support"
> +	depends on ARM64_RAS_EXTN
> +
> +	help
> +	  The Arm Error Source Table (AEST) provides details on ACPI
> +	  extensions that enable kernel-first handling of errors in a
> +	  system that supports the Armv8 RAS extensions.
> +
> +	  If set, the kernel will report and log hardware errors.
> diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile
> index 05ecde9eaabe..8e240b281fd1 100644
> --- a/drivers/acpi/arm64/Makefile
> +++ b/drivers/acpi/arm64/Makefile
> @@ -6,5 +6,6 @@ obj-$(CONFIG_ACPI_GTDT) 	+= gtdt.o
>  obj-$(CONFIG_ACPI_IORT) 	+= iort.o
>  obj-$(CONFIG_ACPI_PROCESSOR_IDLE) += cpuidle.o
>  obj-$(CONFIG_ARM_AMBA)		+= amba.o
> +obj-$(CONFIG_ACPI_AEST) 	+= aest.o
>  obj-y				+= dma.o init.o
>  obj-y				+= thermal_cpufreq.o
> diff --git a/drivers/acpi/arm64/aest.c b/drivers/acpi/arm64/aest.c
> new file mode 100644
> index 000000000000..6dba9c23e04e
> --- /dev/null
> +++ b/drivers/acpi/arm64/aest.c
> @@ -0,0 +1,335 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * ARM Error Source Table Support
> + *
> + * Copyright (c) 2024, Alibaba Group.
> + */
> +
> +#include <linux/xarray.h>
> +#include <linux/platform_device.h>
> +#include <linux/acpi_aest.h>
> +
> +#include "init.h"
> +
> +#undef pr_fmt
> +#define pr_fmt(fmt) "ACPI AEST: " fmt
> +
> +static struct xarray *aest_array;
> +
> +static void __init aest_init_interface(struct acpi_aest_hdr *hdr,
> +				       struct acpi_aest_node *node)
> +{
> +	struct acpi_aest_node_interface_header *interface;
> +
> +	interface = ACPI_ADD_PTR(struct acpi_aest_node_interface_header, hdr,
> +				 hdr->node_interface_offset);
> +
> +	node->type = hdr->type;
> +	node->interface_hdr = interface;
> +
> +	switch (interface->group_format) {
> +	case ACPI_AEST_NODE_GROUP_FORMAT_4K: {
> +		struct acpi_aest_node_interface_4k *interface_4k =
> +			(struct acpi_aest_node_interface_4k *)(interface + 1);
> +
> +		node->common = &interface_4k->common;
> +		node->record_implemented =
> +			(unsigned long *)&interface_4k->error_record_implemented;
> +		node->status_reporting =
> +			(unsigned long *)&interface_4k->error_status_reporting;
> +		node->addressing_mode =
> +			(unsigned long *)&interface_4k->addressing_mode;
> +		break;
> +	}
> +	case ACPI_AEST_NODE_GROUP_FORMAT_16K: {
> +		struct acpi_aest_node_interface_16k *interface_16k =
> +			(struct acpi_aest_node_interface_16k *)(interface + 1);
> +
> +		node->common = &interface_16k->common;
> +		node->record_implemented =
> +			(unsigned long *)interface_16k->error_record_implemented;
> +		node->status_reporting =
> +			(unsigned long *)interface_16k->error_status_reporting;
> +		node->addressing_mode =
> +			(unsigned long *)interface_16k->addressing_mode;
> +		break;
> +	}
> +	case ACPI_AEST_NODE_GROUP_FORMAT_64K: {
> +		struct acpi_aest_node_interface_64k *interface_64k =
> +			(struct acpi_aest_node_interface_64k *)(interface + 1);
> +
> +		node->common = &interface_64k->common;
> +		node->record_implemented =
> +			(unsigned long *)interface_64k->error_record_implemented;
> +		node->status_reporting =
> +			(unsigned long *)interface_64k->error_status_reporting;
> +		node->addressing_mode =
> +			(unsigned long *)interface_64k->addressing_mode;
> +		break;
> +	}
> +	default:
> +		pr_err("invalid group format: %d\n", interface->group_format);
> +	}
> +
> +	node->interrupt = ACPI_ADD_PTR(struct acpi_aest_node_interrupt_v2,
> +					hdr, hdr->node_interrupt_offset);
> +
> +	node->interrupt_count = hdr->node_interrupt_count;
> +}
> +
> +static int __init acpi_aest_init_node_common(struct acpi_aest_hdr *aest_hdr,
> +					struct acpi_aest_node *node)
> +{
> +	int ret;
> +	struct aest_hnode *hnode;
> +	u64 error_device_id;
> +
> +	aest_init_interface(aest_hdr, node);
> +
> +	error_device_id = node->common->error_node_device;

I think I see a problem with this.

[Index of Archives]     [Linux IBM ACPI]     [Linux Power Management]     [Linux Kernel]     [Linux Laptop]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]     [Linux Resources]
  Powered by Linux