Add a trace event for hardware errors reported by the ARMv8.2 RAS extension registers. Signed-off-by: Tyler Baicar <baicar@xxxxxxxxxxxxxxxxxxxxxx> --- arch/arm64/kernel/ras.c | 3 +++ drivers/acpi/arm64/aest.c | 4 ++++ include/ras/ras_event.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+) diff --git a/arch/arm64/kernel/ras.c b/arch/arm64/kernel/ras.c index ca47efa..4e34d63 100644 --- a/arch/arm64/kernel/ras.c +++ b/arch/arm64/kernel/ras.c @@ -5,6 +5,7 @@ #include <linux/smp.h> #include <asm/ras.h> +#include <ras/ras_event.h> void arch_arm_ras_report_error(void) { @@ -50,6 +51,8 @@ void arch_arm_ras_report_error(void) regs.err_misc1); } + trace_arm_ras_ext_event(0, cpu_num, &regs); + /* * In the future, we will treat UER conditions as potentially * recoverable. diff --git a/drivers/acpi/arm64/aest.c b/drivers/acpi/arm64/aest.c index fd4f3b5..21ec583 100644 --- a/drivers/acpi/arm64/aest.c +++ b/drivers/acpi/arm64/aest.c @@ -13,6 +13,7 @@ #include <linux/ratelimit.h> #include <asm/ras.h> +#include <ras/ras_event.h> #undef pr_fmt #define pr_fmt(fmt) "ACPI AEST: " fmt @@ -102,6 +103,9 @@ static void aest_proc(struct aest_node_data *data) aest_print(data, regs, i); + trace_arm_ras_ext_event(data->node_type, data->data.proc.id, + &regs); + if (regs.err_status & ERR_STATUS_UE) fatal = true; diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index 36c5c5e..8b76cb1 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -339,6 +339,52 @@ ); /* + * ARM RAS Extension Events Report + * + * This event is generated when an error reported by the ARM RAS extension + * hardware is detected. 
+ */
+
+#ifdef CONFIG_ARM64
+#include <asm/ras.h>
+TRACE_EVENT(arm_ras_ext_event,
+	/* type and id name the reporting source; regs supplies the err_* values */
+	TP_PROTO(u8 type, u32 id, struct ras_ext_regs *regs),
+
+	TP_ARGS(type, id, regs),
+
+	TP_STRUCT__entry(
+		__field(u8, type)	/* 0 or AEST node_type, per caller */
+		__field(u32, id)	/* cpu_num or AEST proc.id, per caller */
+		__field(u64, err_fr)
+		__field(u64, err_ctlr)
+		__field(u64, err_status)
+		__field(u64, err_addr)
+		__field(u64, err_misc0)
+		__field(u64, err_misc1)
+	),
+
+	TP_fast_assign(
+		__entry->type = type;
+		__entry->id = id;
+		__entry->err_fr = regs->err_fr;
+		__entry->err_ctlr = regs->err_ctlr;
+		__entry->err_status = regs->err_status;
+		__entry->err_addr = regs->err_addr;
+		__entry->err_misc0 = regs->err_misc0;
+		__entry->err_misc1 = regs->err_misc1;
+	),
+	/* id is a u32, so it is printed with %u: %d would show large ids as negative */
+	TP_printk("type: %d; id: %u; ERR_FR: %llx; ERR_CTLR: %llx; "
+		  "ERR_STATUS: %llx; ERR_ADDR: %llx; ERR_MISC0: %llx; "
+		  "ERR_MISC1: %llx",
+		  __entry->type, __entry->id, __entry->err_fr,
+		  __entry->err_ctlr, __entry->err_status, __entry->err_addr,
+		  __entry->err_misc0, __entry->err_misc1)
+);
+#endif
+
+/*
  * memory-failure recovery action result event
  *
  * unsigned long pfn - Page Frame Number of the corrupted page
-- 
1.8.3.1