APEI is unable to do all of its error handling work in nmi-context, so it defers non-fatal work onto the irq_work queue. arch_irq_work_raise() sends an IPI to the calling cpu, but this is not guaranteed to be taken before returning to user-space. Unless the exception interrupted a context with irqs-masked, irq_work_run() can run immediately. Otherwise return -EINPROGRESS to indicate ghes_notify_sea() found some work to do, but it hasn't finished yet. With this apei_claim_sea() returning '0' means this external-abort was also notification of a firmware-first RAS error, and that APEI has processed the CPER records. Signed-off-by: James Morse <james.morse@xxxxxxx> --- Changes since $last_year: * Dropped all the tags ... its been a year. * Added user_mode() test in do_sea() and expanded the comment. * Dont depend on daif value for return_to_irqs_enabled because of pNMI. * pr_warn() should be ratelimited --- arch/arm64/kernel/acpi.c | 25 +++++++++++++++++++++++++ arch/arm64/mm/fault.c | 12 +++++++----- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index a100483b47c4..46ec402e97ed 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -19,6 +19,7 @@ #include <linux/init.h> #include <linux/irq.h> #include <linux/irqdomain.h> +#include <linux/irq_work.h> #include <linux/memblock.h> #include <linux/of_fdt.h> #include <linux/smp.h> @@ -269,6 +270,7 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr) int apei_claim_sea(struct pt_regs *regs) { int err = -ENOENT; + bool return_to_irqs_enabled; unsigned long current_flags; if (!IS_ENABLED(CONFIG_ACPI_APEI_GHES)) @@ -276,6 +278,12 @@ int apei_claim_sea(struct pt_regs *regs) current_flags = local_daif_save_flags(); + /* current_flags isn't useful here as daif doesn't tell us about pNMI */ + return_to_irqs_enabled = !irqs_disabled_flags(arch_local_save_flags()); + + if (regs) + return_to_irqs_enabled = interrupts_enabled(regs); + /* * SEA can interrupt SError, mask it and describe this as an NMI so * that APEI defers the handling. @@ -284,6 +292,23 @@ int apei_claim_sea(struct pt_regs *regs) nmi_enter(); err = ghes_notify_sea(); nmi_exit(); + + /* + * APEI NMI-like notifications are deferred to irq_work. Unless + * we interrupted irqs-masked code, we can do that now. + */ + if (!err) { + if (return_to_irqs_enabled) { + local_daif_restore(DAIF_PROCCTX_NOIRQ); + __irq_enter(); + irq_work_run(); + __irq_exit(); + } else { + pr_warn_ratelimited("APEI work queued but not completed"); + err = -EINPROGRESS; + } + } + local_daif_restore(current_flags); return err; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 85566d32958f..cefeb34580da 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -645,11 +645,13 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) inf = esr_to_fault_info(esr); - /* - * Return value ignored as we rely on signal merging. - * Future patches will make this more robust. - */ - apei_claim_sea(regs); + if (user_mode(regs) && apei_claim_sea(regs) == 0) { + /* + * APEI claimed this as a firmware-first notification. + * Some processing deferred to task_work before ret_to_user(). + */ + return 0; + } if (esr & ESR_ELx_FnV) siaddr = NULL; -- 2.24.1