The NMI-like notifications scribble over ghes->estatus, before copying it somewhere else. If this interrupts the ghes_probe() code calling ghes_proc() on each struct ghes, the data is corrupted. All the NMI-like notifications should use a queued estatus entry from the beginning, instead of the ghes version, then copying it. To do this, break up any use of "ghes->estatus" so that all functions take the estatus as an argument. This patch just moves these ghes->estatus dereferences into separate arguments, no change in behaviour. struct ghes becomes unused in ghes_clear_estatus() as it only wanted ghes->estatus, which we now pass directly. This is removed. Signed-off-by: James Morse <james.morse@xxxxxxx> --- Changes since v6: * Changed subject * Renamed ghes_estatus to src_estatus, which is a little clearer * Removed struct ghes from ghes_clear_estatus() now that this becomes unused in this patch. * Mangled the commit message to be different --- drivers/acpi/apei/ghes.c | 92 +++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 43 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index ccad57468ab7..f95db2398dd5 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -293,9 +293,9 @@ static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, } } -static int ghes_read_estatus(struct ghes *ghes, u64 *buf_paddr, - enum fixed_addresses fixmap_idx) - +static int ghes_read_estatus(struct ghes *ghes, + struct acpi_hest_generic_status *estatus, + u64 *buf_paddr, enum fixed_addresses fixmap_idx) { struct acpi_hest_generic *g = ghes->generic; u32 len; @@ -312,25 +312,25 @@ static int ghes_read_estatus(struct ghes *ghes, u64 *buf_paddr, if (!*buf_paddr) return -ENOENT; - ghes_copy_tofrom_phys(ghes->estatus, *buf_paddr, - sizeof(*ghes->estatus), 1, fixmap_idx); - if (!ghes->estatus->block_status) { + ghes_copy_tofrom_phys(estatus, *buf_paddr, sizeof(*estatus), 1, + fixmap_idx); + if (!estatus->block_status) { *buf_paddr = 0; return -ENOENT; } rc = -EIO; - len = cper_estatus_len(ghes->estatus); - if (len < sizeof(*ghes->estatus)) + len = cper_estatus_len(estatus); + if (len < sizeof(*estatus)) goto err_read_block; if (len > ghes->generic->error_block_length) goto err_read_block; - if (cper_estatus_check_header(ghes->estatus)) + if (cper_estatus_check_header(estatus)) goto err_read_block; - ghes_copy_tofrom_phys(ghes->estatus + 1, - *buf_paddr + sizeof(*ghes->estatus), - len - sizeof(*ghes->estatus), 1, fixmap_idx); - if (cper_estatus_check(ghes->estatus)) + ghes_copy_tofrom_phys(estatus + 1, + *buf_paddr + sizeof(*estatus), + len - sizeof(*estatus), 1, fixmap_idx); + if (cper_estatus_check(estatus)) goto err_read_block; rc = 0; @@ -342,16 +342,17 @@ static int ghes_read_estatus(struct ghes *ghes, u64 *buf_paddr, return rc; } -static void ghes_clear_estatus(struct ghes *ghes, u64 buf_paddr, - enum fixed_addresses fixmap_idx) +static void ghes_clear_estatus(struct ghes *ghes, + struct acpi_hest_generic_status *estatus, + u64 buf_paddr, enum fixed_addresses fixmap_idx) { - ghes->estatus->block_status = 0; + estatus->block_status = 0; if (!buf_paddr) return; - ghes_copy_tofrom_phys(ghes->estatus, buf_paddr, - sizeof(ghes->estatus->block_status), 0, + ghes_copy_tofrom_phys(estatus, buf_paddr, + sizeof(estatus->block_status), 0, fixmap_idx); /* @@ -651,12 +652,13 @@ static void ghes_estatus_cache_add( rcu_read_unlock(); } -static void __ghes_panic(struct ghes *ghes, u64 buf_paddr, - enum fixed_addresses fixmap_idx) +static void __ghes_panic(struct ghes *ghes, + struct acpi_hest_generic_status *estatus, + u64 buf_paddr, enum fixed_addresses fixmap_idx) { - __ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus); + __ghes_print_estatus(KERN_EMERG, ghes->generic, estatus); - ghes_clear_estatus(ghes, buf_paddr, fixmap_idx); + ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); /* reboot to log the error! */ if (!panic_timeout) @@ -666,25 +668,25 @@ static void __ghes_panic(struct ghes *ghes, u64 buf_paddr, static int ghes_proc(struct ghes *ghes) { + struct acpi_hest_generic_status *estatus = ghes->estatus; u64 buf_paddr; int rc; - rc = ghes_read_estatus(ghes, &buf_paddr, FIX_APEI_GHES_IRQ); + rc = ghes_read_estatus(ghes, estatus, &buf_paddr, FIX_APEI_GHES_IRQ); if (rc) goto out; - if (ghes_severity(ghes->estatus->error_severity) >= GHES_SEV_PANIC) { - __ghes_panic(ghes, buf_paddr, FIX_APEI_GHES_IRQ); - } + if (ghes_severity(estatus->error_severity) >= GHES_SEV_PANIC) + __ghes_panic(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ); - if (!ghes_estatus_cached(ghes->estatus)) { - if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus)) - ghes_estatus_cache_add(ghes->generic, ghes->estatus); + if (!ghes_estatus_cached(estatus)) { + if (ghes_print_estatus(NULL, ghes->generic, estatus)) + ghes_estatus_cache_add(ghes->generic, estatus); } - ghes_do_proc(ghes, ghes->estatus); + ghes_do_proc(ghes, estatus); out: - ghes_clear_estatus(ghes, buf_paddr, FIX_APEI_GHES_IRQ); + ghes_clear_estatus(ghes, estatus, buf_paddr, FIX_APEI_GHES_IRQ); return rc; } @@ -825,17 +827,20 @@ static void ghes_print_queued_estatus(void) } /* Save estatus for further processing in IRQ context */ -static void __process_error(struct ghes *ghes) +static void __process_error(struct ghes *ghes, + struct acpi_hest_generic_status *src_estatus) { -#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG u32 len, node_len; struct ghes_estatus_node *estatus_node; struct acpi_hest_generic_status *estatus; - if (ghes_estatus_cached(ghes->estatus)) + if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG)) return; - len = cper_estatus_len(ghes->estatus); + if (ghes_estatus_cached(src_estatus)) + return; + + len = cper_estatus_len(src_estatus); node_len = GHES_ESTATUS_NODE_LEN(len); estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len); @@ -845,30 +850,31 @@ static void __process_error(struct ghes *ghes) estatus_node->ghes = ghes; estatus_node->generic = ghes->generic; estatus = GHES_ESTATUS_FROM_NODE(estatus_node); - memcpy(estatus, ghes->estatus, len); + memcpy(estatus, src_estatus, len); llist_add(&estatus_node->llnode, &ghes_estatus_llist); -#endif } static int ghes_in_nmi_queue_one_entry(struct ghes *ghes, enum fixed_addresses fixmap_idx) { + struct acpi_hest_generic_status *estatus = ghes->estatus; u64 buf_paddr; int sev; - if (ghes_read_estatus(ghes, &buf_paddr, fixmap_idx)) { - ghes_clear_estatus(ghes, buf_paddr, fixmap_idx); + if (ghes_read_estatus(ghes, estatus, &buf_paddr, fixmap_idx)) { + ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); return -ENOENT; } - sev = ghes_severity(ghes->estatus->error_severity); + sev = ghes_severity(estatus->error_severity); if (sev >= GHES_SEV_PANIC) { ghes_print_queued_estatus(); - __ghes_panic(ghes, buf_paddr, fixmap_idx); + __ghes_panic(ghes, estatus, buf_paddr, fixmap_idx); + } - __process_error(ghes); - ghes_clear_estatus(ghes, buf_paddr, fixmap_idx); + __process_error(ghes, estatus); + ghes_clear_estatus(ghes, estatus, buf_paddr, fixmap_idx); return 0; } -- 2.20.1