memory_failure() may not always recover successfully. In the synchronous external data abort case, if memory_failure() fails to recover, we must handle that failure. In that case, when the recovery fails, the common helper function arch_apei_do_recovery_failed() is invoked. On the arm64 platform, we just send a SIGBUS. Signed-off-by: Xie XiuQi <xiexiuqi@xxxxxxxxxx> --- drivers/acpi/apei/ghes.c | 3 ++- include/linux/mm.h | 2 +- mm/memory-failure.c | 24 +++++++++++++++++------- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index ba0631c54c52..ddc4da603215 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -435,7 +435,8 @@ static void ghes_kick_task_work(struct callback_head *head) estatus_node = container_of(head, struct ghes_estatus_node, task_work); if (IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE)) - memory_failure_queue_kick(estatus_node->task_work_cpu); + if (memory_failure_queue_kick(estatus_node->task_work_cpu)) + arch_apei_do_recovery_failed(); estatus = GHES_ESTATUS_FROM_NODE(estatus_node); node_len = GHES_ESTATUS_NODE_LEN(cper_estatus_len(estatus)); diff --git a/include/linux/mm.h b/include/linux/mm.h index 974ccca609d2..126d1395c208 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3290,7 +3290,7 @@ int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index, unsigned long count, int mf_flags); extern int memory_failure(unsigned long pfn, int flags); extern void memory_failure_queue(unsigned long pfn, int flags); -extern void memory_failure_queue_kick(int cpu); +extern int memory_failure_queue_kick(int cpu); extern int unpoison_memory(unsigned long pfn); extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_recovery; diff --git a/mm/memory-failure.c b/mm/memory-failure.c index bead6bccc7f2..b9398f67264a 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2240,12 +2240,12 @@ void memory_failure_queue(unsigned long pfn, int flags) } 
EXPORT_SYMBOL_GPL(memory_failure_queue); -static void memory_failure_work_func(struct work_struct *work) +static int __memory_failure_work_func(struct work_struct *work) { struct memory_failure_cpu *mf_cpu; struct memory_failure_entry entry = { 0, }; unsigned long proc_flags; - int gotten; + int gotten, ret = 0, result; mf_cpu = container_of(work, struct memory_failure_cpu, work); for (;;) { @@ -2254,24 +2254,34 @@ static void memory_failure_work_func(struct work_struct *work) spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); if (!gotten) break; - if (entry.flags & MF_SOFT_OFFLINE) + if (entry.flags & MF_SOFT_OFFLINE) { soft_offline_page(entry.pfn, entry.flags); - else - memory_failure(entry.pfn, entry.flags); + } else { + result = memory_failure(entry.pfn, entry.flags); + if (ret == 0 && result != 0) + ret = result; + } } + + return ret; +} + +static void memory_failure_work_func(struct work_struct *work) +{ + __memory_failure_work_func(work); } /* * Process memory_failure work queued on the specified CPU. * Used to avoid return-to-userspace racing with the memory_failure workqueue. */ -void memory_failure_queue_kick(int cpu) +int memory_failure_queue_kick(int cpu) { struct memory_failure_cpu *mf_cpu; mf_cpu = &per_cpu(memory_failure_cpu, cpu); cancel_work_sync(&mf_cpu->work); - memory_failure_work_func(&mf_cpu->work); + return __memory_failure_work_func(&mf_cpu->work); } static int __init memory_failure_init(void) -- 2.20.1