Currently memory_failure() assumes an infrequent report on a handful of
pages. A new use case for surprise removal of a persistent memory device
needs to trigger memory_failure() on a large range. Rate limit
memory_failure() error logging, and allow the
memory_failure_dev_pagemap() helper to be called directly.

Cc: Naoya Horiguchi <naoya.horiguchi@xxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>
---
 mm/memory-failure.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 24210c9bd843..43ba4307c526 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -395,8 +395,9 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
 			 * signal and then access the memory. Just kill it.
 			 */
 			if (fail || tk->addr == -EFAULT) {
-				pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
-				       pfn, tk->tsk->comm, tk->tsk->pid);
+				pr_err_ratelimited(
+					"Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
+					pfn, tk->tsk->comm, tk->tsk->pid);
 				do_send_sig_info(SIGKILL, SEND_SIG_PRIV,
 						 tk->tsk, PIDTYPE_PID);
 			}
@@ -408,8 +409,9 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
 			 * process anyways.
 			 */
 			else if (kill_proc(tk, pfn, flags) < 0)
-				pr_err("Memory failure: %#lx: Cannot send advisory machine check signal to %s:%d\n",
-				       pfn, tk->tsk->comm, tk->tsk->pid);
+				pr_err_ratelimited(
+					"Memory failure: %#lx: Cannot send advisory machine check signal to %s:%d\n",
+					pfn, tk->tsk->comm, tk->tsk->pid);
 		}
 		put_task_struct(tk->tsk);
 		kfree(tk);
@@ -919,8 +921,8 @@ static void action_result(unsigned long pfn, enum mf_action_page_type type,
 {
 	trace_memory_failure_event(pfn, type, result);
 
-	pr_err("Memory failure: %#lx: recovery action for %s: %s\n",
-		pfn, action_page_types[type], action_name[result]);
+	pr_err_ratelimited("Memory failure: %#lx: recovery action for %s: %s\n",
+		pfn, action_page_types[type], action_name[result]);
 }
 
 static int page_action(struct page_state *ps, struct page *p,
@@ -1375,8 +1377,6 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 unlock:
 	dax_unlock_page(page, cookie);
 out:
-	/* drop pgmap ref acquired in caller */
-	put_dev_pagemap(pgmap);
 	action_result(pfn, MF_MSG_DAX, rc ? MF_FAILED : MF_RECOVERED);
 	return rc;
 }
@@ -1415,9 +1415,12 @@ int memory_failure(unsigned long pfn, int flags)
 	if (!p) {
 		if (pfn_valid(pfn)) {
 			pgmap = get_dev_pagemap(pfn, NULL);
-			if (pgmap)
-				return memory_failure_dev_pagemap(pfn, flags,
-								  pgmap);
+			if (pgmap) {
+				res = memory_failure_dev_pagemap(pfn, flags,
+								 pgmap);
+				put_dev_pagemap(pgmap);
+				return res;
+			}
 		}
 		pr_err("Memory failure: %#lx: memory outside kernel control\n",
 			pfn);