> The patch below does not apply to the 3.10-stable tree. > If someone wants it applied there, or to any other stable or longterm > tree, then please email the backport, including the original git commit > id to <stable@xxxxxxxxxxxxxxx>. > In my check, this patch applies cleanly on top of 3.10.45 + commit 74614de17db6 ("mm/memory-failure.c: don't let collect_procs() skip over processes for MF_ACTION_REQUIRED"). There might have been a conflict with other patches newly backported to stable. Could you show me how it failed to apply? Thanks, Naoya Horiguchi > ------------------ original commit in Linus's tree ------------------ > > From 3ba08129e38437561df44c36b7ea9081185d5333 Mon Sep 17 00:00:00 2001 > From: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx> > Date: Wed, 4 Jun 2014 16:11:02 -0700 > Subject: [PATCH] mm/memory-failure.c: support use of a dedicated thread to > handle SIGBUS(BUS_MCEERR_AO) > > Currently memory error handler handles action optional errors in the > deferred manner by default. And if a recovery aware application wants > to handle it immediately, it can do it by setting PF_MCE_EARLY flag. > However, such signal can be sent only to the main thread, so it's > problematic if the application wants to have a dedicated thread to > handler such signals. > > So this patch adds dedicated thread support to memory error handler. We > have PF_MCE_EARLY flags for each thread separately, so with this patch > AO signal is sent to the thread with PF_MCE_EARLY flag set, not the main > thread. If you want to implement a dedicated thread, you call prctl() > to set PF_MCE_EARLY on the thread. > > Memory error handler collects processes to be killed, so this patch lets > it check PF_MCE_EARLY flag on each thread in the collecting routines. > > No behavioral change for all non-early kill cases. 
> > Tony said: > > : The old behavior was crazy - someone with a multithreaded process might > : well expect that if they call prctl(PF_MCE_EARLY) in just one thread, then > : that thread would see the SIGBUS with si_code = BUS_MCEERR_A0 - even if > : that thread wasn't the main thread for the process. > > [akpm@xxxxxxxxxxxxxxxxxxxx: coding-style fixes] > Signed-off-by: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx> > Reviewed-by: Tony Luck <tony.luck@xxxxxxxxx> > Cc: Kamil Iskra <iskra@xxxxxxxxxxx> > Cc: Andi Kleen <andi@xxxxxxxxxxxxxx> > Cc: Borislav Petkov <bp@xxxxxxx> > Cc: Chen Gong <gong.chen@xxxxxxxxxxxxxxxxxx> > Cc: <stable@xxxxxxxxxxxxxxx> [3.2+] > Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> > Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> > > diff --git a/Documentation/vm/hwpoison.txt b/Documentation/vm/hwpoison.txt > index 550068466605..6ae89a9edf2a 100644 > --- a/Documentation/vm/hwpoison.txt > +++ b/Documentation/vm/hwpoison.txt > @@ -84,6 +84,11 @@ PR_MCE_KILL > PR_MCE_KILL_EARLY: Early kill > PR_MCE_KILL_LATE: Late kill > PR_MCE_KILL_DEFAULT: Use system global default > + Note that if you want to have a dedicated thread which handles > + the SIGBUS(BUS_MCEERR_AO) on behalf of the process, you should > + call prctl(PR_MCE_KILL_EARLY) on the designated thread. Otherwise, > + the SIGBUS is sent to the main thread. > + > PR_MCE_KILL_GET > return current mode > > diff --git a/mm/memory-failure.c b/mm/memory-failure.c > index ed339c505d55..cd8989c1027e 100644 > --- a/mm/memory-failure.c > +++ b/mm/memory-failure.c > @@ -380,15 +380,44 @@ static void kill_procs(struct list_head *to_kill, int forcekill, int trapno, > } > } > > -static int task_early_kill(struct task_struct *tsk, int force_early) > +/* > + * Find a dedicated thread which is supposed to handle SIGBUS(BUS_MCEERR_AO) > + * on behalf of the thread group. Return task_struct of the (first found) > + * dedicated thread if found, and return NULL otherwise. 
> + * > + * We already hold read_lock(&tasklist_lock) in the caller, so we don't > + * have to call rcu_read_lock/unlock() in this function. > + */ > +static struct task_struct *find_early_kill_thread(struct task_struct *tsk) > +{ > + struct task_struct *t; > + > + for_each_thread(tsk, t) > + if ((t->flags & PF_MCE_PROCESS) && (t->flags & PF_MCE_EARLY)) > + return t; > + return NULL; > +} > + > +/* > + * Determine whether a given process is "early kill" process which expects > + * to be signaled when some page under the process is hwpoisoned. > + * Return task_struct of the dedicated thread (main thread unless explicitly > + * specified) if the process is "early kill," and otherwise returns NULL. > + */ > +static struct task_struct *task_early_kill(struct task_struct *tsk, > + int force_early) > { > + struct task_struct *t; > if (!tsk->mm) > - return 0; > + return NULL; > if (force_early) > - return 1; > - if (tsk->flags & PF_MCE_PROCESS) > - return !!(tsk->flags & PF_MCE_EARLY); > - return sysctl_memory_failure_early_kill; > + return tsk; > + t = find_early_kill_thread(tsk); > + if (t) > + return t; > + if (sysctl_memory_failure_early_kill) > + return tsk; > + return NULL; > } > > /* > @@ -410,16 +439,17 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, > read_lock(&tasklist_lock); > for_each_process (tsk) { > struct anon_vma_chain *vmac; > + struct task_struct *t = task_early_kill(tsk, force_early); > > - if (!task_early_kill(tsk, force_early)) > + if (!t) > continue; > anon_vma_interval_tree_foreach(vmac, &av->rb_root, > pgoff, pgoff) { > vma = vmac->vma; > if (!page_mapped_in_vma(page, vma)) > continue; > - if (vma->vm_mm == tsk->mm) > - add_to_kill(tsk, page, vma, to_kill, tkc); > + if (vma->vm_mm == t->mm) > + add_to_kill(t, page, vma, to_kill, tkc); > } > } > read_unlock(&tasklist_lock); > @@ -440,10 +470,10 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, > read_lock(&tasklist_lock); > 
for_each_process(tsk) { > pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); > + struct task_struct *t = task_early_kill(tsk, force_early); > > - if (!task_early_kill(tsk, force_early)) > + if (!t) > continue; > - > vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, > pgoff) { > /* > @@ -453,8 +483,8 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, > * Assume applications who requested early kill want > * to be informed of all such data corruptions. > */ > - if (vma->vm_mm == tsk->mm) > - add_to_kill(tsk, page, vma, to_kill, tkc); > + if (vma->vm_mm == t->mm) > + add_to_kill(t, page, vma, to_kill, tkc); > } > } > read_unlock(&tasklist_lock); On Mon, Jun 30, 2014 at 10:16:20AM -0500, Horiguchi, Naoya wrote: > The patch below does not apply to the 3.10-stable tree. > If someone wants it applied there, or to any other stable or longterm > tree, then please email the backport, including the original git commit > id to <stable@xxxxxxxxxxxxxxx>. > > thanks, > > greg k-h > > ------------------ original commit in Linus's tree ------------------ > > From 3ba08129e38437561df44c36b7ea9081185d5333 Mon Sep 17 00:00:00 2001 > From: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx> > Date: Wed, 4 Jun 2014 16:11:02 -0700 > Subject: [PATCH] mm/memory-failure.c: support use of a dedicated thread to > handle SIGBUS(BUS_MCEERR_AO) > > Currently memory error handler handles action optional errors in the > deferred manner by default. And if a recovery aware application wants > to handle it immediately, it can do it by setting PF_MCE_EARLY flag. > However, such signal can be sent only to the main thread, so it's > problematic if the application wants to have a dedicated thread to > handler such signals. > > So this patch adds dedicated thread support to memory error handler. We > have PF_MCE_EARLY flags for each thread separately, so with this patch > AO signal is sent to the thread with PF_MCE_EARLY flag set, not the main > thread. 
If you want to implement a dedicated thread, you call prctl() > to set PF_MCE_EARLY on the thread. > > Memory error handler collects processes to be killed, so this patch lets > it check PF_MCE_EARLY flag on each thread in the collecting routines. > > No behavioral change for all non-early kill cases. > > Tony said: > > : The old behavior was crazy - someone with a multithreaded process might > : well expect that if they call prctl(PF_MCE_EARLY) in just one thread, then > : that thread would see the SIGBUS with si_code = BUS_MCEERR_A0 - even if > : that thread wasn't the main thread for the process. > > [akpm@xxxxxxxxxxxxxxxxxxxx: coding-style fixes] > Signed-off-by: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx> > Reviewed-by: Tony Luck <tony.luck@xxxxxxxxx> > Cc: Kamil Iskra <iskra@xxxxxxxxxxx> > Cc: Andi Kleen <andi@xxxxxxxxxxxxxx> > Cc: Borislav Petkov <bp@xxxxxxx> > Cc: Chen Gong <gong.chen@xxxxxxxxxxxxxxxxxx> > Cc: <stable@xxxxxxxxxxxxxxx> [3.2+] > Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> > Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> > > diff --git a/Documentation/vm/hwpoison.txt b/Documentation/vm/hwpoison.txt > index 550068466605..6ae89a9edf2a 100644 > --- a/Documentation/vm/hwpoison.txt > +++ b/Documentation/vm/hwpoison.txt > @@ -84,6 +84,11 @@ PR_MCE_KILL > PR_MCE_KILL_EARLY: Early kill > PR_MCE_KILL_LATE: Late kill > PR_MCE_KILL_DEFAULT: Use system global default > + Note that if you want to have a dedicated thread which handles > + the SIGBUS(BUS_MCEERR_AO) on behalf of the process, you should > + call prctl(PR_MCE_KILL_EARLY) on the designated thread. Otherwise, > + the SIGBUS is sent to the main thread. 
> + > PR_MCE_KILL_GET > return current mode > > diff --git a/mm/memory-failure.c b/mm/memory-failure.c > index ed339c505d55..cd8989c1027e 100644 > --- a/mm/memory-failure.c > +++ b/mm/memory-failure.c > @@ -380,15 +380,44 @@ static void kill_procs(struct list_head *to_kill, int forcekill, int trapno, > } > } > > -static int task_early_kill(struct task_struct *tsk, int force_early) > +/* > + * Find a dedicated thread which is supposed to handle SIGBUS(BUS_MCEERR_AO) > + * on behalf of the thread group. Return task_struct of the (first found) > + * dedicated thread if found, and return NULL otherwise. > + * > + * We already hold read_lock(&tasklist_lock) in the caller, so we don't > + * have to call rcu_read_lock/unlock() in this function. > + */ > +static struct task_struct *find_early_kill_thread(struct task_struct *tsk) > +{ > + struct task_struct *t; > + > + for_each_thread(tsk, t) > + if ((t->flags & PF_MCE_PROCESS) && (t->flags & PF_MCE_EARLY)) > + return t; > + return NULL; > +} > + > +/* > + * Determine whether a given process is "early kill" process which expects > + * to be signaled when some page under the process is hwpoisoned. > + * Return task_struct of the dedicated thread (main thread unless explicitly > + * specified) if the process is "early kill," and otherwise returns NULL. 
> + */ > +static struct task_struct *task_early_kill(struct task_struct *tsk, > + int force_early) > { > + struct task_struct *t; > if (!tsk->mm) > - return 0; > + return NULL; > if (force_early) > - return 1; > - if (tsk->flags & PF_MCE_PROCESS) > - return !!(tsk->flags & PF_MCE_EARLY); > - return sysctl_memory_failure_early_kill; > + return tsk; > + t = find_early_kill_thread(tsk); > + if (t) > + return t; > + if (sysctl_memory_failure_early_kill) > + return tsk; > + return NULL; > } > > /* > @@ -410,16 +439,17 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, > read_lock(&tasklist_lock); > for_each_process (tsk) { > struct anon_vma_chain *vmac; > + struct task_struct *t = task_early_kill(tsk, force_early); > > - if (!task_early_kill(tsk, force_early)) > + if (!t) > continue; > anon_vma_interval_tree_foreach(vmac, &av->rb_root, > pgoff, pgoff) { > vma = vmac->vma; > if (!page_mapped_in_vma(page, vma)) > continue; > - if (vma->vm_mm == tsk->mm) > - add_to_kill(tsk, page, vma, to_kill, tkc); > + if (vma->vm_mm == t->mm) > + add_to_kill(t, page, vma, to_kill, tkc); > } > } > read_unlock(&tasklist_lock); > @@ -440,10 +470,10 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, > read_lock(&tasklist_lock); > for_each_process(tsk) { > pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); > + struct task_struct *t = task_early_kill(tsk, force_early); > > - if (!task_early_kill(tsk, force_early)) > + if (!t) > continue; > - > vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, > pgoff) { > /* > @@ -453,8 +483,8 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, > * Assume applications who requested early kill want > * to be informed of all such data corruptions. 
> */ > - if (vma->vm_mm == tsk->mm) > - add_to_kill(tsk, page, vma, to_kill, tkc); > + if (vma->vm_mm == t->mm) > + add_to_kill(t, page, vma, to_kill, tkc); > } > } > read_unlock(&tasklist_lock); -- To unsubscribe from this list: send the line "unsubscribe stable" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html