The patch titled Subject: kernel/exit.c: release ptraced tasks before zap_pid_ns_processes has been removed from the -mm tree. Its filename was kernel-release-ptraced-tasks-before-zap_pid_ns_processes.patch This patch was dropped because an updated version will be merged ------------------------------------------------------ From: Andrei Vagin <avagin@xxxxxxxxx> Subject: kernel/exit.c: release ptraced tasks before zap_pid_ns_processes Currently, exit_ptrace() adds all ptraced tasks to a dead list, then zap_pid_ns_processes() waits for all tasks in the current pid ns, and only then are tasks from the dead list released. zap_pid_ns_processes() can get stuck waiting for tasks from the dead list. In this case, we will have one unkillable process with one or more dead children. The reproducer for this problem is below. Here you can find its effect without this fix: We have one alive process which is stuck in zap_pid_ns_processes: $ ps axf ... 11831 pts/0 S 0:00 [ptrace_pidns] 11833 pts/0 Zl 0:00 \_ [ptrace_pidns] <defunct> $ cat /proc/11831/stack [<0>] do_wait+0x1fa/0x2c0 [<0>] kernel_wait4+0x9e/0x150 [<0>] zap_pid_ns_processes+0x17d/0x270 [<0>] do_exit+0xa15/0xbd0 [<0>] do_group_exit+0x47/0xc0 [<0>] get_signal+0x28c/0x850 [<0>] do_signal+0x36/0x630 [<0>] exit_to_usermode_loop+0x62/0xc0 [<0>] prepare_exit_to_usermode+0xb4/0xe0 [<0>] retint_user+0x8/0x18 [<0>] 0xffffffffffffffff The child process has two threads which were ptraced by the parent: $ ls /proc/11833/task/ 11833 11834 The parent can't wait for the child, because the thread group isn't empty, but a thread is in the dead list: $ cat /proc/1183{1,3,4}/status | grep '\(NSpid\|TracerPid\|State\)' State: S (sleeping) TracerPid: 0 NSpid: 11831 1 State: Z (zombie) TracerPid: 0 NSpid: 11833 2 State: X (dead) TracerPid: 0 NSpid: 11834 3 ====== ptrace_pidns.c ======= #define _GNU_SOURCE /* See feature_test_macros(7) */ #include <unistd.h> #include <sys/ptrace.h> #include <sys/types.h> #include <sys/time.h> #include <sys/resource.h> 
#include <sys/wait.h> #include <sys/syscall.h> /* For SYS_xxx definitions */ #include <pthread.h> #include <stdio.h> #include <stdlib.h> int pfd[2]; void *thread_fn() { pid_t tid = syscall(SYS_gettid); write(pfd[1], &tid, sizeof(tid)); sleep(1000); return NULL; } int main() { pid_t pid, tid, ppid = getpid(); pthread_t t; if (pipe(pfd)) return 1; pid = fork(); if (pid < 0) return 1; if (pid == 0) { pthread_create(&t, NULL, thread_fn, (void *)(unsigned long)ppid); sleep(1000); return 0; } printf("fork: %d\n", pid); if (read(pfd[0], &tid, sizeof(tid)) != sizeof(tid)) return 1; printf("thread: %d\n", tid); if (ptrace(PTRACE_ATTACH, tid, 0, 0)) return 1; if (wait4(tid, NULL, __WALL, NULL) != tid) return 1; if (ptrace(PTRACE_ATTACH, pid, 0, 0)) return 1; if (wait4(pid, NULL, __WALL, NULL) != pid) return 1; kill(pid, SIGKILL); *((int *)(0)) = 0xdead; return 0; } Link: http://lkml.kernel.org/r/20190102205939.26231-1-avagin@xxxxxxxxx Signed-off-by: Andrei Vagin <avagin@xxxxxxxxx> Cc: Oleg Nesterov <oleg@xxxxxxxxxx> Cc: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- kernel/exit.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) --- a/kernel/exit.c~kernel-release-ptraced-tasks-before-zap_pid_ns_processes +++ a/kernel/exit.c @@ -664,9 +664,6 @@ static void forget_original_parent(struc { struct task_struct *p, *t, *reaper; - if (unlikely(!list_empty(&father->ptraced))) - exit_ptrace(father, dead); - /* Can drop and reacquire tasklist_lock */ reaper = find_child_reaper(father); if (list_empty(&father->children)) @@ -705,8 +702,18 @@ static void exit_notify(struct task_stru LIST_HEAD(dead); write_lock_irq(&tasklist_lock); - forget_original_parent(tsk, &dead); + if (unlikely(!list_empty(&tsk->ptraced))) + exit_ptrace(tsk, &dead); + write_unlock_irq(&tasklist_lock); + + /* Ptraced tasks have to be released before zap_pid_ns_processes(). 
*/ + list_for_each_entry_safe(p, n, &dead, ptrace_entry) { + list_del_init(&p->ptrace_entry); + release_task(p); + } + write_lock_irq(&tasklist_lock); + forget_original_parent(tsk, &dead); if (group_dead) kill_orphaned_pgrp(tsk->group_leader, NULL); _ Patches currently in -mm which might be from avagin@xxxxxxxxx are ptrace-take-into-account-saved_sigmask-in-ptrace_getsetsigmask.patch include-replace-tsk-to-task-in-linux-sched-signalh.patch