On 01/25, Linus Torvalds wrote: > > Keith pinpointed the user space logic to fork_remap(): > > https://github.com/google/minijail/blob/main/rust/minijail/src/lib.rs#L987 > > and honestly, I do think it makes sense for user space to ask "am I > single-threaded" (which is presumably the thing that breaks), and the > code for that is pretty simple: > > fn is_single_threaded() -> io::Result<bool> { > match count_dir_entries("/proc/self/task") { > Ok(1) => Ok(true), > Ok(_) => Ok(false), > Err(e) => Err(e), > } > } > > and I really don't think user space is "wrong". > > So the fact that a kernel helper thread that runs async in the > background and does random background infrastructure things that do > not really affect user space should probably simply not break this > kind of simple (and admittedly simplistic) user space logic. > > Should we just add some flag to say "don't show this thread in this > context"? Not sure I understand... Looking at is_single_threaded() above I guess something like below should work (incomplete, in particular we need to change first_tid() as well). But a PF_HIDDEN sub-thread will still be visible via /proc/$pid_of_PF_HIDDEN. > We obviously still want to see it for management purposes, > so it's not like the thing should be entirely invisible, Can you explain? Oleg. --- x/include/linux/sched.h +++ x/include/linux/sched.h @@ -1685,7 +1685,7 @@ extern struct pid *cad_pid; #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ #define PF_USER_WORKER 0x00004000 /* Kernel thread cloned from userspace thread */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ -#define PF__HOLE__00010000 0x00010000 +#define PF_HIDDEN 0x00010000 #define PF_KSWAPD 0x00020000 /* I am kswapd */ #define PF_MEMALLOC_NOFS 0x00040000 /* All allocations inherit GFP_NOFS. See memalloc_nfs_save() */ #define PF_MEMALLOC_NOIO 0x00080000 /* All allocations inherit GFP_NOIO. 
See memalloc_noio_save() */ --- x/include/linux/sched/task.h +++ x/include/linux/sched/task.h @@ -31,6 +31,7 @@ struct kernel_clone_args { u32 io_thread:1; u32 user_worker:1; u32 no_files:1; + u32 hidden:1; unsigned long stack; unsigned long stack_size; unsigned long tls; --- x/kernel/fork.c +++ x/kernel/fork.c @@ -2237,6 +2237,8 @@ __latent_entropy struct task_struct *cop } if (args->io_thread) p->flags |= PF_IO_WORKER; + if (args->hidden) + p->flags |= PF_HIDDEN; if (args->name) strscpy_pad(p->comm, args->name, sizeof(p->comm)); --- x/kernel/vhost_task.c +++ x/kernel/vhost_task.c @@ -117,7 +117,7 @@ EXPORT_SYMBOL_GPL(vhost_task_stop); */ struct vhost_task *vhost_task_create(bool (*fn)(void *), void (*handle_sigkill)(void *), void *arg, - const char *name) + bool hidden, const char *name) { struct kernel_clone_args args = { .flags = CLONE_FS | CLONE_UNTRACED | CLONE_VM | @@ -125,6 +125,7 @@ struct vhost_task *vhost_task_create(boo .exit_signal = 0, .fn = vhost_task_fn, .name = name, + .hidden = hidden, .user_worker = 1, .no_files = 1, }; --- x/fs/proc/base.c +++ x/fs/proc/base.c @@ -3906,9 +3906,12 @@ static struct task_struct *next_tid(stru struct task_struct *pos = NULL; rcu_read_lock(); if (pid_alive(start)) { - pos = __next_thread(start); - if (pos) - get_task_struct(pos); + for (pos = start; (pos = __next_thread(pos)); ) { + if (!(pos->flags & PF_HIDDEN)) { + get_task_struct(pos); + break; + } + } } rcu_read_unlock(); put_task_struct(start);