Aleksa Sarai <asarai@xxxxxxxx> writes: > It appears as though the addition of the PID namespace did not update > the output code for /proc/*/sched, which resulted in it providing PIDs > that were not self-consistent with the /proc mount. This additionally > made it trivial to detect whether a process was inside &init_pid_ns from > userspace (making container detection trivial[1]). This led to > situations such as: > > % unshare -pmf > % mount -t proc proc /proc > % head -n1 /proc/1/sched > head (10047, #threads: 1) > > Fix this by just using task_pid_nr_ns for the output of /proc/*/sched. > All of the other uses of task_pid_nr in kernel/sched/debug.c are from a > sysctl context and thus don't need to be namespaced. > > [1]: https://github.com/jessfraz/amicontained > > Cc: <stable@xxxxxxxxxxxxxxx> > Cc: Jess Frazelle <acidburn@xxxxxxxxxx> > Signed-off-by: Aleksa Sarai <asarai@xxxxxxxx> Acked-by: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx> > --- > fs/proc/base.c | 3 ++- > include/linux/sched/debug.h | 4 +++- > kernel/sched/debug.c | 5 +++-- > 3 files changed, 8 insertions(+), 4 deletions(-) > > diff --git a/fs/proc/base.c b/fs/proc/base.c > index 719c2e943ea1..98fd8f6df851 100644 > --- a/fs/proc/base.c > +++ b/fs/proc/base.c > @@ -1408,12 +1408,13 @@ static const struct file_operations proc_fail_nth_operations = { > static int sched_show(struct seq_file *m, void *v) > { > struct inode *inode = m->private; > + struct pid_namespace *ns = inode->i_sb->s_fs_info; > struct task_struct *p; > > p = get_proc_task(inode); > if (!p) > return -ESRCH; > - proc_sched_show_task(p, m); > + proc_sched_show_task(p, ns, m); > > put_task_struct(p); > > diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h > index e0eaee54c5a4..5d58d49e9f87 100644 > --- a/include/linux/sched/debug.h > +++ b/include/linux/sched/debug.h > @@ -6,6 +6,7 @@ > */ > > struct task_struct; > +struct pid_namespace; > > extern void dump_cpu_task(int cpu); > > @@ -34,7 +35,8 @@ extern void 
sched_show_task(struct task_struct *p); > > #ifdef CONFIG_SCHED_DEBUG > struct seq_file; > -extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); > +extern void proc_sched_show_task(struct task_struct *p, > + struct pid_namespace *ns, struct seq_file *m); > extern void proc_sched_set_task(struct task_struct *p); > #endif > > diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c > index 4fa66de52bd6..ac345115877b 100644 > --- a/kernel/sched/debug.c > +++ b/kernel/sched/debug.c > @@ -872,11 +872,12 @@ static void sched_show_numa(struct task_struct *p, struct seq_file *m) > #endif > } > > -void proc_sched_show_task(struct task_struct *p, struct seq_file *m) > +void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, > + struct seq_file *m) > { > unsigned long nr_switches; > > - SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr(p), > + SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns), > get_nr_threads(p)); > SEQ_printf(m, > "---------------------------------------------------------"