Sukadev Bhattiprolu <sukadev@xxxxxxxxxxxxxxxxxx> writes: > Eric W. Biederman [ebiederm@xxxxxxxxxxxx] wrote: > | > | I think replacing a struct pid for another struct pid allocated in > | descendant pid_namespace (but has all of the same struct upid values > | as the first struct pid) is a disastrous idea. It destroys the > > True. Sorry, I did not mean we would need a new 'struct pid' for an > existing process. I think we talked earlier of finding a way of attaching > additional pid numbers to the same struct pid. I just played with this and if you make the semantics of unshare(CLONE_NEWPID) to be that you become the idle task aka pid 0, and not the init task pid 1 the implementation is trivial. Eric ---- arch/powerpc/platforms/cell/spufs/sched.c | 2 +- arch/um/drivers/mconsole_kern.c | 2 +- fs/proc/root.c | 2 +- init/main.c | 9 --------- kernel/cgroup.c | 2 +- kernel/fork.c | 16 +++++++++++++--- kernel/nsproxy.c | 2 +- kernel/perf_event.c | 2 +- kernel/pid.c | 8 ++++---- kernel/signal.c | 9 ++++----- kernel/sysctl_binary.c | 2 +- 11 files changed, 28 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 4678078..b7f2026 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -1094,7 +1094,7 @@ static int show_spu_loadavg(struct seq_file *s, void *private) LOAD_INT(c), LOAD_FRAC(c), count_active_contexts(), atomic_read(&nr_spu_contexts), - current->nsproxy->pid_ns->last_pid); + task_active_pid_ns(current)->last_pid); return 0; } diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 3b3c366..4e6985e 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -125,7 +125,7 @@ void mconsole_log(struct mc_request *req) void mconsole_proc(struct mc_request *req) { struct nameidata nd; - struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt; + struct vfsmount *mnt = 
task_active_pid_ns(current)->proc_mnt; struct file *file; int n, err; char *ptr = req->request.data, *buf; diff --git a/fs/proc/root.c b/fs/proc/root.c index b080b79..fbcd3f8 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -57,7 +57,7 @@ static int proc_get_sb(struct file_system_type *fs_type, if (flags & MS_KERNMOUNT) ns = (struct pid_namespace *)data; else - ns = current->nsproxy->pid_ns; + ns = task_active_pid_ns(current); sb = sget(fs_type, proc_test_super, proc_set_super, ns); if (IS_ERR(sb)) diff --git a/init/main.c b/init/main.c index 4cb47a1..67e40fc 100644 --- a/init/main.c +++ b/init/main.c @@ -851,15 +851,6 @@ static int __init kernel_init(void * unused) * init can run on any cpu. */ set_cpus_allowed_ptr(current, cpu_all_mask); - /* - * Tell the world that we're going to be the grim - * reaper of innocent orphaned children. - * - * We don't want people to have to make incorrect - * assumptions about where in the task array this - * can be found. - */ - init_pid_ns.child_reaper = current; cad_pid = task_pid(current); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index aa3bee5..737d2eb 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2453,7 +2453,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, { struct cgroup_pidlist *l; /* don't need task_nsproxy() if we're looking at ourself */ - struct pid_namespace *ns = get_pid_ns(current->nsproxy->pid_ns); + struct pid_namespace *ns = get_pid_ns(task_active_pid_ns(current)); /* * We can't drop the pidlist_mutex before taking the l->mutex in case * the last ref-holder is trying to remove l from the list at the same diff --git a/kernel/fork.c b/kernel/fork.c index f88bd98..832c035 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1172,7 +1172,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (!pid) goto bad_fork_cleanup_io; - if (clone_flags & CLONE_NEWPID) { + if (pid->numbers[pid->level].nr == 1) { retval = pid_ns_prepare_proc(p->nsproxy->pid_ns); if 
(retval < 0) goto bad_fork_free_pid; @@ -1279,7 +1279,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, tracehook_finish_clone(p, clone_flags, trace); if (thread_group_leader(p)) { - if (clone_flags & CLONE_NEWPID) + if (pid->numbers[pid->level].nr == 1) p->nsproxy->pid_ns->child_reaper = p; p->signal->leader_pid = pid; @@ -1539,10 +1539,19 @@ static void check_unshare_flags(unsigned long *flags_ptr) *flags_ptr |= CLONE_THREAD; /* + * If unsharing the pid namespace and the task was created + * using CLONE_THREAD, then must unshare the thread. + */ + if ((*flags_ptr & CLONE_NEWPID) && + (atomic_read(&current->signal->count) > 1)) + *flags_ptr |= CLONE_THREAD; + + /* * If unsharing namespace, must also unshare filesystem information. */ if (*flags_ptr & CLONE_NEWNS) *flags_ptr |= CLONE_FS; + } /* @@ -1647,7 +1656,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) err = -EINVAL; if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| - CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) + CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET| + CLONE_NEWPID)) goto bad_unshare_out; /* diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index e3be4ef..1d023d5 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -173,7 +173,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags, int err = 0; if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | - CLONE_NEWNET))) + CLONE_NEWNET | CLONE_NEWPID))) return 0; if (!capable(CAP_SYS_ADMIN)) diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 2ae7409..74865cd 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4436,7 +4436,7 @@ perf_event_alloc(struct perf_event_attr *attr, event->parent = parent_event; - event->ns = get_pid_ns(current->nsproxy->pid_ns); + event->ns = get_pid_ns(task_active_pid_ns(current)); event->id = atomic64_inc_return(&perf_event_id); event->state = PERF_EVENT_STATE_INACTIVE; diff --git a/kernel/pid.c b/kernel/pid.c 
index 2e17c9c..6b64a82 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -305,7 +305,7 @@ EXPORT_SYMBOL_GPL(find_pid_ns); struct pid *find_vpid(int nr) { - return find_pid_ns(nr, current->nsproxy->pid_ns); + return find_pid_ns(nr, task_active_pid_ns(current)); } EXPORT_SYMBOL_GPL(find_vpid); @@ -385,7 +385,7 @@ struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) struct task_struct *find_task_by_vpid(pid_t vnr) { - return find_task_by_pid_ns(vnr, current->nsproxy->pid_ns); + return find_task_by_pid_ns(vnr, task_active_pid_ns(current)); } struct pid *get_task_pid(struct task_struct *task, enum pid_type type) @@ -437,7 +437,7 @@ pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns) pid_t pid_vnr(struct pid *pid) { - return pid_nr_ns(pid, current->nsproxy->pid_ns); + return pid_nr_ns(pid, task_active_pid_ns(current)); } EXPORT_SYMBOL_GPL(pid_vnr); @@ -448,7 +448,7 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, rcu_read_lock(); if (!ns) - ns = current->nsproxy->pid_ns; + ns = task_active_pid_ns(current); if (likely(pid_alive(task))) { if (type != PIDTYPE_PID) task = task->group_leader; diff --git a/kernel/signal.c b/kernel/signal.c index 934ae5e..885b699 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1438,16 +1438,15 @@ int do_notify_parent(struct task_struct *tsk, int sig) * we are under tasklist_lock here so our parent is tied to * us and cannot exit and release its namespace. * - * the only it can is to switch its nsproxy with sys_unshare, - * bu uncharing pid namespaces is not allowed, so we'll always - * see relevant namespace + * The only way it can is to switch its nsproxy with sys_unshare, + * but we use the pid_namespace for task_pid which never changes. 
* * write_lock() currently calls preempt_disable() which is the * same as rcu_read_lock(), but according to Oleg, this is not * correct to rely on this */ rcu_read_lock(); - info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns); + info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(tsk->parent)); info.si_uid = __task_cred(tsk)->uid; rcu_read_unlock(); @@ -1518,7 +1517,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why) * see comment in do_notify_parent() abot the following 3 lines */ rcu_read_lock(); - info.si_pid = task_pid_nr_ns(tsk, parent->nsproxy->pid_ns); + info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(parent)); info.si_uid = __task_cred(tsk)->uid; rcu_read_unlock(); diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 8f5d16e..1e4da59 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -1356,7 +1356,7 @@ static ssize_t binary_sysctl(const int *name, int nlen, goto out_putname; } - mnt = current->nsproxy->pid_ns->proc_mnt; + mnt = task_active_pid_ns(current)->proc_mnt; result = vfs_path_lookup(mnt->mnt_root, mnt, pathname, 0, &nd); if (result) goto out_putname; -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html