Re: [PATCH 5/7] clone: Introduce the CLONE_CHILD_USEPID functionality

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Quoting Pavel Emelyanov (xemul@xxxxxxxxxxxxx):
> The respective flag for clone() makes the latter to take the desired
> pid of a new process from the child_tidptr. The given pid is used as
> the pid for the pid namespace the parent is currently running in.
> 
> Needed badly for restoring a process.
> 
> Signed-off-by: Pavel Emelyanov <xemul@xxxxxxxxxxxxx>

How do you intend to eventually support multiple pid namespaces?
I don't mind not supporting them now, but if the interface does
cannot be extended to support it, I think that's a simple NACK.

IIRC Eric Biederman (explicity cc:d) in the past has advocated using
/proc/sys/pid_max games to specify a pid.  That is actually
usable cross-namespace, though only serially.  Do you think that
will be too cumbersome?

There is also the clone_with_pid() syscall from Suka.  It
accompanied the in-kernel checkpoint/restart patchset, but should
be perfectly usable without it.  Is there a good reason not to
pursue that?

thanks,
-serge

> ---
>  include/linux/pid.h   |    2 +-
>  include/linux/sched.h |    1 +
>  kernel/fork.c         |   10 ++++++-
>  kernel/pid.c          |   70 +++++++++++++++++++++++++++++++++++-------------
>  4 files changed, 62 insertions(+), 21 deletions(-)
> 
> diff --git a/include/linux/pid.h b/include/linux/pid.h
> index cdced84..de772ab 100644
> --- a/include/linux/pid.h
> +++ b/include/linux/pid.h
> @@ -119,7 +119,7 @@ extern struct pid *find_get_pid(int nr);
>  extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
>  int next_pidmap(struct pid_namespace *pid_ns, unsigned int last);
>  
> -extern struct pid *alloc_pid(struct pid_namespace *ns);
> +extern struct pid *alloc_pid(struct pid_namespace *ns, int pid);
>  extern void free_pid(struct pid *pid);
>  
>  /*
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 781abd1..5b6c1e2 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -23,6 +23,7 @@
>  #define CLONE_CHILD_SETTID	0x01000000	/* set the TID in the child */
>  /* 0x02000000 was previously the unused CLONE_STOPPED (Start in stopped state)
>     and is now available for re-use. */
> +#define CLONE_CHILD_USEPID	0x02000000	/* use the given pid */
>  #define CLONE_NEWUTS		0x04000000	/* New utsname group? */
>  #define CLONE_NEWIPC		0x08000000	/* New ipcs */
>  #define CLONE_NEWUSER		0x10000000	/* New user namespace */
> diff --git a/kernel/fork.c b/kernel/fork.c
> index e7548de..f30fbdb 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1183,8 +1183,16 @@ static struct task_struct *copy_process(unsigned long clone_flags,
>  		goto bad_fork_cleanup_io;
>  
>  	if (pid != &init_struct_pid) {
> +		int want_pid = 0;
> +
> +		if (clone_flags & CLONE_CHILD_USEPID) {
> +			retval = get_user(want_pid, child_tidptr);
> +			if (retval)
> +				goto bad_fork_cleanup_io;
> +		}
> +
>  		retval = -ENOMEM;
> -		pid = alloc_pid(p->nsproxy->pid_ns);
> +		pid = alloc_pid(p->nsproxy->pid_ns, want_pid);
>  		if (!pid)
>  			goto bad_fork_cleanup_io;
>  	}
> diff --git a/kernel/pid.c b/kernel/pid.c
> index 57a8346..69ae1be 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -159,11 +159,55 @@ static void set_last_pid(struct pid_namespace *pid_ns, int base, int pid)
>  	} while ((prev != last_write) && (pid_before(base, last_write, pid)));
>  }
>  
> -static int alloc_pidmap(struct pid_namespace *pid_ns)
> +static int alloc_pidmap_page(struct pidmap *map)
> +{
> +	if (unlikely(!map->page)) {
> +		void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
> +		/*
> +		 * Free the page if someone raced with us
> +		 * installing it:
> +		 */
> +		spin_lock_irq(&pidmap_lock);
> +		if (!map->page) {
> +			map->page = page;
> +			page = NULL;
> +		}
> +		spin_unlock_irq(&pidmap_lock);
> +		kfree(page);
> +		if (unlikely(!map->page))
> +			return -ENOMEM;
> +	}
> +
> +	return 0;
> +}
> +
> +static int set_pidmap(struct pid_namespace *pid_ns, int pid)
> +{
> +	int offset;
> +	struct pidmap *map;
> +
> +	offset = pid & BITS_PER_PAGE_MASK;
> +	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
> +
> +	if (alloc_pidmap_page(map) < 0)
> +		return -ENOMEM;
> +
> +	if (!test_and_set_bit(offset, map->page)) {
> +		atomic_dec(&map->nr_free);
> +		return pid;
> +	}
> +
> +	return -EBUSY;
> +}
> +
> +static int alloc_pidmap(struct pid_namespace *pid_ns, int desired_pid)
>  {
>  	int i, offset, max_scan, pid, last = pid_ns->last_pid;
>  	struct pidmap *map;
>  
> +	if (desired_pid)
> +		return set_pidmap(pid_ns, desired_pid);
> +
>  	pid = last + 1;
>  	if (pid >= pid_max)
>  		pid = RESERVED_PIDS;
> @@ -176,22 +220,9 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
>  	 */
>  	max_scan = DIV_ROUND_UP(pid_max, BITS_PER_PAGE) - !offset;
>  	for (i = 0; i <= max_scan; ++i) {
> -		if (unlikely(!map->page)) {
> -			void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
> -			/*
> -			 * Free the page if someone raced with us
> -			 * installing it:
> -			 */
> -			spin_lock_irq(&pidmap_lock);
> -			if (!map->page) {
> -				map->page = page;
> -				page = NULL;
> -			}
> -			spin_unlock_irq(&pidmap_lock);
> -			kfree(page);
> -			if (unlikely(!map->page))
> -				break;
> -		}
> +		if (alloc_pidmap_page(map) < 0)
> +			break;
> +
>  		if (likely(atomic_read(&map->nr_free))) {
>  			do {
>  				if (!test_and_set_bit(offset, map->page)) {
> @@ -277,7 +308,7 @@ void free_pid(struct pid *pid)
>  	call_rcu(&pid->rcu, delayed_put_pid);
>  }
>  
> -struct pid *alloc_pid(struct pid_namespace *ns)
> +struct pid *alloc_pid(struct pid_namespace *ns, int this_ns_pid)
>  {
>  	struct pid *pid;
>  	enum pid_type type;
> @@ -291,13 +322,14 @@ struct pid *alloc_pid(struct pid_namespace *ns)
>  
>  	tmp = ns;
>  	for (i = ns->level; i >= 0; i--) {
> -		nr = alloc_pidmap(tmp);
> +		nr = alloc_pidmap(tmp, this_ns_pid);
>  		if (nr < 0)
>  			goto out_free;
>  
>  		pid->numbers[i].nr = nr;
>  		pid->numbers[i].ns = tmp;
>  		tmp = tmp->parent;
> +		this_ns_pid = 0;
>  	}
>  
>  	get_pid_ns(ns);
> -- 
> 1.5.5.6
> _______________________________________________
> Containers mailing list
> Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx
> https://lists.linux-foundation.org/mailman/listinfo/containers
_______________________________________________
Containers mailing list
Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linux-foundation.org/mailman/listinfo/containers


[Index of Archives]     [Cgroups]     [Netdev]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux