Thank you Sukadev for your comments. I will try to clean up my patch and repost it. --Ying On Thu, Mar 12, 2009 at 10:34 PM, Sukadev Bhattiprolu <sukadev@xxxxxxxxxxxxxxxxxx> wrote: > Ying Han [yinghan@xxxxxxxxxx] wrote: > | Hi Serge: > | I made a patch based on Oren's tree recently which implement a new > | syscall clone_with_pid. I tested with checkpoint/restart process tree > | and it works as expected. > > Yes, I think we had a version of clone() with pid a while ago. > > But it would be easier to review if you break it up into smaller > patches. and remove the unnecessary diffs in this patch like... > > > | This patch has some hack in it which i made a copy of libc's clone and > | made modifications of passing one more argument(pid number). I will > | try to clean up the code and do more testing. > | > | New syscall clone_with_pid > | Implement a new syscall which clone a thread with a preselected pid number. > | > | clone_with_pid(child_func, child_stack + CHILD_STACK - 16, > | CLONE_WITH_PID|SIGCHLD, pid, NULL); > | > | Signed-off-by: Ying Han <yinghan@xxxxxxxxxx> > | > | diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h > | index 87803da..b5a1b03 100644 > | --- a/arch/x86/include/asm/syscalls.h > | +++ b/arch/x86/include/asm/syscalls.h > | @@ -26,6 +26,7 @@ asmlinkage int sys_fork(struct pt_regs); > | asmlinkage int sys_clone(struct pt_regs); > | asmlinkage int sys_vfork(struct pt_regs); > | asmlinkage int sys_execve(struct pt_regs); > | +asmlinkage int sys_clone_with_pid(struct pt_regs); > | > | /* kernel/signal_32.c */ > | asmlinkage int sys_sigsuspend(int, int, old_sigset_t); > | diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32 > | index a5f9e09..f10ca0e 100644 > | --- a/arch/x86/include/asm/unistd_32.h > | +++ b/arch/x86/include/asm/unistd_32.h > | @@ -340,6 +340,7 @@ > | #define __NR_inotify_init1 332 > | #define __NR_checkpoint 333 > | #define __NR_restart 334 > | +#define __NR_clone_with_pid 335 > | > | #ifdef __KERNEL__ > | > | diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c > | index 0a1302f..88ae634 100644 > | --- a/arch/x86/kernel/process_32.c > | +++ b/arch/x86/kernel/process_32.c > | @@ -8,7 +8,6 @@ > | /* > | * This file handles the architecture-dependent parts of process handling.. > | */ > | - > > these > > | #include <stdarg.h> > | > | #include <linux/cpu.h> > | @@ -652,6 +651,28 @@ asmlinkage int sys_clone(struct pt_regs regs) > | return do_fork(clone_flags, newsp, ®s, 0, parent_tidptr, child_tidptr); > | } > | > | +/** > | + * sys_clone_with_pid - clone a thread with pre-select pid number. > | + */ > | +asmlinkage int sys_clone_with_pid(struct pt_regs regs) > | +{ > | + unsigned long clone_flags; > | + unsigned long newsp; > | + int __user *parent_tidptr, *child_tidptr; > | + pid_t pid_nr; > | + > | + clone_flags = regs.bx; > | + newsp = regs.cx; > | + parent_tidptr = (int __user *)regs.dx; > | + child_tidptr = (int __user *)regs.di; > | + pid_nr = regs.bp; > | + > | + if (!newsp) > | + newsp = regs.sp; > | + return do_fork(clone_flags, newsp, ®s, pid_nr, parent_tidptr, > | + child_tidptr); > | +} > | + > | /* > | * This is trivial, and on the face of it looks like it > | * could equally well be done in user mode. > | diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_tabl > | index 5543136..5191117 100644 > | --- a/arch/x86/kernel/syscall_table_32.S > | +++ b/arch/x86/kernel/syscall_table_32.S > | @@ -334,3 +334,4 @@ ENTRY(sys_call_table) > | .long sys_inotify_init1 > | .long sys_checkpoint > | .long sys_restart > | + .long sys_clone_with_pid > | diff --git a/arch/x86/mm/checkpoint.c b/arch/x86/mm/checkpoint.c > | index 50bde9a..a4aee65 100644 > | --- a/arch/x86/mm/checkpoint.c > | +++ b/arch/x86/mm/checkpoint.c > | @@ -7,7 +7,6 @@ > | * License. See the file COPYING in the main directory of the Linux > | * distribution for more details. > | */ > | - > | #include <asm/desc.h> > | #include <asm/i387.h> > | > | diff --git a/checkpoint/checkpoint.c b/checkpoint/checkpoint.c > | index 64155de..b7de611 100644 > | --- a/checkpoint/checkpoint.c > | +++ b/checkpoint/checkpoint.c > | @@ -8,6 +8,7 @@ > | * distribution for more details. > | */ > | > | +#define DEBUG > | #include <linux/version.h> > | #include <linux/sched.h> > | #include <linux/ptrace.h> > | @@ -564,3 +565,4 @@ int do_checkpoint(struct cr_ctx *ctx, pid_t pid) > | out: > | return ret; > | } > | + > | diff --git a/checkpoint/ckpt_file.c b/checkpoint/ckpt_file.c > | index e3097ac..a8c5ad5 100644 > | --- a/checkpoint/ckpt_file.c > | +++ b/checkpoint/ckpt_file.c > | @@ -7,7 +7,7 @@ > | * License. See the file COPYING in the main directory of the Linux > | * distribution for more details. > | */ > | - > | +#define DEBUG > | #include <linux/kernel.h> > | #include <linux/sched.h> > | #include <linux/file.h> > | diff --git a/checkpoint/ckpt_mem.c b/checkpoint/ckpt_mem.c > | index 4925ff2..ca5840b 100644 > | --- a/checkpoint/ckpt_mem.c > | +++ b/checkpoint/ckpt_mem.c > | @@ -7,7 +7,7 @@ > | * License. See the file COPYING in the main directory of the Linux > | * distribution for more details. > | */ > | - > | +#define DEBUG > | #include <linux/kernel.h> > | #include <linux/sched.h> > | #include <linux/slab.h> > | diff --git a/checkpoint/restart.c b/checkpoint/restart.c > | index 7ec4de4..30e43c2 100644 > | --- a/checkpoint/restart.c > | +++ b/checkpoint/restart.c > | @@ -8,6 +8,7 @@ > | * distribution for more details. > | */ > | > | +#define DEBUG > | #include <linux/version.h> > | #include <linux/sched.h> > | #include <linux/wait.h> > | @@ -242,7 +243,7 @@ static int cr_read_task_struct(struct cr_ctx *ctx) > | memcpy(t->comm, buf, min(hh->task_comm_len, TASK_COMM_LEN)); > | } > | kfree(buf); > | - > | + pr_debug("read task %s\n", t->comm); > | /* FIXME: restore remaining relevant task_struct fields */ > | out: > | cr_hbuf_put(ctx, sizeof(*hh)); > | diff --git a/checkpoint/rstr_file.c b/checkpoint/rstr_file.c > | index f44b081..755e40e 100644 > | --- a/checkpoint/rstr_file.c > | +++ b/checkpoint/rstr_file.c > | @@ -7,7 +7,7 @@ > | * License. See the file COPYING in the main directory of the Linux > | * distribution for more details. > | */ > | - > | +#define DEBUG > | #include <linux/kernel.h> > | #include <linux/sched.h> > | #include <linux/fs.h> > | diff --git a/checkpoint/rstr_mem.c b/checkpoint/rstr_mem.c > | index 4d5ce1a..8330468 100644 > | --- a/checkpoint/rstr_mem.c > | +++ b/checkpoint/rstr_mem.c > | @@ -7,7 +7,7 @@ > | * License. See the file COPYING in the main directory of the Linux > | * distribution for more details. > | */ > | - > | +#define DEBUG > | #include <linux/kernel.h> > | #include <linux/sched.h> > | #include <linux/fcntl.h> > | diff --git a/checkpoint/sys.c b/checkpoint/sys.c > | index f26b0c6..d1a5394 100644 > | --- a/checkpoint/sys.c > | +++ b/checkpoint/sys.c > | @@ -7,7 +7,7 @@ > | * License. See the file COPYING in the main directory of the Linux > | * distribution for more details. > | */ > | - > | +#define DEBUG > | #include <linux/sched.h> > | #include <linux/nsproxy.h> > | #include <linux/kernel.h> > | @@ -263,7 +263,6 @@ asmlinkage long sys_checkpoint(pid_t pid, int fd, unsigned > | return PTR_ERR(ctx); > | > | ret = do_checkpoint(ctx, pid); > | - > | if (!ret) > | ret = ctx->crid; > | > | @@ -304,3 +303,4 @@ asmlinkage long sys_restart(int crid, int fd, unsigned lon > | cr_ctx_put(ctx); > | return ret; > | } > | + > | diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h > | index 217cf6e..bc2c202 100644 > | --- a/include/linux/checkpoint.h > | +++ b/include/linux/checkpoint.h > | @@ -114,7 +114,6 @@ extern int cr_write_files(struct cr_ctx *ctx, struct task_ > | extern int do_restart(struct cr_ctx *ctx, pid_t pid); > | extern int cr_read_mm(struct cr_ctx *ctx); > | extern int cr_read_files(struct cr_ctx *ctx); > | - > | #ifdef pr_fmt > | #undef pr_fmt > | #endif > | diff --git a/include/linux/pid.h b/include/linux/pid.h > | index d7e98ff..86e2f61 100644 > | --- a/include/linux/pid.h > | +++ b/include/linux/pid.h > | @@ -119,7 +119,7 @@ extern struct pid *find_get_pid(int nr); > | extern struct pid *find_ge_pid(int nr, struct pid_namespace *); > | int next_pidmap(struct pid_namespace *pid_ns, int last); > | > | -extern struct pid *alloc_pid(struct pid_namespace *ns); > | +extern struct pid *alloc_pid(struct pid_namespace *ns, pid_t pid_nr); > | extern void free_pid(struct pid *pid); > | > | /* > | diff --git a/include/linux/sched.h b/include/linux/sched.h > | index 0150e90..7fb4e28 100644 > | --- a/include/linux/sched.h > | +++ b/include/linux/sched.h > | @@ -28,6 +28,7 @@ > | #define CLONE_NEWPID 0x20000000 /* New pid namespace */ > | #define CLONE_NEWNET 0x40000000 /* New network namespace */ > | #define CLONE_IO 0x80000000 /* Clone io context */ > | +#define CLONE_WITH_PID 0x00001000 /* Clone with pre-select PID */ > | > | /* > | * Scheduling policies > | diff --git a/kernel/exit.c b/kernel/exit.c > | index 2d8be7e..4baf651 100644 > | --- a/kernel/exit.c > | +++ b/kernel/exit.c > | @@ -3,7 +3,7 @@ > | * > | * Copyright (C) 1991, 1992 Linus Torvalds > | */ > | - > | +#define DEBUG > | #include <linux/mm.h> > | #include <linux/slab.h> > | #include <linux/interrupt.h> > | @@ -1676,6 +1676,7 @@ static long do_wait(enum pid_type type, struct pid *pid, > | DECLARE_WAITQUEUE(wait, current); > | struct task_struct *tsk; > | int retval; > | + int level; > > and this (level is not used). > | > | trace_sched_process_wait(pid); > | > | @@ -1708,7 +1709,6 @@ repeat: > | retval = tsk_result; > | goto end; > | } > | - > | if (options & __WNOTHREAD) > | break; > | tsk = next_thread(tsk); > | @@ -1817,7 +1817,6 @@ asmlinkage long sys_wait4(pid_t upid, int __user *stat_a > | type = PIDTYPE_PID; > | pid = find_get_pid(upid); > | } > | - > | ret = do_wait(type, pid, options | WEXITED, NULL, stat_addr, ru); > | put_pid(pid); > | > | diff --git a/kernel/fork.c b/kernel/fork.c > | index 085ce56..262ae1e 100644 > | --- a/kernel/fork.c > | +++ b/kernel/fork.c > | @@ -10,7 +10,7 @@ > | * Fork is rather simple, once you get the hang of it, but the memory > | * management can be a bitch. See 'mm/memory.c': 'copy_page_range()' > | */ > | - > | +#define DEBUG > | #include <linux/slab.h> > | #include <linux/init.h> > | #include <linux/unistd.h> > | @@ -959,10 +959,19 @@ static struct task_struct *copy_process(unsigned long cl > | int retval; > | struct task_struct *p; > | int cgroup_callbacks_done = 0; > | + pid_t clone_pid = stack_size; > | > | if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) > | return ERR_PTR(-EINVAL); > | > | + /* We only allow the clone_with_pid when a new pid namespace is > | + * created. FIXME: how to restrict it. > > Not sure why CLONE_NEWPID is required to set pid_nr. In fact with CLONE_NEWPID, > by definition, pid_nr should be 1. Also, what happens if a container has > more than one process - where the second process has a pid_nr > 2 ? > _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linux-foundation.org/mailman/listinfo/containers