On Mon, Jun 01, 2015 at 12:38:57PM -0700, Andy Lutomirski wrote: > On Mon, Jun 1, 2015 at 12:28 PM, Tycho Andersen > <tycho.andersen@xxxxxxxxxxxxx> wrote: > > This patch is the first step in enabling checkpoint/restore of processes > > with seccomp enabled. > > > > One of the things CRIU does while dumping tasks is inject code into them > > via ptrace to collect information that is only available to the process > > itself. However, if we are in a seccomp mode where these processes are > > prohibited from making these syscalls, then what CRIU does kills the task. > > > > This patch adds a new ptrace command, PTRACE_SUSPEND_SECCOMP that enables a > > task from the init user namespace which has CAP_SYS_ADMIN to disable (and > > re-enable) seccomp filters for another task so that they can be > > successfully dumped (and restored). > > > > Signed-off-by: Tycho Andersen <tycho.andersen@xxxxxxxxxxxxx> > > CC: Kees Cook <keescook@xxxxxxxxxxxx> > > CC: Andy Lutomirski <luto@xxxxxxxxxxxxxx> > > CC: Will Drewry <wad@xxxxxxxxxxxx> > > CC: Roland McGrath <roland@xxxxxxxxxxxxx> > > CC: Oleg Nesterov <oleg@xxxxxxxxxx> > > CC: Pavel Emelyanov <xemul@xxxxxxxxxxxxx> > > CC: Serge E. Hallyn <serge.hallyn@xxxxxxxxxx> > > --- > > include/linux/seccomp.h | 8 ++++++ > > include/uapi/linux/ptrace.h | 1 + > > kernel/ptrace.c | 10 ++++++++ > > kernel/seccomp.c | 62 ++++++++++++++++++++++++++++++++++++++++++++- > > 4 files changed, 80 insertions(+), 1 deletion(-) > > > > diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h > > index a19ddac..7cc870f 100644 > > --- a/include/linux/seccomp.h > > +++ b/include/linux/seccomp.h > > @@ -25,6 +25,9 @@ struct seccomp_filter; > > struct seccomp { > > int mode; > > struct seccomp_filter *filter; > > +#ifdef CONFIG_CHECKPOINT_RESTORE > > + bool suspended; > > +#endif > > }; > > > > #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER > > @@ -53,6 +56,11 @@ static inline int seccomp_mode(struct seccomp *s) > > return s->mode; > > } > > > > +#ifdef CONFIG_CHECKPOINT_RESTORE > > +extern int suspend_seccomp(struct task_struct *); > > +extern int resume_seccomp(struct task_struct *); > > +#endif > > + > > #else /* CONFIG_SECCOMP */ > > > > #include <linux/errno.h> > > diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h > > index cf1019e..8ba4e4f 100644 > > --- a/include/uapi/linux/ptrace.h > > +++ b/include/uapi/linux/ptrace.h > > @@ -17,6 +17,7 @@ > > #define PTRACE_CONT 7 > > #define PTRACE_KILL 8 > > #define PTRACE_SINGLESTEP 9 > > +#define PTRACE_SUSPEND_SECCOMP 10 > > > > #define PTRACE_ATTACH 16 > > #define PTRACE_DETACH 17 > > diff --git a/kernel/ptrace.c b/kernel/ptrace.c > > index c8e0e05..a6b6527 100644 > > --- a/kernel/ptrace.c > > +++ b/kernel/ptrace.c > > @@ -15,6 +15,7 @@ > > #include <linux/highmem.h> > > #include <linux/pagemap.h> > > #include <linux/ptrace.h> > > +#include <linux/seccomp.h> > > #include <linux/security.h> > > #include <linux/signal.h> > > #include <linux/uio.h> > > @@ -1003,6 +1004,15 @@ int ptrace_request(struct task_struct *child, long request, > > break; > > } > > #endif > > + > > +#if defined(CONFIG_SECCOMP) && defined(CONFIG_CHECKPOINT_RESTORE) > > + case PTRACE_SUSPEND_SECCOMP: > > + if (data) > > + return suspend_seccomp(child); > > + else > > + return resume_seccomp(child); > > +#endif > > + > > default: > > break; > > } > > diff --git a/kernel/seccomp.c b/kernel/seccomp.c > > index 980fd26..a358a58 100644 > > --- a/kernel/seccomp.c > > +++ b/kernel/seccomp.c > > @@ -569,6 +569,7 @@ static int mode1_syscalls_32[] = { > > static void __secure_computing_strict(int this_syscall) > > { > > int *syscall_whitelist = mode1_syscalls; > > + > > #ifdef CONFIG_COMPAT > > if (is_compat_task()) > > syscall_whitelist = mode1_syscalls_32; > > @@ -590,6 +591,11 @@ void secure_computing_strict(int this_syscall) > > { > > int mode = current->seccomp.mode; > > > > +#ifdef CONFIG_CHECKPOINT_RESTORE > > + if (current->seccomp.suspended) > > + return; > > +#endif > > IMO it's unfortunate that this has any runtime overhead at all. Can > it be suspend be multiplexed into mode to avoid this? Yes, I can make that change. Something like: #define SECCOMP_SUSPENDED 0x8000000 and then masking it off at the appropriate places? Tycho -- To unsubscribe from this list: send the line "unsubscribe linux-api" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html