This adds the new "seccomp" syscall with both an "operation" and "flags" parameter for future expansion. The third argument is a pointer value, used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). In addition to the TSYNC flag in the following patch, there is a non-zero chance that this syscall could be used for configuring a fixed argument area for seccomp-tracer-aware processes to pass syscall arguments in the future. Hence, the use of "seccomp" not simply "seccomp_add_filter" for this syscall. Additionally, this syscall uses operation, flags, and user pointer for arguments because strictly passing arguments via a user pointer would mean seccomp itself would be unable to trivially filter the seccomp syscall itself. Signed-off-by: Kees Cook <keescook@xxxxxxxxxxxx> Cc: linux-api@xxxxxxxxxxxxxxx --- arch/Kconfig | 1 + arch/x86/syscalls/syscall_32.tbl | 1 + arch/x86/syscalls/syscall_64.tbl | 1 + include/linux/syscalls.h | 2 ++ include/uapi/asm-generic/unistd.h | 4 ++- include/uapi/linux/seccomp.h | 4 +++ kernel/seccomp.c | 63 ++++++++++++++++++++++++++++++++----- kernel/sys_ni.c | 3 ++ 8 files changed, 70 insertions(+), 9 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 97ff872c7acc..0eae9df35b88 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -321,6 +321,7 @@ config HAVE_ARCH_SECCOMP_FILTER - secure_computing is called from a ptrace_event()-safe context - secure_computing return value is checked and a return value of -1 results in the system call being skipped immediately. + - seccomp syscall wired up config SECCOMP_FILTER def_bool y diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index d6b867921612..7527eac24122 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -360,3 +360,4 @@ 351 i386 sched_setattr sys_sched_setattr 352 i386 sched_getattr sys_sched_getattr 353 i386 renameat2 sys_renameat2 +354 i386 seccomp sys_seccomp diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index ec255a1646d2..16272a6c12b7 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -323,6 +323,7 @@ 314 common sched_setattr sys_sched_setattr 315 common sched_getattr sys_sched_getattr 316 common renameat2 sys_renameat2 +317 common seccomp sys_seccomp # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b0881a0ed322..1713977ee26f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs); #endif diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 333640608087..65acbf0e2867 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -699,9 +699,11 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr) __SYSCALL(__NR_sched_getattr, sys_sched_getattr) #define __NR_renameat2 276 __SYSCALL(__NR_renameat2, sys_renameat2) +#define __NR_seccomp 277 +__SYSCALL(__NR_seccomp, sys_seccomp) #undef __NR_syscalls -#define __NR_syscalls 277 +#define __NR_syscalls 278 /* * All syscalls below here should go away really, diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index ac2dc9f72973..b258878ba754 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -10,6 +10,10 @@ #define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */ #define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */ +/* Valid operations for seccomp syscall. */ +#define SECCOMP_SET_MODE_STRICT 0 +#define SECCOMP_SET_MODE_FILTER 1 + /* * All BPF programs must return a 32-bit value. * The bottom 16-bits are for optional return data. diff --git a/kernel/seccomp.c b/kernel/seccomp.c index c81000dd0a53..051c3da4fb4c 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -19,6 +19,7 @@ #include <linux/sched.h> #include <linux/seccomp.h> #include <linux/slab.h> +#include <linux/syscalls.h> /* #define SECCOMP_DEBUG 1 */ @@ -308,8 +309,8 @@ free_prog: * * Returns filter on success and ERR_PTR otherwise. */ -static -struct seccomp_filter *seccomp_prepare_user_filter(char __user *user_filter) +static struct seccomp_filter * +seccomp_prepare_user_filter(const char __user *user_filter) { struct sock_fprog fprog; struct seccomp_filter *filter = ERR_PTR(-EFAULT); @@ -332,19 +333,25 @@ out: /** * seccomp_attach_filter: validate and attach filter + * @flags: flags to change filter behavior * @filter: seccomp filter to add to the current process * * Caller must be holding current->sighand->siglock lock. * * Returns 0 on success, -ve on error. */ -static long seccomp_attach_filter(struct seccomp_filter *filter) +static long seccomp_attach_filter(unsigned int flags, + struct seccomp_filter *filter) { unsigned long total_insns; struct seccomp_filter *walker; BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + /* Validate flags. */ + if (flags != 0) + return -EINVAL; + /* Validate resulting filter length. */ total_insns = filter->prog->len; for (walker = current->seccomp.filter; walker; walker = walker->prev) @@ -555,6 +562,7 @@ out: #ifdef CONFIG_SECCOMP_FILTER /** * seccomp_set_mode_filter: internal function for setting seccomp filter + * @flags: flags to change filter behavior * @filter: struct sock_fprog containing filter * * This function may be called repeatedly to install additional filters. @@ -565,7 +573,8 @@ out: * * Returns 0 on success or -EINVAL on failure. */ -static long seccomp_set_mode_filter(char __user *filter) +static long seccomp_set_mode_filter(unsigned int flags, + const char __user *filter) { const unsigned long seccomp_mode = SECCOMP_MODE_FILTER; struct seccomp_filter *prepared = NULL; @@ -582,7 +591,7 @@ static long seccomp_set_mode_filter(char __user *filter) if (!seccomp_check_mode(current, seccomp_mode)) goto out; - ret = seccomp_attach_filter(prepared); + ret = seccomp_attach_filter(flags, prepared); if (ret) goto out; /* Do not free the successfully attached filter. */ @@ -595,12 +604,35 @@ out: return ret; } #else -static inline long seccomp_set_mode_filter(char __user *filter) +static inline long seccomp_set_mode_filter(unsigned int flags, + const char __user *filter) { return -EINVAL; } #endif +/* Common entry point for both prctl and syscall. */ +static long do_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs) +{ + switch (op) { + case SECCOMP_SET_MODE_STRICT: + if (flags != 0 || uargs != NULL) + return -EINVAL; + return seccomp_set_mode_strict(); + case SECCOMP_SET_MODE_FILTER: + return seccomp_set_mode_filter(flags, uargs); + default: + return -EINVAL; + } +} + +SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags, + const char __user *, uargs) +{ + return do_seccomp(op, flags, uargs); +} + /** * prctl_set_seccomp: configures current->seccomp.mode * @seccomp_mode: requested mode to use @@ -610,12 +642,27 @@ static inline long seccomp_set_mode_filter(char __user *filter) */ long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter) { + unsigned int op; + char __user *uargs; + switch (seccomp_mode) { case SECCOMP_MODE_STRICT: - return seccomp_set_mode_strict(); + op = SECCOMP_SET_MODE_STRICT; + /* + * Setting strict mode through prctl always ignored filter, + * so make sure it is always NULL here to pass the internal + * check in do_seccomp(). + */ + uargs = NULL; + break; case SECCOMP_MODE_FILTER: - return seccomp_set_mode_filter(filter); + op = SECCOMP_SET_MODE_FILTER; + uargs = filter; + break; default: return -EINVAL; } + + /* prctl interface doesn't have flags, so they are always zero. */ + return do_seccomp(op, 0, uargs); } diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 36441b51b5df..2904a2105914 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -213,3 +213,6 @@ cond_syscall(compat_sys_open_by_handle_at); /* compare kernel pointers */ cond_syscall(sys_kcmp); + +/* operate on Secure Computing state */ +cond_syscall(sys_seccomp); -- 1.7.9.5