On Thu, Jan 12, 2012 at 6:51 PM, Randy Dunlap <rdunlap@xxxxxxxxxxxx> wrote: > On 01/12/2012 03:38 PM, Will Drewry wrote: >> include/linux/prctl.h | 3 + >> include/linux/seccomp.h | 68 +++++- >> kernel/Makefile | 1 + >> kernel/fork.c | 4 + >> kernel/seccomp.c | 8 + >> kernel/seccomp_filter.c | 620 +++++++++++++++++++++++++++++++++++++++++++++++ >> kernel/sys.c | 4 + >> security/Kconfig | 12 + >> 8 files changed, 717 insertions(+), 3 deletions(-) >> create mode 100644 kernel/seccomp_filter.c >> >> diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h >> index cc7a4e9..0296871 100644 >> --- a/include/linux/seccomp.h >> +++ b/include/linux/seccomp.h >> @@ -5,9 +5,28 @@ >> #ifdef CONFIG_SECCOMP >> >> #include <linux/thread_info.h> >> +#include <linux/types.h> >> #include <asm/seccomp.h> >> >> -typedef struct { int mode; } seccomp_t; >> +struct seccomp_filter; >> +/** >> + * struct seccomp_struct - the state of a seccomp'ed process >> + * >> + * @mode: >> + * if this is 0, seccomp is not in use. >> + * is 1, the process is under standard seccomp rules. >> + * is 2, the process is only allowed to make system calls where >> + * associated filters evaluate successfully. >> + * @filter: Metadata for filter if using CONFIG_SECCOMP_FILTER. >> + * @filter must only be accessed from the context of current as there >> + * is no guard. >> + */ >> +typedef struct seccomp_struct { >> + int mode; >> +#ifdef CONFIG_SECCOMP_FILTER >> + struct seccomp_filter *filter; >> +#endif >> +} seccomp_t; >> >> extern void __secure_computing(int); >> static inline void secure_computing(int this_syscall) >> @@ -28,8 +47,7 @@ static inline int seccomp_mode(seccomp_t *s) >> >> #include <linux/errno.h> >> >> -typedef struct { } seccomp_t; >> - >> +typedef struct seccomp_struct { } seccomp_t; >> #define secure_computing(x) do { } while (0) >> >> static inline long prctl_get_seccomp(void) >> @@ -49,4 +67,48 @@ static inline int seccomp_mode(seccomp_t *s) >> >> #endif /* CONFIG_SECCOMP */ >> >> +#ifdef CONFIG_SECCOMP_FILTER >> + >> + >> +extern long prctl_attach_seccomp_filter(char __user *); >> + >> +extern struct seccomp_filter *get_seccomp_filter(struct seccomp_filter *); >> +extern void put_seccomp_filter(struct seccomp_filter *); >> + >> +extern int seccomp_test_filters(int); >> +extern void seccomp_filter_log_failure(int); >> +extern void seccomp_struct_fork(struct seccomp_struct *child, >> + const struct seccomp_struct *parent); >> + >> +static inline void seccomp_struct_init_task(struct seccomp_struct *seccomp) >> +{ >> + seccomp->mode = 0; >> + seccomp->filter = NULL; >> +} >> + >> +/* No locking is needed here because the task_struct will >> + * have no parallel consumers. >> + */ > > (in multiple places:) Kernel multi-line comment style is: > > /* > * first line of text > * more stuff > */ Thanks! I'll roll through and clean them all up. My apologies! >> +static inline void seccomp_struct_free_task(struct seccomp_struct *seccomp) >> +{ >> + put_seccomp_filter(seccomp->filter); >> + seccomp->filter = NULL; >> +} >> + >> +#else /* CONFIG_SECCOMP_FILTER */ >> + >> +#include <linux/errno.h> >> + >> +struct seccomp_filter { }; >> +/* Macros consume the unused dereference by the caller. */ >> +#define seccomp_struct_init_task(_seccomp) do { } while (0); >> +#define seccomp_struct_fork(_tsk, _orig) do { } while (0); >> +#define seccomp_struct_free_task(_seccomp) do { } while (0); >> + >> +static inline long prctl_attach_seccomp_filter(char __user *a2) >> +{ >> + return -ENOSYS; >> +} >> + >> +#endif /* CONFIG_SECCOMP_FILTER */ >> #endif /* _LINUX_SECCOMP_H */ > > >> diff --git a/kernel/seccomp.c b/kernel/seccomp.c >> index 57d4b13..78719be 100644 >> --- a/kernel/seccomp.c >> +++ b/kernel/seccomp.c >> @@ -47,6 +47,14 @@ void __secure_computing(int this_syscall) >> return; >> } while (*++syscall); >> break; >> +#ifdef CONFIG_SECCOMP_FILTER >> + case 2: > > Can we get macros (defines) for these @modes instead of using > inline constants? Certainly! >> + if (seccomp_test_filters(this_syscall) == 0) >> + return; >> + >> + seccomp_filter_log_failure(this_syscall); >> + break; >> +#endif >> default: >> BUG(); >> } >> diff --git a/kernel/seccomp_filter.c b/kernel/seccomp_filter.c >> new file mode 100644 >> index 0000000..108a3f3 >> --- /dev/null >> +++ b/kernel/seccomp_filter.c >> @@ -0,0 +1,620 @@ >> +/* bpf program-based system call filtering >> + * >> + * This program is free software; you can redistribute it and/or modify >> + * it under the terms of the GNU General Public License as published by >> + * the Free Software Foundation; either version 2 of the License, or >> + * (at your option) any later version. >> + * >> + * This program is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> + * GNU General Public License for more details. >> + * >> + * You should have received a copy of the GNU General Public License >> + * along with this program; if not, write to the Free Software >> + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. >> + * >> + * Copyright (C) 2011 The Chromium OS Authors <chromium-os-dev@xxxxxxxxxxxx> >> + */ >> + Aside from being a year off, is there a current style? I just went off of an existing file. > >> +/* seccomp_struct_fork: manages inheritance on fork > > /** > * seccomp_struct_fork - manages inheritance on fork Thanks - sorry! >> + * @child: forkee's seccomp_struct >> + * @parent: forker's seccomp_struct >> + * Ensures that @child inherit a seccomp_filter iff seccomp is enabled >> + * and the set of filters is marked as 'enabled'. >> + */ >> +void seccomp_struct_fork(struct seccomp_struct *child, >> + const struct seccomp_struct *parent) >> +{ >> + if (!parent->mode) >> + return; >> + child->mode = parent->mode; >> + child->filter = get_seccomp_filter(parent->filter); >> +} >> + >> +/* Returns a pointer to the BPF evaluator after checking the offset and size >> + * boundaries. The signature almost matches the signature from >> + * net/core/filter.c with the hopes of sharing code in the future. > > Use kernel multi-line comment style. Of course. >> + */ >> +static const void *load_pointer(const u8 *buf, size_t buflen, >> + int offset, size_t size, >> + void *unused) >> +{ >> + if (offset >= buflen) >> + goto fail; >> + if (offset < 0) >> + goto fail; >> + if (size > buflen - offset) >> + goto fail; >> + return buf + offset; >> +fail: >> + return NULL; >> +} >> + > >> diff --git a/security/Kconfig b/security/Kconfig >> index 51bd5a0..77b1106 100644 >> --- a/security/Kconfig >> +++ b/security/Kconfig >> @@ -84,6 +84,18 @@ config SECURITY_DMESG_RESTRICT >> >> If you are unsure how to answer this question, answer N. >> >> +config SECCOMP_FILTER >> + bool "Enable seccomp-based system call filtering" >> + select SECCOMP >> + depends on EXPERIMENTAL >> + help >> + This kernel feature expands CONFIG_SECCOMP to allow computing >> + in environments with reduced kernel access dictated by a system >> + call filter, expressed in BPF, installed by the application itself >> + through prctl(2). > > This help text is only useful to someone who already knows what it does/means > IMO. I'll attempt to clean that up so it makes actual sense to those without context! >> + >> + See Documentation/prctl/seccomp_filter.txt for more detail. > > Yes, I'll look at that.. Awesome - thanks! >> + >> config SECURITY >> bool "Enable different security models" >> depends on SYSFS > > > -- > ~Randy > *** Remember to use Documentation/SubmitChecklist when testing your code *** -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html