For systems that provide multiple syscall maps based on architecture (e.g. AUDIT_ARCH_X86_64 and AUDIT_ARCH_I386 via CONFIG_COMPAT), provide a fast way to pin a process to a specific syscall mapping, instead of requiring every filter to be generated with an architecture check as its first filter action. Cc: Andy Lutomirski <luto@xxxxxxxxxxxxxx> Cc: Will Drewry <wad@xxxxxxxxxxxx> Signed-off-by: Kees Cook <keescook@xxxxxxxxxxxx> --- include/linux/seccomp.h | 3 +++ include/uapi/linux/seccomp.h | 1 + kernel/seccomp.c | 37 ++++++++++++++++++++++++++++++++++-- 3 files changed, 39 insertions(+), 2 deletions(-) diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index babcd6c02d09..6525ddec177a 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -30,6 +30,9 @@ struct seccomp_filter; */ struct seccomp { int mode; +#ifdef CONFIG_COMPAT + u32 arch; +#endif atomic_t filter_count; struct seccomp_filter *filter; }; diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index c1735455bc53..84e89bb201ae 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -16,6 +16,7 @@ #define SECCOMP_SET_MODE_FILTER 1 #define SECCOMP_GET_ACTION_AVAIL 2 #define SECCOMP_GET_NOTIF_SIZES 3 +#define SECCOMP_PIN_ARCHITECTURE 4 /* Valid flags for SECCOMP_SET_MODE_FILTER */ #define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index a319700c04c4..43edf53c2d84 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -268,9 +268,16 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd, struct seccomp_filter **match) { u32 ret = SECCOMP_RET_ALLOW; + struct seccomp_filter *f; + +#ifdef CONFIG_COMPAT + /* Block mismatched architectures. */ + if (current->seccomp.arch && current->seccomp.arch != sd->arch) + return SECCOMP_RET_KILL_PROCESS; +#endif + /* Make sure cross-thread synced filter points somewhere sane. 
*/ - struct seccomp_filter *f = - READ_ONCE(current->seccomp.filter); + f = READ_ONCE(current->seccomp.filter); /* Ensure unexpected behavior doesn't result in failing open. */ if (WARN_ON(f == NULL)) @@ -478,6 +485,11 @@ static inline void seccomp_sync_threads(unsigned long flags) if (task_no_new_privs(caller)) task_set_no_new_privs(thread); +#ifdef CONFIG_COMPAT + /* Copy any pinned architecture. */ + thread->seccomp.arch = caller->seccomp.arch; +#endif + /* * Opt the other thread into seccomp if needed. * As threads are considered to be trust-realm @@ -1456,6 +1468,20 @@ static long seccomp_get_notif_sizes(void __user *usizes) return 0; } +static long seccomp_pin_architecture(void) +{ +#ifdef CONFIG_COMPAT + u32 arch = syscall_get_arch(current); + + /* How did you even get here? */ + if (current->seccomp.arch && current->seccomp.arch != arch) + return -EBUSY; + + current->seccomp.arch = arch; +#endif + return 0; +} + /* Common entry point for both prctl and syscall. */ static long do_seccomp(unsigned int op, unsigned int flags, void __user *uargs) @@ -1477,6 +1503,13 @@ static long do_seccomp(unsigned int op, unsigned int flags, return -EINVAL; return seccomp_get_notif_sizes(uargs); + case SECCOMP_PIN_ARCHITECTURE: + if (flags != 0) + return -EINVAL; + if (uargs != NULL) + return -EINVAL; + + return seccomp_pin_architecture(); default: return -EINVAL; } -- 2.25.1 _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linuxfoundation.org/mailman/listinfo/containers