populate_seccomp_data is expensive: it works by inspecting task_pt_regs and various other bits to piece together all the information, and it's does so in multiple partially redundant steps. Arch-specific code in the syscall entry path can do much better. Admittedly this adds a bit of additional room for error, but the speedup should be worth it. Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxxxxxx> --- include/linux/seccomp.h | 2 +- kernel/seccomp.c | 32 +++++++++++++++++++------------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 8345fdc..4fc7a84 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -37,7 +37,7 @@ static inline int secure_computing(void) #define SECCOMP_PHASE1_OK 0 #define SECCOMP_PHASE1_SKIP 1 -extern u32 seccomp_phase1(void); +extern u32 seccomp_phase1(struct seccomp_data *sd); int seccomp_phase2(u32 phase1_result); #else extern void secure_computing_strict(int this_syscall); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index d737445..391f6c4 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -171,24 +171,27 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) * * Returns valid seccomp BPF response codes. */ -static u32 seccomp_run_filters(void) +static u32 seccomp_run_filters(struct seccomp_data *sd) { struct seccomp_filter *f; - struct seccomp_data sd; + struct seccomp_data sd_local; u32 ret = SECCOMP_RET_ALLOW; /* Ensure unexpected behavior doesn't result in failing open. */ if (WARN_ON(current->seccomp.filter == NULL)) return SECCOMP_RET_KILL; - populate_seccomp_data(&sd); + if (!sd) { + populate_seccomp_data(&sd_local); + sd = &sd_local; + } /* * All filters in the list are evaluated and the lowest BPF return * value always takes priority (ignoring the DATA). */ for (f = current->seccomp.filter; f; f = f->prev) { - u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd); + u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)sd); if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) ret = cur_ret; @@ -415,7 +418,7 @@ void secure_computing_strict(int this_syscall) #else int __secure_computing(void) { - u32 phase1_result = seccomp_phase1(); + u32 phase1_result = seccomp_phase1(NULL); if (likely(phase1_result == SECCOMP_PHASE1_OK)) return 0; @@ -426,22 +429,22 @@ int __secure_computing(void) } #ifdef CONFIG_SECCOMP_FILTER -static u32 __seccomp_phase1_filter(int this_syscall, struct pt_regs *regs) +static u32 __seccomp_phase1_filter(int this_syscall, struct seccomp_data *sd) { - u32 filter_ret = seccomp_run_filters(); + u32 filter_ret = seccomp_run_filters(sd); int data = filter_ret & SECCOMP_RET_DATA; u32 action = filter_ret & SECCOMP_RET_ACTION; switch (action) { case SECCOMP_RET_ERRNO: /* Set the low-order 16-bits as a errno. */ - syscall_set_return_value(current, regs, + syscall_set_return_value(current, task_pt_regs(current), -data, 0); goto skip; case SECCOMP_RET_TRAP: /* Show the handler the original registers. */ - syscall_rollback(current, regs); + syscall_rollback(current, task_pt_regs(current)); /* Let the filter pass back 16 bits of data. */ seccomp_send_sigsys(this_syscall, data); goto skip; @@ -468,11 +471,14 @@ skip: /** * seccomp_phase1() - run fast path seccomp checks on the current syscall + * @arg sd: The seccomp_data or NULL * * This only reads pt_regs via the syscall_xyz helpers. The only change * it will make to pt_regs is via syscall_set_return_value, and it will * only do that if it returns SECCOMP_PHASE1_SKIP. * + * If sd is provided, it will not read pt_regs at all. + * * It may also call do_exit or force a signal; these actions must be * safe. * @@ -486,11 +492,11 @@ skip: * If it returns anything else, then the return value should be passed * to seccomp_phase2 from a context in which ptrace hooks are safe. */ -u32 seccomp_phase1(void) +u32 seccomp_phase1(struct seccomp_data *sd) { int mode = current->seccomp.mode; - struct pt_regs *regs = task_pt_regs(current); - int this_syscall = syscall_get_nr(current, regs); + int this_syscall = sd ? sd->nr : + syscall_get_nr(current, task_pt_regs(current)); switch (mode) { case SECCOMP_MODE_STRICT: @@ -498,7 +504,7 @@ u32 seccomp_phase1(void) return SECCOMP_PHASE1_OK; #ifdef CONFIG_SECCOMP_FILTER case SECCOMP_MODE_FILTER: - return __seccomp_phase1_filter(this_syscall, regs); + return __seccomp_phase1_filter(this_syscall, sd); #endif default: BUG(); -- 1.9.3