From: Sargun Dhillon <sargun@xxxxxxxxx> This extends the ptrace API to allow fetching eBPF seccomp filters attached to a task. This is to enable checkpoint/restore use cases. The user will have to use the traditional PTRACE_SECCOMP_GET_FILTER API call first, and if it fails with EMEDIUMTYPE ("wrong medium type") they can switch over to the eBPF variant of the API -- PTRACE_SECCOMP_GET_FILTER_EXTENDED -- which returns a file descriptor referring to the filter's eBPF program. Conversely, PTRACE_SECCOMP_GET_FILTER_EXTENDED fails with EMEDIUMTYPE when the requested filter is a classic BPF filter. Signed-off-by: Sargun Dhillon <sargun@xxxxxxxxx> Link: https://lists.linux-foundation.org/pipermail/containers/2018-February/038478.html [YiFei: increase ptrace number to 0x4210] Signed-off-by: YiFei Zhu <yifeifz2@xxxxxxxxxxxx> --- include/linux/seccomp.h | 12 ++++++++++++ include/uapi/linux/ptrace.h | 2 ++ kernel/ptrace.c | 4 ++++ kernel/seccomp.c | 37 +++++++++++++++++++++++++++++++++++++ 4 files changed, 55 insertions(+) diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index c0750dc05de5..7ce9e3b3fa80 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -122,6 +122,18 @@ static inline long seccomp_get_metadata(struct task_struct *task, return -EINVAL; } #endif /* CONFIG_SECCOMP_FILTER && CONFIG_CHECKPOINT_RESTORE */ +#if defined(CONFIG_SECCOMP_FILTER_EXTENDED) && defined(CONFIG_CHECKPOINT_RESTORE) +extern long seccomp_get_filter_extended(struct task_struct *task, + unsigned long n, + void __user *data); +#else +static inline long seccomp_get_filter_extended(struct task_struct *task, + unsigned long n, + void __user *data) +{ + return -EINVAL; +} +#endif /* CONFIG_SECCOMP_FILTER_EXTENDED && CONFIG_CHECKPOINT_RESTORE */ #ifdef CONFIG_SECCOMP_CACHE_DEBUG struct seq_file; diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h index 3747bf816f9a..725a03614c28 100644 --- a/include/uapi/linux/ptrace.h +++ b/include/uapi/linux/ptrace.h @@ -112,6 +112,8 @@ struct ptrace_rseq_configuration { __u32 pad; }; +#define PTRACE_SECCOMP_GET_FILTER_EXTENDED 0x4210 + /* * These values are stored in task->ptrace_message * by tracehook_report_syscall_* to 
describe the current syscall-stop. diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 76f09456ec4b..1e8d2155231f 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -1247,6 +1247,10 @@ int ptrace_request(struct task_struct *child, long request, break; #endif + case PTRACE_SECCOMP_GET_FILTER_EXTENDED: + ret = seccomp_get_filter_extended(child, addr, datavp); + break; + default: break; } diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 1ef26a5bf93f..8550ae885245 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -2168,6 +2168,43 @@ long seccomp_get_metadata(struct task_struct *task, } #endif +#if defined(CONFIG_SECCOMP_FILTER_EXTENDED) && defined(CONFIG_CHECKPOINT_RESTORE) +long seccomp_get_filter_extended(struct task_struct *task, + unsigned long filter_off, + void __user *data) +{ + struct seccomp_filter *filter; + struct bpf_prog *prog; + long ret; + + if (!capable(CAP_SYS_ADMIN) || + current->seccomp.mode != SECCOMP_MODE_DISABLED) { + return -EACCES; + } + + filter = get_nth_filter(task, filter_off); + if (IS_ERR(filter)) + return PTR_ERR(filter); + + if (bpf_prog_was_classic(filter->prog)) { + ret = -EMEDIUMTYPE; + goto out; + } + prog = bpf_prog_inc_not_zero(filter->prog); + if (IS_ERR(prog)) { + ret = PTR_ERR(prog); + goto out; + } + + ret = bpf_prog_new_fd(filter->prog); + if (ret < 0) + bpf_prog_put(prog); +out: + __put_seccomp_filter(filter); + return ret; +} +#endif + #ifdef CONFIG_SYSCTL /* Human readable action names for friendly sysctl interaction */ -- 2.31.1