The kill() syscall operates on process identifiers. After a process has exited its pid can be reused by another process. If a caller sends a signal to a reused pid it will end up signaling the wrong process. This issue has often surfaced and there has been a push [1] to address this problem. A prior patch has introduced the ability to get a file descriptor referencing struct pid by opening /proc/<pid>. This guarantees a stable handle on a process which can be used to send signals to the referenced process. Discussion has shown that a dedicated syscall is preferable over ioctl()s. Thus, the new syscall procfd_signal() is introduced to solve this problem. It operates on a process file descriptor. The syscall takes an additional siginfo_t and flags argument. If siginfo_t is NULL then procfd_signal() behaves like kill() if it is not NULL it behaves like rt_sigqueueinfo. The flags argument is added to allow for future extensions of this syscall. It currently needs to be passed as 0. With this patch a process can be killed via: #define _GNU_SOURCE #include <errno.h> #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <signal.h> #include <sys/stat.h> #include <sys/syscall.h> #include <sys/types.h> #include <unistd.h> int main(int argc, char *argv[]) { int ret; char buf[1000]; if (argc < 2) exit(EXIT_FAILURE); ret = snprintf(buf, sizeof(buf), "/proc/%s", argv[1]); if (ret < 0) exit(EXIT_FAILURE); int fd = open(buf, O_DIRECTORY | O_CLOEXEC); if (fd < 0) { printf("%s - Failed to open \"%s\"\n", strerror(errno), buf); exit(EXIT_FAILURE); } ret = syscall(__NR_procfd_signal, fd, SIGKILL, NULL, 0); if (ret < 0) { printf("Failed to send SIGKILL \"%s\"\n", strerror(errno)); close(fd); exit(EXIT_FAILURE); } close(fd); exit(EXIT_SUCCESS); } [1]: https://lkml.org/lkml/2018/11/18/130 Cc: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx> Cc: Serge Hallyn <serge@xxxxxxxxxx> Cc: Jann Horn <jannh@xxxxxxxxxx> Cc: Kees Cook <keescook@xxxxxxxxxxxx> Cc: Andy Lutomirsky <luto@xxxxxxxxxx> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Cc: Oleg Nesterov <oleg@xxxxxxxxxx> Cc: Aleksa Sarai <cyphar@xxxxxxxxxx> Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx> Signed-off-by: Christian Brauner <christian@xxxxxxxxxx> --- Changelog: v1: - patch introduced --- arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + fs/proc/base.c | 6 ++ include/linux/proc_fs.h | 1 + include/linux/syscalls.h | 2 + kernel/signal.c | 76 ++++++++++++++++++++++++-- 6 files changed, 81 insertions(+), 6 deletions(-) diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 3cf7b533b3d1..e637eab883e9 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -398,3 +398,4 @@ 384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl 385 i386 io_pgetevents sys_io_pgetevents __ia32_compat_sys_io_pgetevents 386 i386 rseq sys_rseq __ia32_sys_rseq +387 i386 procfd_signal sys_procfd_signal __ia32_sys_procfd_signal diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index f0b1709a5ffb..e95f6741ab42 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -343,6 +343,7 @@ 332 common statx __x64_sys_statx 333 common io_pgetevents __x64_sys_io_pgetevents 334 common rseq __x64_sys_rseq +335 common procfd_signal __x64_sys_procfd_signal # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/fs/proc/base.c b/fs/proc/base.c index 6365a4fea314..a12c9de92bd0 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3055,6 +3055,12 @@ static const struct file_operations proc_tgid_base_operations = { .release = proc_tgid_release, }; +bool proc_is_procfd(const struct file *file) +{ + return d_is_dir(file->f_path.dentry) && + (file->f_op == &proc_tgid_base_operations); +} + static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { return proc_pident_lookup(dir, dentry, diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index d0e1f1522a78..2d53a47fba34 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -73,6 +73,7 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo int (*show)(struct seq_file *, void *), proc_write_t write, void *data); +extern bool proc_is_procfd(const struct file *file); #else /* CONFIG_PROC_FS */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 2ac3d13a915b..a5ca8cb84566 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -907,6 +907,8 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, unsigned mask, struct statx __user *buffer); asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len, int flags, uint32_t sig); +asmlinkage long sys_procfd_signal(int fd, int sig, siginfo_t __user *info, + int flags); /* * Architecture-specific system calls diff --git a/kernel/signal.c b/kernel/signal.c index 9a32bc2088c9..e8a8929de710 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -19,7 +19,9 @@ #include <linux/sched/task.h> #include <linux/sched/task_stack.h> #include <linux/sched/cputime.h> +#include <linux/file.h> #include <linux/fs.h> +#include <linux/proc_fs.h> #include <linux/tty.h> #include <linux/binfmts.h> #include <linux/coredump.h> @@ -3286,6 +3288,16 @@ COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese, } #endif +static inline void prepare_kill_siginfo(int sig, struct kernel_siginfo *info) +{ + clear_siginfo(info); + info->si_signo = sig; + info->si_errno = 0; + info->si_code = SI_USER; + info->si_pid = task_tgid_vnr(current); + info->si_uid = from_kuid_munged(current_user_ns(), current_uid()); +} + /** * sys_kill - send a signal to a process * @pid: the PID of the process @@ -3295,16 +3307,68 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig) { struct kernel_siginfo info; - clear_siginfo(&info); - info.si_signo = sig; - info.si_errno = 0; - info.si_code = SI_USER; - info.si_pid = task_tgid_vnr(current); - info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); + prepare_kill_siginfo(sig, &info); return kill_something_info(sig, &info, pid); } +/** + * sys_procfd_signal - send a signal to a process through a process file + * descriptor + * @fd: the file descriptor of the process + * @sig: signal to be sent + * @info: the signal info + * @flags: future flags to be passed + */ +SYSCALL_DEFINE4(procfd_signal, int, fd, int, sig, siginfo_t __user *, info, + int, flags) +{ + int ret; + struct pid *pid; + kernel_siginfo_t kinfo; + struct fd f; + + /* Enforce flags be set to 0 until we add an extension. */ + if (flags) + return -EINVAL; + + f = fdget_raw(fd); + if (!f.file) + return -EBADF; + + ret = -EINVAL; + /* Is this a process file descriptor? */ + if (!proc_is_procfd(f.file)) + goto err; + + pid = f.file->private_data; + if (!pid) + goto err; + + if (info) { + ret = __copy_siginfo_from_user(sig, &kinfo, info); + if (unlikely(ret)) + goto err; + /* + * Not even root can pretend to send signals from the kernel. + * Nor can they impersonate a kill()/tgkill(), which adds + * source info. + */ + ret = -EPERM; + if ((kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL) && + (task_pid(current) != pid)) + goto err; + } else { + prepare_kill_siginfo(sig, &kinfo); + } + + ret = kill_pid_info(sig, &kinfo, pid); + +err: + fdput(f); + return ret; +} + static int do_send_specific(pid_t tgid, pid_t pid, int sig, struct kernel_siginfo *info) { -- 2.19.1