From: Tycho Andersen <tandersen@xxxxxxxxxxx> Zbigniew mentioned at Linux Plumber's that systemd is interested in switching to execveat() for service execution, but can't, because the contents of /proc/pid/comm are the file descriptor which was used, instead of the path to the binary. This makes the output of tools like top and ps useless, especially in a world where most fds are opened CLOEXEC so the number is truly meaningless. This patch adds an AT_ flag to fix up /proc/pid/comm to instead be the contents of argv[0], instead of the fdno. Signed-off-by: Tycho Andersen <tandersen@xxxxxxxxxxx> Suggested-by: Zbigniew Jędrzejewski-Szmek <zbyszek@xxxxxxxxx> CC: Aleksa Sarai <cyphar@xxxxxxxxxx> --- There is some question about what to name the flag; it seems to me that "everyone wants this" instead of the fdno, but probably "REASONABLE" is not a good choice. Also, requiring the arg to alloc_bprm() is a bit ugly: kernel-based execs will never use this, so they just have to pass an empty thing. We could introduce a bprm_fixup_comm() to do the munging there, but then the code paths start to diverge, which is maybe not nice. I left it this way because this is the smallest patch in terms of size, but I'm happy to change it. Finally, here is a small set of test programs, I'm happy to turn them into kselftests if we agree on an API #include <stdio.h> #include <unistd.h> #include <stdlib.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> int main(void) { int fd; char buf[128]; fd = open("/proc/self/comm", O_RDONLY); if (fd < 0) { perror("open comm"); exit(1); } if (read(fd, buf, 128) < 0) { perror("read"); exit(1); } printf("comm: %s", buf); exit(0); } #define _GNU_SOURCE #include <stdio.h> #include <syscall.h> #include <stdbool.h> #include <unistd.h> #include <fcntl.h> #include <stdlib.h> #include <errno.h> #include <sys/wait.h> #ifndef AT_EMPTY_PATH #define AT_EMPTY_PATH 0x1000 /* Allow empty relative */ #endif #ifndef AT_EXEC_REASONABLE_COMM #define AT_EXEC_REASONABLE_COMM 0x200 #endif int main(int argc, char *argv[]) { pid_t pid; int status; bool wants_reasonable_comm = argc > 1; pid = fork(); if (pid < 0) { perror("fork"); exit(1); } if (pid == 0) { int fd; long ret, flags; fd = open("./catprocselfcomm", O_PATH); if (fd < 0) { perror("open catprocselfname"); exit(1); } flags = AT_EMPTY_PATH; if (wants_reasonable_comm) flags |= AT_EXEC_REASONABLE_COMM; syscall(__NR_execveat, fd, "", (char *[]){"./catprocselfcomm", NULL}, NULL, flags); fprintf(stderr, "execveat failed %d\n", errno); exit(1); } if (waitpid(pid, &status, 0) != pid) { fprintf(stderr, "wrong child\n"); exit(1); } if (!WIFEXITED(status)) { fprintf(stderr, "exit status %x\n", status); exit(1); } if (WEXITSTATUS(status) != 0) { fprintf(stderr, "child failed\n"); exit(1); } return 0; } --- fs/exec.c | 22 ++++++++++++++++++---- include/uapi/linux/fcntl.h | 3 ++- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index dad402d55681..36434feddb7b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1569,11 +1569,15 @@ static void free_bprm(struct linux_binprm *bprm) kfree(bprm); } -static struct linux_binprm *alloc_bprm(int fd, struct filename *filename, int flags) +static struct linux_binprm *alloc_bprm(int fd, struct filename *filename, + struct user_arg_ptr argv, int flags) { struct linux_binprm *bprm; struct file *file; int retval = -ENOMEM; + bool needs_comm_fixup = flags & AT_EXEC_REASONABLE_COMM; + + flags &= ~AT_EXEC_REASONABLE_COMM; file = do_open_execat(fd, filename, flags); if (IS_ERR(file)) @@ -1590,11 +1594,20 @@ static struct linux_binprm *alloc_bprm(int fd, struct filename *filename, int fl if (fd == AT_FDCWD || filename->name[0] == '/') { bprm->filename = filename->name; } else { - if (filename->name[0] == '\0') + if (needs_comm_fixup) { + const char __user *p = get_user_arg_ptr(argv, 0); + + retval = -EFAULT; + if (!p) + goto out_free; + + bprm->fdpath = strndup_user(p, MAX_ARG_STRLEN); + } else if (filename->name[0] == '\0') bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd); else bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s", fd, filename->name); + retval = -ENOMEM; if (!bprm->fdpath) goto out_free; @@ -1969,7 +1982,7 @@ static int do_execveat_common(int fd, struct filename *filename, * further execve() calls fail. */ current->flags &= ~PF_NPROC_EXCEEDED; - bprm = alloc_bprm(fd, filename, flags); + bprm = alloc_bprm(fd, filename, argv, flags); if (IS_ERR(bprm)) { retval = PTR_ERR(bprm); goto out_ret; @@ -2034,6 +2047,7 @@ int kernel_execve(const char *kernel_filename, struct linux_binprm *bprm; int fd = AT_FDCWD; int retval; + struct user_arg_ptr user_argv = {}; /* It is non-sense for kernel threads to call execve */ if (WARN_ON_ONCE(current->flags & PF_KTHREAD)) @@ -2043,7 +2057,7 @@ int kernel_execve(const char *kernel_filename, if (IS_ERR(filename)) return PTR_ERR(filename); - bprm = alloc_bprm(fd, filename, 0); + bprm = alloc_bprm(fd, filename, user_argv, 0); if (IS_ERR(bprm)) { retval = PTR_ERR(bprm); goto out_ret; diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 87e2dec79fea..7178d1e4a3de 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -100,7 +100,8 @@ /* Reserved for per-syscall flags 0xff. */ #define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ -/* Reserved for per-syscall flags 0x200 */ +#define AT_EXEC_REASONABLE_COMM 0x200 /* Use argv[0] for comm in + execveat */ #define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ #define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal. */ base-commit: baeb9a7d8b60b021d907127509c44507539c15e5 -- 2.34.1