On Mon, Oct 29, 2018 at 04:40:30PM -0600, Tycho Andersen wrote: > + resp.id = req.id; > + resp.error = -512; /* -ERESTARTSYS */ > + resp.val = 0; > + > + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); So, it turns out this *doesn't* work, and the reason this test was passing is because of poor hygiene on my part. Per the documentation in include/linux/errno.h, /* * These should never be seen by user programs. To return one of ERESTART* * codes, signal_pending() MUST be set. Note that ptrace can observe these * at syscall exit tracing, but they will never be left for the debugged user * process to see. */ #define ERESTARTSYS 512 So basically, if you respond with -ERESTARTSYS with no signal pending, you'll leak it to userspace. It turns out this is already possible with SECCOMP_RET_TRAP (and probably ptrace alone, although I didn't try it out), see the program below. The question is: do we care? If so, it seems like we may need to handle the -ERESTARTSYS-style cases even when there is no signal pending. If we don't, there's precedent for us to just do the same thing as what happens for SECCOMP_RET_TRACE, but we should probably at least fix the docs. Tycho #include <stdio.h> #include <stddef.h> #include <stdbool.h> #include <unistd.h> #include <stdlib.h> #include <errno.h> #include <signal.h> #include <string.h> #include <sys/syscall.h> #include <sys/types.h> #include <sys/socket.h> #include <sys/wait.h> #include <sys/ptrace.h> #include <sys/ioctl.h> #include <inttypes.h> #include <linux/filter.h> #include <linux/seccomp.h> #include <linux/ptrace.h> #include <linux/elf.h> #include <pthread.h> static int filter_syscall(int syscall_nr) { struct sock_filter filter[] = { BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr)), BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, syscall_nr, 0, 1), BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_TRACE), BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), }; struct sock_fprog bpf_prog = { .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])), .filter = filter, }; int ret; ret = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, 0, &bpf_prog); if (ret < 0) { perror("prctl failed"); return -1; } return ret; } typedef struct { uint64_t r15; uint64_t r14; uint64_t r13; uint64_t r12; uint64_t bp; uint64_t bx; uint64_t r11; uint64_t r10; uint64_t r9; uint64_t r8; uint64_t ax; uint64_t cx; uint64_t dx; uint64_t si; uint64_t di; uint64_t orig_ax; uint64_t ip; uint64_t cs; uint64_t flags; uint64_t sp; uint64_t ss; uint64_t fs_base; uint64_t gs_base; uint64_t ds; uint64_t es; uint64_t fs; uint64_t gs; } user_regs_struct64; int main(int argc, char **argv) { pid_t pid; user_regs_struct64 regs; struct iovec iov = {.iov_base = ®s, .iov_len = sizeof(regs)}; int status; pid = fork(); if (pid == 0) { if (signal(SIGUSR1, signal_handler) == SIG_ERR) { perror("signal"); exit(1); } if (filter_syscall(__NR_getpid) < 0) exit(1); /* i'm lazy, so sue me :) */ sleep(1); errno = 0; pid = syscall(__NR_getpid); /* * we get: * getpid(): -1, errno: 512 * probably should get * getpid(): <pid> errno: 0 */ printf("getpid(): %d, errno: %d\n", pid, errno); exit(errno); } if (ptrace(PTRACE_ATTACH, pid, NULL, 0) < 0) { perror("ptrace attach"); goto out; } if (waitpid(pid, NULL, 0) != pid) { perror("waitpid"); goto out; } if (ptrace(PTRACE_SETOPTIONS, pid, NULL, PTRACE_O_TRACESECCOMP) < 0) { perror("ptrace setoptions"); goto out; } if (ptrace(PTRACE_CONT, pid, NULL, 0) != 0) { perror("ptrace cont"); goto out; } if (waitpid(pid, &status, 0) != pid) { perror("wait for trace"); goto out; } if (status >> 8 != (SIGTRAP | (PTRACE_EVENT_SECCOMP<<8))) { printf("got bad trap event?\n"); goto out; } if (ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov) < 0) { perror("getregset"); goto out; } /* * Tell the syscall to restart. Per include/linux/errno.h this should * only be set when signal_pending() is set. But we just won't send * any signals to the process, and we'll see this in userspace. */ regs.ax = -512; /* -ERESTARTSYS */ /* * This makes the this_syscall < 0 check in __seccomp_filter() * trigger, so we skip the syscall and return whatever is in ax */ regs.orig_ax = -512; /* -ERESTARTSYS */ if (ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &iov) < 0) { perror("setregset"); goto out; } if (ptrace(PTRACE_CONT, pid, NULL, 0) < 0) { perror("cont after setregset"); goto out; } while (1) { if (waitpid(pid, &status, 0) != pid) { perror("wait for death"); goto out; } if (!WIFSTOPPED(status)) { break; } if (ptrace(PTRACE_CONT, pid, NULL, 0) < 0) { perror("cont after setregset"); goto out; } } if (WIFSIGNALED(status)) { printf("didn't exit: %d\n", WTERMSIG(status)); return 1; } if (WEXITSTATUS(status)) { printf("exited: %d\n", WEXITSTATUS(status)); return 1; } return 0; out: kill(pid, SIGKILL); return 1; } _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linuxfoundation.org/mailman/listinfo/containers