This is basically a test-suite for setns() and as of now contains:
- test that we can't pass garbage flags
- test that we can't attach to the namespaces of a task that has already exited
- test that we can incrementally setns into all namespaces of a target task
  using a pidfd
- test that we can setns atomically into all namespaces of a target task
- test that we can't cross setns into a user namespace outside of our user
  namespace hierarchy
- test that we can't setns into namespaces owned by user namespaces over which
  we are not privileged

Signed-off-by: Christian Brauner <christian.brauner@xxxxxxxxxx>
---
/* v2 */
patch introduced
---
 tools/testing/selftests/pidfd/.gitignore      |   1 +
 tools/testing/selftests/pidfd/Makefile        |   3 +-
 tools/testing/selftests/pidfd/config          |   6 +
 .../selftests/pidfd/pidfd_setns_test.c        | 450 ++++++++++++++++++
 4 files changed, 459 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/pidfd/config
 create mode 100644 tools/testing/selftests/pidfd/pidfd_setns_test.c

diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore
index 2d4db5afb142..973198a3ec3d 100644
--- a/tools/testing/selftests/pidfd/.gitignore
+++ b/tools/testing/selftests/pidfd/.gitignore
@@ -5,3 +5,4 @@ pidfd_test
 pidfd_wait
 pidfd_fdinfo_test
 pidfd_getfd_test
+pidfd_setns_test
diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile
index 75a545861375..f4a2f28f926b 100644
--- a/tools/testing/selftests/pidfd/Makefile
+++ b/tools/testing/selftests/pidfd/Makefile
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0-only
 CFLAGS += -g -I../../../../usr/include/ -pthread
 
-TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test pidfd_poll_test pidfd_wait pidfd_getfd_test
+TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \
+	pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/pidfd/config b/tools/testing/selftests/pidfd/config
new file mode 100644
index 000000000000..bb11de90c0c9
--- /dev/null
+++ b/tools/testing/selftests/pidfd/config
@@ -0,0 +1,6 @@
+CONFIG_UTS_NS=y
+CONFIG_IPC_NS=y
+CONFIG_USER_NS=y
+CONFIG_PID_NS=y
+CONFIG_NET_NS=y
+CONFIG_CGROUPS=y
diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c
new file mode 100644
index 000000000000..c327cc3b0ec9
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c
@@ -0,0 +1,450 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/types.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <linux/kcmp.h>
+
+#include "pidfd.h"
+#include "../clone3/clone3_selftests.h"
+#include "../kselftest.h"
+#include "../kselftest_harness.h"
+
+/* Indices into ns_info[] and into the per-task namespace fd arrays. */
+enum {
+	PIDFD_NS_USER,
+	PIDFD_NS_MNT,
+	PIDFD_NS_PID,
+	PIDFD_NS_UTS,
+	PIDFD_NS_IPC,
+	PIDFD_NS_NET,
+	PIDFD_NS_CGROUP,
+	PIDFD_NS_PIDCLD,
+	PIDFD_NS_MAX
+};
+
+/*
+ * Maps each namespace to its /proc/<pid>/ns/ entry name and to the CLONE_NEW*
+ * flag used to request it. pid_for_children has no flag of its own (0).
+ */
+static const struct ns_info {
+	const char *proc_name;
+	int clone_flag;
+	const char *flag_name;
+} ns_info[] = {
+	[PIDFD_NS_USER] = { "user", CLONE_NEWUSER, "CLONE_NEWUSER", },
+	[PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, "CLONE_NEWNS", },
+	[PIDFD_NS_PID] = { "pid", CLONE_NEWPID, "CLONE_NEWPID", },
+	[PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, "CLONE_NEWUTS", },
+	[PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, "CLONE_NEWIPC", },
+	[PIDFD_NS_NET] = { "net", CLONE_NEWNET, "CLONE_NEWNET", },
+	[PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, "CLONE_NEWCGROUP", },
+	[PIDFD_NS_PIDCLD] = { "pid_for_children", 0, "INVALID_FLAG_PID_FOR_CHILDREN", },
+};
+
+FIXTURE(current_nsset)
+{
+	pid_t pid;
+	int pidfd;
+	int ns_fds[PIDFD_NS_MAX];
+
+	pid_t child_pid_exited;
+	int child_pidfd_exited;
+
+	pid_t child_pid_all_ns_stopped1;
+	int child_pidfd_all_ns_stopped1;
+	int child_ns_fds_all_ns_stopped1[PIDFD_NS_MAX];
+
+	pid_t child_pid_all_ns_stopped2;
+	int child_pidfd_all_ns_stopped2;
+	int child_ns_fds_all_ns_stopped2[PIDFD_NS_MAX];
+};
+
+/* Raw waitid() that discards both siginfo and rusage. */
+static int sys_waitid(int which, pid_t pid, int options)
+{
+	return syscall(__NR_waitid, which, pid, NULL, options, NULL);
+}
+
+/* clone3() a child with @flags and store a pidfd for it in @pidfd. */
+static pid_t create_child(int *pidfd, unsigned flags)
+{
+	struct clone_args args = {
+		.flags		= CLONE_PIDFD | flags,
+		.exit_signal	= SIGCHLD,
+		.pidfd		= ptr_to_u64(pidfd),
+	};
+
+	return sys_clone3(&args, sizeof(struct clone_args));
+}
+
+FIXTURE_SETUP(current_nsset)
+{
+	int i;
+	int proc_fd;
+
+	for (i = 0; i < PIDFD_NS_MAX; i++) {
+		self->ns_fds[i] = -EBADF;
+		self->child_ns_fds_all_ns_stopped1[i] = -EBADF;
+		self->child_ns_fds_all_ns_stopped2[i] = -EBADF;
+	}
+
+	proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC);
+	ASSERT_GE(proc_fd, 0) {
+		TH_LOG("%m - Failed to open /proc/self/ns");
+	}
+
+	/* Record our own namespaces before any test calls setns(). */
+	self->pid = getpid();
+	for (i = 0; i < PIDFD_NS_MAX; i++) {
+		const struct ns_info *info = &ns_info[i];
+
+		self->ns_fds[i] = openat(proc_fd, info->proc_name, O_RDONLY | O_CLOEXEC);
+		if (self->ns_fds[i] < 0) {
+			EXPECT_EQ(errno, ENOENT) {
+				TH_LOG("%m - Failed to open %s namespace for process %d",
+				       info->proc_name, self->pid);
+			}
+		}
+	}
+
+	/* /proc/self/ns is only needed to open our own namespace fds. */
+	EXPECT_EQ(0, close(proc_fd));
+
+	self->pidfd = sys_pidfd_open(self->pid, 0);
+	ASSERT_GE(self->pidfd, 0) {
+		TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
+	}
+
+	/* Create task that exits right away. */
+	self->child_pid_exited = create_child(&self->child_pidfd_exited,
+					      CLONE_NEWUSER | CLONE_NEWNET);
+	ASSERT_GE(self->child_pid_exited, 0);
+
+	if (self->child_pid_exited == 0)
+		_exit(EXIT_SUCCESS);
+
+	/* WNOWAIT keeps the zombie around so teardown can still reap it. */
+	ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0);
+
+	/* Create tasks that will be stopped. */
+	self->child_pid_all_ns_stopped1 = create_child(&self->child_pidfd_all_ns_stopped1,
+						       CLONE_NEWUSER |
+						       CLONE_NEWNS |
+						       CLONE_NEWCGROUP |
+						       CLONE_NEWIPC |
+						       CLONE_NEWUTS |
+						       CLONE_NEWPID |
+						       CLONE_NEWNET);
+	ASSERT_GE(self->child_pid_all_ns_stopped1, 0);
+
+	if (self->child_pid_all_ns_stopped1 == 0) {
+		pause();
+		_exit(EXIT_SUCCESS);
+	}
+
+	self->child_pid_all_ns_stopped2 = create_child(&self->child_pidfd_all_ns_stopped2,
+						       CLONE_NEWUSER |
+						       CLONE_NEWNS |
+						       CLONE_NEWCGROUP |
+						       CLONE_NEWIPC |
+						       CLONE_NEWUTS |
+						       CLONE_NEWPID |
+						       CLONE_NEWNET);
+	ASSERT_GE(self->child_pid_all_ns_stopped2, 0);
+
+	if (self->child_pid_all_ns_stopped2 == 0) {
+		pause();
+		_exit(EXIT_SUCCESS);
+	}
+
+	for (i = 0; i < PIDFD_NS_MAX; i++) {
+		char path[100];
+
+		const struct ns_info *info = &ns_info[i];
+
+		(void)snprintf(path, sizeof(path), "/proc/%d/ns/%s",
+			       self->child_pid_all_ns_stopped1, info->proc_name);
+		self->child_ns_fds_all_ns_stopped1[i] = open(path, O_RDONLY | O_CLOEXEC);
+		if (self->child_ns_fds_all_ns_stopped1[i] < 0) {
+			EXPECT_EQ(errno, ENOENT) {
+				TH_LOG("%m - Failed to open %s namespace for process %d",
+				       info->proc_name, self->child_pid_all_ns_stopped1);
+			}
+		}
+
+		(void)snprintf(path, sizeof(path), "/proc/%d/ns/%s",
+			       self->child_pid_all_ns_stopped2, info->proc_name);
+		self->child_ns_fds_all_ns_stopped2[i] = open(path, O_RDONLY | O_CLOEXEC);
+		if (self->child_ns_fds_all_ns_stopped2[i] < 0) {
+			EXPECT_EQ(errno, ENOENT) {
+				TH_LOG("%m - Failed to open %s namespace for process %d",
+				       info->proc_name, self->child_pid_all_ns_stopped2);
+			}
+		}
+	}
+}
+
+FIXTURE_TEARDOWN(current_nsset)
+{
+	int i;
+
+	ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd_all_ns_stopped1,
+					SIGKILL, NULL, 0), 0);
+	ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd_all_ns_stopped2,
+					SIGKILL, NULL, 0), 0);
+
+	for (i = 0; i < PIDFD_NS_MAX; i++) {
+		if (self->ns_fds[i] >= 0)
+			close(self->ns_fds[i]);
+		if (self->child_ns_fds_all_ns_stopped1[i] >= 0)
+			close(self->child_ns_fds_all_ns_stopped1[i]);
+		if (self->child_ns_fds_all_ns_stopped2[i] >= 0)
+			close(self->child_ns_fds_all_ns_stopped2[i]);
+	}
+
+	EXPECT_EQ(0, close(self->pidfd));
+	EXPECT_EQ(0, close(self->child_pidfd_exited));
+	EXPECT_EQ(0, close(self->child_pidfd_all_ns_stopped1));
+	EXPECT_EQ(0, close(self->child_pidfd_all_ns_stopped2));
+	ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0);
+	ASSERT_EQ(sys_waitid(P_PID, self->child_pid_all_ns_stopped1, WEXITED), 0);
+	ASSERT_EQ(sys_waitid(P_PID, self->child_pid_all_ns_stopped2, WEXITED), 0);
+}
+
+/* Open /proc/@pid/ns/@ns; returns an O_CLOEXEC fd or -1 with errno set. */
+static int preserve_ns(const int pid, const char *ns)
+{
+	int ret;
+	/* 5 /proc + 21 /int_as_str + 3 /ns + 20 /NS_NAME + 1 \0 */
+	char path[50];
+
+	/* This way we can use this function to also check whether namespaces
+	 * are supported by the kernel by passing in the NULL or the empty
+	 * string.
+	 */
+	ret = snprintf(path, sizeof(path), "/proc/%d/ns%s%s", pid,
+		       !ns || strcmp(ns, "") == 0 ? "" : "/",
+		       !ns || strcmp(ns, "") == 0 ? "" : ns);
+	if (ret < 0 || (size_t)ret >= sizeof(path)) {
+		errno = EFBIG;
+		return -1;
+	}
+
+	return open(path, O_RDONLY | O_CLOEXEC);
+}
+
+/*
+ * Compare the namespace behind @ns_fd1 with the @ns namespace of @pid2.
+ * Returns 1 if they are the same, 0 if they differ and -1 on error.
+ */
+static int in_same_namespace(int ns_fd1, pid_t pid2, const char *ns)
+{
+	int ns_fd2 = -EBADF;
+	int ret = -1;
+	struct stat ns_st1, ns_st2;
+
+	ret = fstat(ns_fd1, &ns_st1);
+	if (ret < 0)
+		return -1;
+
+	ns_fd2 = preserve_ns(pid2, ns);
+	if (ns_fd2 < 0)
+		return -1;
+
+	ret = fstat(ns_fd2, &ns_st2);
+	close(ns_fd2);
+	if (ret < 0)
+		return -1;
+
+	/* processes are in the same namespace */
+	if ((ns_st1.st_dev == ns_st2.st_dev) &&
+	    (ns_st1.st_ino == ns_st2.st_ino))
+		return 1;
+
+	/* processes are in different namespaces */
+	return 0;
+}
+
+/* Test that we can't pass garbage to the kernel. */
+TEST_F(current_nsset, invalid_flags)
+{
+	ASSERT_NE(setns(self->pidfd, 0), 0);
+	EXPECT_EQ(errno, EINVAL);
+
+	ASSERT_NE(setns(self->pidfd, -1), 0);
+	EXPECT_EQ(errno, EINVAL);
+
+	ASSERT_NE(setns(self->pidfd, CLONE_VM), 0);
+	EXPECT_EQ(errno, EINVAL);
+
+	ASSERT_NE(setns(self->pidfd, CLONE_NEWUSER | CLONE_VM), 0);
+	EXPECT_EQ(errno, EINVAL);
+}
+
+/* Test that we can't attach to a task that has already exited. */
+TEST_F(current_nsset, exited_child)
+{
+	int i;
+	pid_t pid;
+
+	ASSERT_NE(setns(self->child_pidfd_exited, CLONE_NEWUSER | CLONE_NEWNET), 0);
+	EXPECT_EQ(errno, ESRCH);
+
+	pid = getpid();
+	for (i = 0; i < PIDFD_NS_MAX; i++) {
+		const struct ns_info *info = &ns_info[i];
+		/* Verify that we haven't changed any namespaces. */
+		if (self->ns_fds[i] >= 0)
+			ASSERT_EQ(in_same_namespace(self->ns_fds[i], pid, info->proc_name), 1);
+	}
+}
+
+/* Test that we can setns into a task's namespaces one flag at a time. */
+TEST_F(current_nsset, incremental_setns)
+{
+	int i;
+	pid_t pid;
+
+	pid = getpid();
+	for (i = 0; i < PIDFD_NS_MAX; i++) {
+		const struct ns_info *info = &ns_info[i];
+		int nsfd;
+
+		if (self->child_ns_fds_all_ns_stopped1[i] < 0)
+			continue;
+
+		if (info->clone_flag) {
+			ASSERT_EQ(setns(self->child_pidfd_all_ns_stopped1, info->clone_flag), 0) {
+				TH_LOG("%m - Failed to setns to %s namespace of %d",
+				       info->proc_name, self->child_pid_all_ns_stopped1);
+			}
+		}
+
+		/*
+		 * Verify that we have changed to the correct namespaces.
+		 * CLONE_NEWPID only affects pid_for_children, so our own
+		 * pid namespace must be unchanged.
+		 */
+		if (info->clone_flag == CLONE_NEWPID)
+			nsfd = self->ns_fds[i];
+		else
+			nsfd = self->child_ns_fds_all_ns_stopped1[i];
+		ASSERT_EQ(in_same_namespace(nsfd, pid, info->proc_name), 1) {
+			TH_LOG("setns failed to place us correctly into %s namespace of %d",
+			       info->proc_name, self->child_pid_all_ns_stopped1);
+		}
+		TH_LOG("Managed to correctly setns to %s namespace of %d",
+		       info->proc_name, self->child_pid_all_ns_stopped1);
+	}
+}
+
+/* Test that we can setns into all of a task's namespaces in one call. */
+TEST_F(current_nsset, one_shot_setns)
+{
+	unsigned flags = 0;
+	int i;
+	pid_t pid;
+
+	for (i = 0; i < PIDFD_NS_MAX; i++) {
+		const struct ns_info *info = &ns_info[i];
+
+		if (self->child_ns_fds_all_ns_stopped1[i] < 0)
+			continue;
+
+		flags |= info->clone_flag;
+		TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
+		       info->proc_name, self->child_pid_all_ns_stopped1);
+	}
+
+	ASSERT_EQ(setns(self->child_pidfd_all_ns_stopped1, flags), 0) {
+		TH_LOG("%m - Failed to setns to namespaces of %d",
+		       self->child_pid_all_ns_stopped1);
+	}
+
+	pid = getpid();
+	for (i = 0; i < PIDFD_NS_MAX; i++) {
+		const struct ns_info *info = &ns_info[i];
+		int nsfd;
+
+		if (self->child_ns_fds_all_ns_stopped1[i] < 0)
+			continue;
+
+		/* Verify that we have changed to the correct namespaces. */
+		if (info->clone_flag == CLONE_NEWPID)
+			nsfd = self->ns_fds[i];
+		else
+			nsfd = self->child_ns_fds_all_ns_stopped1[i];
+		ASSERT_EQ(in_same_namespace(nsfd, pid, info->proc_name), 1) {
+			TH_LOG("setns failed to place us correctly into %s namespace of %d",
+			       info->proc_name, self->child_pid_all_ns_stopped1);
+		}
+		TH_LOG("Managed to correctly setns to %s namespace of %d",
+		       info->proc_name, self->child_pid_all_ns_stopped1);
+	}
+}
+
+/*
+ * Test that after joining the first child's namespaces we can't setns into
+ * any namespace of the second child.
+ */
+TEST_F(current_nsset, no_foul_play)
+{
+	unsigned flags = 0;
+	int i;
+
+	for (i = 0; i < PIDFD_NS_MAX; i++) {
+		const struct ns_info *info = &ns_info[i];
+
+		if (self->child_ns_fds_all_ns_stopped1[i] < 0)
+			continue;
+
+		flags |= info->clone_flag;
+		if (info->clone_flag) /* No use logging pid_for_children. */
+			TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
+			       info->proc_name, self->child_pid_all_ns_stopped1);
+	}
+
+	ASSERT_EQ(setns(self->child_pidfd_all_ns_stopped1, flags), 0) {
+		TH_LOG("%m - Failed to setns to namespaces of %d",
+		       self->child_pid_all_ns_stopped1);
+	}
+
+	/*
+	 * Can't setns to a user namespace outside of our hierarchy since we
+	 * don't have caps in there and didn't create it. That means that under
+	 * no circumstances should we be able to setns to any of the other
+	 * ones since they aren't owned by our user namespace.
+	 */
+	for (i = 0; i < PIDFD_NS_MAX; i++) {
+		const struct ns_info *info = &ns_info[i];
+
+		if (self->child_ns_fds_all_ns_stopped1[i] < 0)
+			continue;
+
+		if (!info->clone_flag)
+			continue;
+
+		ASSERT_NE(setns(self->child_pidfd_all_ns_stopped2, info->clone_flag), 0) {
+			TH_LOG("Managed to setns to %s namespace of %d",
+			       info->proc_name, self->child_pid_all_ns_stopped2);
+		}
+		TH_LOG("%m - Correctly failed to setns to %s namespace of %d",
+		       info->proc_name, self->child_pid_all_ns_stopped2);
+	}
+}
+
+TEST_HARNESS_MAIN
-- 
2.26.2

_______________________________________________
Containers mailing list
Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linuxfoundation.org/mailman/listinfo/containers