Re: [PATCH v3 3/4] selftests/ovl: add second selftest for "override_creds"

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Feb 19, 2025 at 11:02 AM Christian Brauner <brauner@xxxxxxxxxx> wrote:
>
> Add a simple test to verify that the new "override_creds" option works.
>
> Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx>

For the added test you may add:

Reviewed-by: Amir Goldstein <amir73il@xxxxxxxxx>

But you may want to consider splitting the large infrastructure
and the churn in the previous test into a separate patch, to make
this patch cleaner.

Thanks,
Amir.

> ---
>  .../selftests/filesystems/overlayfs/Makefile       |  11 +-
>  .../filesystems/overlayfs/set_layers_via_fds.c     | 149 ++++++-
>  tools/testing/selftests/filesystems/utils.c        | 474 +++++++++++++++++++++
>  tools/testing/selftests/filesystems/utils.h        |  44 ++
>  4 files changed, 665 insertions(+), 13 deletions(-)
>
> diff --git a/tools/testing/selftests/filesystems/overlayfs/Makefile b/tools/testing/selftests/filesystems/overlayfs/Makefile
> index e8d1adb021af..6c661232b3b5 100644
> --- a/tools/testing/selftests/filesystems/overlayfs/Makefile
> +++ b/tools/testing/selftests/filesystems/overlayfs/Makefile
> @@ -1,7 +1,14 @@
>  # SPDX-License-Identifier: GPL-2.0
>
> -TEST_GEN_PROGS := dev_in_maps set_layers_via_fds
> +CFLAGS += -Wall
> +CFLAGS += $(KHDR_INCLUDES)
> +LDLIBS += -lcap
>
> -CFLAGS := -Wall -Werror
> +LOCAL_HDRS += wrappers.h log.h
> +
> +TEST_GEN_PROGS := dev_in_maps
> +TEST_GEN_PROGS += set_layers_via_fds
>
>  include ../../lib.mk
> +
> +$(OUTPUT)/set_layers_via_fds: ../utils.c
> diff --git a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
> index 70acd833581d..6b65e3610578 100644
> --- a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
> +++ b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
> @@ -6,6 +6,7 @@
>  #include <sched.h>
>  #include <stdio.h>
>  #include <string.h>
> +#include <sys/socket.h>
>  #include <sys/stat.h>
>  #include <sys/mount.h>
>  #include <unistd.h>
> @@ -13,20 +14,27 @@
>  #include "../../kselftest_harness.h"
>  #include "../../pidfd/pidfd.h"
>  #include "log.h"
> +#include "../utils.h"
>  #include "wrappers.h"
>
>  FIXTURE(set_layers_via_fds) {
> +       int pidfd;
>  };
>
>  FIXTURE_SETUP(set_layers_via_fds)
>  {
> -       ASSERT_EQ(mkdir("/set_layers_via_fds", 0755), 0);
> +       self->pidfd = -EBADF;
> +       EXPECT_EQ(mkdir("/set_layers_via_fds", 0755), 0);
>  }
>
>  FIXTURE_TEARDOWN(set_layers_via_fds)
>  {
> +       if (self->pidfd >= 0) {
> +               EXPECT_EQ(sys_pidfd_send_signal(self->pidfd, SIGKILL, NULL, 0), 0);
> +               EXPECT_EQ(close(self->pidfd), 0);
> +       }
>         umount2("/set_layers_via_fds", 0);
> -       ASSERT_EQ(rmdir("/set_layers_via_fds"), 0);
> +       EXPECT_EQ(rmdir("/set_layers_via_fds"), 0);
>  }
>
>  TEST_F(set_layers_via_fds, set_layers_via_fds)
> @@ -266,7 +274,7 @@ TEST_F(set_layers_via_fds, set_override_creds)
>         ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_STRING, "metacopy", "on", 0), 0);
>
>         pid = create_child(&pidfd, 0);
> -       EXPECT_GE(pid, 0);
> +       ASSERT_GE(pid, 0);
>         if (pid == 0) {
>                 if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "override_creds", NULL, 0)) {
>                         TH_LOG("sys_fsconfig should have succeeded");
> @@ -275,11 +283,11 @@ TEST_F(set_layers_via_fds, set_override_creds)
>
>                 _exit(EXIT_SUCCESS);
>         }
> -       EXPECT_EQ(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
> -       EXPECT_EQ(close(pidfd), 0);
> +       ASSERT_GE(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
> +       ASSERT_GE(close(pidfd), 0);
>
>         pid = create_child(&pidfd, 0);
> -       EXPECT_GE(pid, 0);
> +       ASSERT_GE(pid, 0);
>         if (pid == 0) {
>                 if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "nooverride_creds", NULL, 0)) {
>                         TH_LOG("sys_fsconfig should have succeeded");
> @@ -288,11 +296,11 @@ TEST_F(set_layers_via_fds, set_override_creds)
>
>                 _exit(EXIT_SUCCESS);
>         }
> -       EXPECT_EQ(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
> -       EXPECT_EQ(close(pidfd), 0);
> +       ASSERT_GE(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
> +       ASSERT_GE(close(pidfd), 0);
>
>         pid = create_child(&pidfd, 0);
> -       EXPECT_GE(pid, 0);
> +       ASSERT_GE(pid, 0);
>         if (pid == 0) {
>                 if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "override_creds", NULL, 0)) {
>                         TH_LOG("sys_fsconfig should have succeeded");
> @@ -301,8 +309,125 @@ TEST_F(set_layers_via_fds, set_override_creds)
>
>                 _exit(EXIT_SUCCESS);
>         }
> -       EXPECT_EQ(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
> -       EXPECT_EQ(close(pidfd), 0);
> +       ASSERT_GE(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
> +       ASSERT_GE(close(pidfd), 0);
> +
> +       ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
> +
> +       fd_overlay = sys_fsmount(fd_context, 0, 0);
> +       ASSERT_GE(fd_overlay, 0);
> +
> +       ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);
> +
> +       ASSERT_EQ(close(fd_context), 0);
> +       ASSERT_EQ(close(fd_overlay), 0);
> +}
> +
> +TEST_F(set_layers_via_fds, set_override_creds_invalid)
> +{
> +       int fd_context, fd_tmpfs, fd_overlay, ret;
> +       int layer_fds[] = { [0 ... 3] = -EBADF };
> +       pid_t pid;
> +       int fd_userns1, fd_userns2;
> +       int ipc_sockets[2];
> +       char c;
> +       const unsigned int predictable_fd_context_nr = 123;
> +
> +       fd_userns1 = get_userns_fd(0, 0, 10000);
> +       ASSERT_GE(fd_userns1, 0);
> +
> +       fd_userns2 = get_userns_fd(0, 1234, 10000);
> +       ASSERT_GE(fd_userns2, 0);
> +
> +       ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
> +       ASSERT_GE(ret, 0);
> +
> +       pid = create_child(&self->pidfd, 0);
> +       ASSERT_GE(pid, 0);
> +       if (pid == 0) {
> +               if (close(ipc_sockets[0])) {
> +                       TH_LOG("close should have succeeded");
> +                       _exit(EXIT_FAILURE);
> +               }
> +
> +               if (!switch_userns(fd_userns2, 0, 0, false)) {
> +                       TH_LOG("switch_userns should have succeeded");
> +                       _exit(EXIT_FAILURE);
> +               }
> +
> +               if (read_nointr(ipc_sockets[1], &c, 1) != 1) {
> +                       TH_LOG("read_nointr should have succeeded");
> +                       _exit(EXIT_FAILURE);
> +               }
> +
> +               if (close(ipc_sockets[1])) {
> +                       TH_LOG("close should have succeeded");
> +                       _exit(EXIT_FAILURE);
> +               }
> +
> +               if (!sys_fsconfig(predictable_fd_context_nr, FSCONFIG_SET_FLAG, "override_creds", NULL, 0)) {
> +                       TH_LOG("sys_fsconfig should have failed");
> +                       _exit(EXIT_FAILURE);
> +               }
> +
> +               _exit(EXIT_SUCCESS);
> +       }
> +
> +       ASSERT_EQ(close(ipc_sockets[1]), 0);
> +       ASSERT_EQ(switch_userns(fd_userns1, 0, 0, false), true);
> +       ASSERT_EQ(unshare(CLONE_NEWNS), 0);
> +       ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
> +
> +       fd_context = sys_fsopen("tmpfs", 0);
> +       ASSERT_GE(fd_context, 0);
> +
> +       ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
> +       fd_tmpfs = sys_fsmount(fd_context, 0, 0);
> +       ASSERT_GE(fd_tmpfs, 0);
> +       ASSERT_EQ(close(fd_context), 0);
> +
> +       ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0);
> +       ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
> +       ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0);
> +       ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0);
> +
> +       layer_fds[0] = openat(fd_tmpfs, "w", O_DIRECTORY);
> +       ASSERT_GE(layer_fds[0], 0);
> +
> +       layer_fds[1] = openat(fd_tmpfs, "u", O_DIRECTORY);
> +       ASSERT_GE(layer_fds[1], 0);
> +
> +       layer_fds[2] = openat(fd_tmpfs, "l1", O_DIRECTORY);
> +       ASSERT_GE(layer_fds[2], 0);
> +
> +       layer_fds[3] = openat(fd_tmpfs, "l2", O_DIRECTORY);
> +       ASSERT_GE(layer_fds[3], 0);
> +
> +       ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
> +       ASSERT_EQ(close(fd_tmpfs), 0);
> +
> +       fd_context = sys_fsopen("overlay", 0);
> +       ASSERT_GE(fd_context, 0);
> +       ASSERT_EQ(dup3(fd_context, predictable_fd_context_nr, 0), predictable_fd_context_nr);
> +       ASSERT_EQ(close(fd_context), 0);
> +       fd_context = predictable_fd_context_nr;
> +       ASSERT_EQ(write_nointr(ipc_sockets[0], "1", 1), 1);
> +       ASSERT_EQ(close(ipc_sockets[0]), 0);
> +
> +       ASSERT_EQ(wait_for_pid(pid), 0);
> +       ASSERT_EQ(close(self->pidfd), 0);
> +       self->pidfd = -EBADF;
> +
> +       ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);
> +       ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir",   NULL, layer_fds[0]), 0);
> +       ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir",  NULL, layer_fds[1]), 0);
> +       ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
> +       ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);
> +
> +       for (int i = 0; i < ARRAY_SIZE(layer_fds); i++)
> +               ASSERT_EQ(close(layer_fds[i]), 0);
> +
> +       ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "userxattr", NULL, 0), 0);
>
>         ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
>
> @@ -313,6 +438,8 @@ TEST_F(set_layers_via_fds, set_override_creds)
>
>         ASSERT_EQ(close(fd_context), 0);
>         ASSERT_EQ(close(fd_overlay), 0);
> +       ASSERT_EQ(close(fd_userns1), 0);
> +       ASSERT_EQ(close(fd_userns2), 0);
>  }
>
>  TEST_HARNESS_MAIN
> diff --git a/tools/testing/selftests/filesystems/utils.c b/tools/testing/selftests/filesystems/utils.c
> new file mode 100644
> index 000000000000..0e8080bd0aea
> --- /dev/null
> +++ b/tools/testing/selftests/filesystems/utils.c
> @@ -0,0 +1,474 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#ifndef _GNU_SOURCE
> +#define _GNU_SOURCE
> +#endif
> +#include <fcntl.h>
> +#include <sys/types.h>
> +#include <dirent.h>
> +#include <grp.h>
> +#include <linux/limits.h>
> +#include <sched.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <sys/eventfd.h>
> +#include <sys/fsuid.h>
> +#include <sys/prctl.h>
> +#include <sys/socket.h>
> +#include <sys/stat.h>
> +#include <sys/types.h>
> +#include <sys/wait.h>
> +#include <sys/xattr.h>
> +
> +#include "utils.h"
> +
> +#define MAX_USERNS_LEVEL 32
> +
> +#define syserror(format, ...)                           \
> +       ({                                              \
> +               fprintf(stderr, "%m - " format "\n", ##__VA_ARGS__); \
> +               (-errno);                               \
> +       })
> +
> +#define syserror_set(__ret__, format, ...)                    \
> +       ({                                                    \
> +               typeof(__ret__) __internal_ret__ = (__ret__); \
> +               errno = labs(__ret__);                        \
> +               fprintf(stderr, "%m - " format "\n", ##__VA_ARGS__);       \
> +               __internal_ret__;                             \
> +       })
> +
> +#define STRLITERALLEN(x) (sizeof(""x"") - 1)
> +
> +#define INTTYPE_TO_STRLEN(type)             \
> +       (2 + (sizeof(type) <= 1             \
> +                 ? 3                       \
> +                 : sizeof(type) <= 2       \
> +                       ? 5                 \
> +                       : sizeof(type) <= 4 \
> +                             ? 10          \
> +                             : sizeof(type) <= 8 ? 20 : sizeof(int[-2 * (sizeof(type) > 8)])))
> +
> +#define list_for_each(__iterator, __list) \
> +       for (__iterator = (__list)->next; __iterator != __list; __iterator = __iterator->next)
> +
> +typedef enum idmap_type_t {
> +       ID_TYPE_UID,
> +       ID_TYPE_GID
> +} idmap_type_t;
> +
> +struct id_map {
> +       idmap_type_t map_type;
> +       __u32 nsid;
> +       __u32 hostid;
> +       __u32 range;
> +};
> +
> +struct list {
> +       void *elem;
> +       struct list *next;
> +       struct list *prev;
> +};
> +
> +struct userns_hierarchy {
> +       int fd_userns;
> +       int fd_event;
> +       unsigned int level;
> +       struct list id_map;
> +};
> +
> +static inline void list_init(struct list *list)
> +{
> +       list->elem = NULL;
> +       list->next = list->prev = list;
> +}
> +
> +static inline int list_empty(const struct list *list)
> +{
> +       return list == list->next;
> +}
> +
> +static inline void __list_add(struct list *new, struct list *prev, struct list *next)
> +{
> +       next->prev = new;
> +       new->next = next;
> +       new->prev = prev;
> +       prev->next = new;
> +}
> +
> +static inline void list_add_tail(struct list *head, struct list *list)
> +{
> +       __list_add(list, head->prev, head);
> +}
> +
> +static inline void list_del(struct list *list)
> +{
> +       struct list *next, *prev;
> +
> +       next = list->next;
> +       prev = list->prev;
> +       next->prev = prev;
> +       prev->next = next;
> +}
> +
> +static ssize_t read_nointr(int fd, void *buf, size_t count)
> +{
> +       ssize_t ret;
> +
> +       do {
> +               ret = read(fd, buf, count);
> +       } while (ret < 0 && errno == EINTR);
> +
> +       return ret;
> +}
> +
> +static ssize_t write_nointr(int fd, const void *buf, size_t count)
> +{
> +       ssize_t ret;
> +
> +       do {
> +               ret = write(fd, buf, count);
> +       } while (ret < 0 && errno == EINTR);
> +
> +       return ret;
> +}
> +
> +#define __STACK_SIZE (8 * 1024 * 1024)
> +static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
> +{
> +       void *stack;
> +
> +       stack = malloc(__STACK_SIZE);
> +       if (!stack)
> +               return -ENOMEM;
> +
> +#ifdef __ia64__
> +       return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
> +#else
> +       return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
> +#endif
> +}
> +
> +static int get_userns_fd_cb(void *data)
> +{
> +       for (;;)
> +               pause();
> +       _exit(0);
> +}
> +
> +static int wait_for_pid(pid_t pid)
> +{
> +       int status, ret;
> +
> +again:
> +       ret = waitpid(pid, &status, 0);
> +       if (ret == -1) {
> +               if (errno == EINTR)
> +                       goto again;
> +
> +               return -1;
> +       }
> +
> +       if (!WIFEXITED(status))
> +               return -1;
> +
> +       return WEXITSTATUS(status);
> +}
> +
> +static int write_id_mapping(idmap_type_t map_type, pid_t pid, const char *buf, size_t buf_size)
> +{
> +       int fd = -EBADF, setgroups_fd = -EBADF;
> +       int fret = -1;
> +       int ret;
> +       char path[STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(pid_t) +
> +                 STRLITERALLEN("/setgroups") + 1];
> +
> +       if (geteuid() != 0 && map_type == ID_TYPE_GID) {
> +               ret = snprintf(path, sizeof(path), "/proc/%d/setgroups", pid);
> +               if (ret < 0 || ret >= sizeof(path))
> +                       goto out;
> +
> +               setgroups_fd = open(path, O_WRONLY | O_CLOEXEC);
> +               if (setgroups_fd < 0 && errno != ENOENT) {
> +                       syserror("Failed to open \"%s\"", path);
> +                       goto out;
> +               }
> +
> +               if (setgroups_fd >= 0) {
> +                       ret = write_nointr(setgroups_fd, "deny\n", STRLITERALLEN("deny\n"));
> +                       if (ret != STRLITERALLEN("deny\n")) {
> +                               syserror("Failed to write \"deny\" to \"/proc/%d/setgroups\"", pid);
> +                               goto out;
> +                       }
> +               }
> +       }
> +
> +       ret = snprintf(path, sizeof(path), "/proc/%d/%cid_map", pid, map_type == ID_TYPE_UID ? 'u' : 'g');
> +       if (ret < 0 || ret >= sizeof(path))
> +               goto out;
> +
> +       fd = open(path, O_WRONLY | O_CLOEXEC);
> +       if (fd < 0) {
> +               syserror("Failed to open \"%s\"", path);
> +               goto out;
> +       }
> +
> +       ret = write_nointr(fd, buf, buf_size);
> +       if (ret != buf_size) {
> +               syserror("Failed to write %cid mapping to \"%s\"",
> +                        map_type == ID_TYPE_UID ? 'u' : 'g', path);
> +               goto out;
> +       }
> +
> +       fret = 0;
> +out:
> +       close(fd);
> +       close(setgroups_fd);
> +
> +       return fret;
> +}
> +
> +static int map_ids_from_idmap(struct list *idmap, pid_t pid)
> +{
> +       int fill, left;
> +       char mapbuf[4096] = {};
> +       bool had_entry = false;
> +       idmap_type_t map_type, u_or_g;
> +
> +       if (list_empty(idmap))
> +               return 0;
> +
> +       for (map_type = ID_TYPE_UID, u_or_g = 'u';
> +            map_type <= ID_TYPE_GID; map_type++, u_or_g = 'g') {
> +               char *pos = mapbuf;
> +               int ret;
> +               struct list *iterator;
> +
> +
> +               list_for_each(iterator, idmap) {
> +                       struct id_map *map = iterator->elem;
> +                       if (map->map_type != map_type)
> +                               continue;
> +
> +                       had_entry = true;
> +
> +                       left = 4096 - (pos - mapbuf);
> +                       fill = snprintf(pos, left, "%u %u %u\n", map->nsid, map->hostid, map->range);
> +                       /*
> +                        * The kernel only takes <= 4k for writes to
> +                        * /proc/<pid>/{g,u}id_map
> +                        */
> +                       if (fill <= 0 || fill >= left)
> +                               return syserror_set(-E2BIG, "Too many %cid mappings defined", u_or_g);
> +
> +                       pos += fill;
> +               }
> +               if (!had_entry)
> +                       continue;
> +
> +               ret = write_id_mapping(map_type, pid, mapbuf, pos - mapbuf);
> +               if (ret < 0)
> +                       return syserror("Failed to write mapping: %s", mapbuf);
> +
> +               memset(mapbuf, 0, sizeof(mapbuf));
> +       }
> +
> +       return 0;
> +}
> +
> +static int get_userns_fd_from_idmap(struct list *idmap)
> +{
> +       int ret;
> +       pid_t pid;
> +       char path_ns[STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(pid_t) +
> +                    STRLITERALLEN("/ns/user") + 1];
> +
> +       pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER | CLONE_NEWNS);
> +       if (pid < 0)
> +               return -errno;
> +
> +       ret = map_ids_from_idmap(idmap, pid);
> +       if (ret < 0)
> +               return ret;
> +
> +       ret = snprintf(path_ns, sizeof(path_ns), "/proc/%d/ns/user", pid);
> +       if (ret < 0 || (size_t)ret >= sizeof(path_ns))
> +               ret = -EIO;
> +       else
> +               ret = open(path_ns, O_RDONLY | O_CLOEXEC | O_NOCTTY);
> +
> +       (void)kill(pid, SIGKILL);
> +       (void)wait_for_pid(pid);
> +       return ret;
> +}
> +
> +int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
> +{
> +       struct list head, uid_mapl, gid_mapl;
> +       struct id_map uid_map = {
> +               .map_type       = ID_TYPE_UID,
> +               .nsid           = nsid,
> +               .hostid         = hostid,
> +               .range          = range,
> +       };
> +       struct id_map gid_map = {
> +               .map_type       = ID_TYPE_GID,
> +               .nsid           = nsid,
> +               .hostid         = hostid,
> +               .range          = range,
> +       };
> +
> +       list_init(&head);
> +       uid_mapl.elem = &uid_map;
> +       gid_mapl.elem = &gid_map;
> +       list_add_tail(&head, &uid_mapl);
> +       list_add_tail(&head, &gid_mapl);
> +
> +       return get_userns_fd_from_idmap(&head);
> +}
> +
> +bool switch_ids(uid_t uid, gid_t gid)
> +{
> +       if (setgroups(0, NULL))
> +               return syserror("failure: setgroups");
> +
> +       if (setresgid(gid, gid, gid))
> +               return syserror("failure: setresgid");
> +
> +       if (setresuid(uid, uid, uid))
> +               return syserror("failure: setresuid");
> +
> +       /* Ensure we can access proc files from processes we can ptrace. */
> +       if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0))
> +               return syserror("failure: make dumpable");
> +
> +       return true;
> +}
> +
> +static int create_userns_hierarchy(struct userns_hierarchy *h);
> +
> +static int userns_fd_cb(void *data)
> +{
> +       struct userns_hierarchy *h = data;
> +       char c;
> +       int ret;
> +
> +       ret = read_nointr(h->fd_event, &c, 1);
> +       if (ret < 0)
> +               return syserror("failure: read from socketpair");
> +
> +       /* Only switch ids if someone actually wrote a mapping for us. */
> +       if (c == '1') {
> +               if (!switch_ids(0, 0))
> +                       return syserror("failure: switch ids to 0");
> +       }
> +
> +       ret = write_nointr(h->fd_event, "1", 1);
> +       if (ret < 0)
> +               return syserror("failure: write to socketpair");
> +
> +       ret = create_userns_hierarchy(++h);
> +       if (ret < 0)
> +               return syserror("failure: userns level %d", h->level);
> +
> +       return 0;
> +}
> +
> +static int create_userns_hierarchy(struct userns_hierarchy *h)
> +{
> +       int fret = -1;
> +       char c;
> +       int fd_socket[2];
> +       int fd_userns = -EBADF, ret = -1;
> +       ssize_t bytes;
> +       pid_t pid;
> +       char path[256];
> +
> +       if (h->level == MAX_USERNS_LEVEL)
> +               return 0;
> +
> +       ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, fd_socket);
> +       if (ret < 0)
> +               return syserror("failure: create socketpair");
> +
> +       /* Note the CLONE_FILES | CLONE_VM when mucking with fds and memory. */
> +       h->fd_event = fd_socket[1];
> +       pid = do_clone(userns_fd_cb, h, CLONE_NEWUSER | CLONE_FILES | CLONE_VM);
> +       if (pid < 0) {
> +               syserror("failure: userns level %d", h->level);
> +               goto out_close;
> +       }
> +
> +       ret = map_ids_from_idmap(&h->id_map, pid);
> +       if (ret < 0) {
> +               kill(pid, SIGKILL);
> +               syserror("failure: writing id mapping for userns level %d for %d", h->level, pid);
> +               goto out_wait;
> +       }
> +
> +       if (!list_empty(&h->id_map))
> +               bytes = write_nointr(fd_socket[0], "1", 1); /* Inform the child we wrote a mapping. */
> +       else
> +               bytes = write_nointr(fd_socket[0], "0", 1); /* Inform the child we didn't write a mapping. */
> +       if (bytes < 0) {
> +               kill(pid, SIGKILL);
> +               syserror("failure: write to socketpair");
> +               goto out_wait;
> +       }
> +
> +       /* Wait for child to set*id() and become dumpable. */
> +       bytes = read_nointr(fd_socket[0], &c, 1);
> +       if (bytes < 0) {
> +               kill(pid, SIGKILL);
> +               syserror("failure: read from socketpair");
> +               goto out_wait;
> +       }
> +
> +       snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
> +       fd_userns = open(path, O_RDONLY | O_CLOEXEC);
> +       if (fd_userns < 0) {
> +               kill(pid, SIGKILL);
> +               syserror("failure: open userns level %d for %d", h->level, pid);
> +               goto out_wait;
> +       }
> +
> +       fret = 0;
> +
> +out_wait:
> +       if (!wait_for_pid(pid) && !fret) {
> +               h->fd_userns = fd_userns;
> +               fd_userns = -EBADF;
> +       }
> +
> +out_close:
> +       if (fd_userns >= 0)
> +               close(fd_userns);
> +       close(fd_socket[0]);
> +       close(fd_socket[1]);
> +       return fret;
> +}
> +
> +/* caps_down - lower all effective caps */
> +int caps_down(void)
> +{
> +       bool fret = false;
> +       cap_t caps = NULL;
> +       int ret = -1;
> +
> +       caps = cap_get_proc();
> +       if (!caps)
> +               goto out;
> +
> +       ret = cap_clear_flag(caps, CAP_EFFECTIVE);
> +       if (ret)
> +               goto out;
> +
> +       ret = cap_set_proc(caps);
> +       if (ret)
> +               goto out;
> +
> +       fret = true;
> +
> +out:
> +       cap_free(caps);
> +       return fret;
> +}
> diff --git a/tools/testing/selftests/filesystems/utils.h b/tools/testing/selftests/filesystems/utils.h
> new file mode 100644
> index 000000000000..f35001a75f99
> --- /dev/null
> +++ b/tools/testing/selftests/filesystems/utils.h
> @@ -0,0 +1,44 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef __IDMAP_UTILS_H
> +#define __IDMAP_UTILS_H
> +
> +#ifndef _GNU_SOURCE
> +#define _GNU_SOURCE
> +#endif
> +#include <errno.h>
> +#include <linux/types.h>
> +#include <sched.h>
> +#include <signal.h>
> +#include <stdbool.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <syscall.h>
> +#include <sys/capability.h>
> +#include <sys/fsuid.h>
> +#include <sys/types.h>
> +#include <unistd.h>
> +
> +extern int get_userns_fd(unsigned long nsid, unsigned long hostid,
> +                        unsigned long range);
> +
> +extern int caps_down(void);
> +
> +extern bool switch_ids(uid_t uid, gid_t gid);
> +
> +static inline bool switch_userns(int fd, uid_t uid, gid_t gid, bool drop_caps)
> +{
> +       if (setns(fd, CLONE_NEWUSER))
> +               return false;
> +
> +       if (!switch_ids(uid, gid))
> +               return false;
> +
> +       if (drop_caps && !caps_down())
> +               return false;
> +
> +       return true;
> +}
> +
> +#endif /* __IDMAP_UTILS_H */
>
> --
> 2.47.2
>





[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux