On Thu, Jul 21, 2022 at 12:28:07PM -0500, Frederick Lawler wrote: > The LSM hook userns_create was introduced to provide LSM's an > opportunity to block or allow unprivileged user namespace creation. This > test serves two purposes: it provides a test eBPF implementation, and > tests the hook successfully blocks or allows user namespace creation. > > This tests 4 cases: > > 1. Unattached bpf program does not block unpriv user namespace > creation. > 2. Attached bpf program allows user namespace creation given > CAP_SYS_ADMIN privileges. > 3. Attached bpf program denies user namespace creation for a > user without CAP_SYS_ADMIN. > 4. The sleepable implementation loads Sounds good! > > Signed-off-by: Frederick Lawler <fred@xxxxxxxxxxxxxx> > > --- > The generic deny_namespace file name is used for future namespace > expansion. I didn't want to limit these files to just the create_user_ns > hook. > Changes since v2: > - Rename create_user_ns hook to userns_create > Changes since v1: > - Introduce this patch > --- > .../selftests/bpf/prog_tests/deny_namespace.c | 88 +++++++++++++++++++ > .../selftests/bpf/progs/test_deny_namespace.c | 39 ++++++++ > 2 files changed, 127 insertions(+) > create mode 100644 tools/testing/selftests/bpf/prog_tests/deny_namespace.c > create mode 100644 tools/testing/selftests/bpf/progs/test_deny_namespace.c > > diff --git a/tools/testing/selftests/bpf/prog_tests/deny_namespace.c b/tools/testing/selftests/bpf/prog_tests/deny_namespace.c > new file mode 100644 > index 000000000000..9e4714295008 > --- /dev/null > +++ b/tools/testing/selftests/bpf/prog_tests/deny_namespace.c > @@ -0,0 +1,88 @@ > +// SPDX-License-Identifier: GPL-2.0 > +#define _GNU_SOURCE > +#include <test_progs.h> > +#include "test_deny_namespace.skel.h" > +#include <sched.h> > +#include "cap_helpers.h" > + > +#define STACK_SIZE (1024 * 1024) > +static char child_stack[STACK_SIZE]; > + > +int clone_callback(void *arg) > +{ > + return 0; > +} > + > +static int 
create_new_user_ns(void) > +{ > + int status; > + pid_t cpid; > + > + cpid = clone(clone_callback, child_stack + STACK_SIZE, > + CLONE_NEWUSER | SIGCHLD, NULL); > + > + if (cpid == -1) > + return errno; > + > + if (cpid == 0) > + return 0; Martin asked about this already but fwiw, this cannot happen with clone(). The clone() function doesn't return twice. It always returns the PID of the child process or an error. > + > + waitpid(cpid, &status, 0); > + if (WIFEXITED(status)) > + return WEXITSTATUS(status); > + > + return -1; > +} You can also just avoid the clone() dance and simply do something like: static int wait_for_pid(pid_t pid) { int status, ret; again: ret = waitpid(pid, &status, 0); if (ret == -1) { if (errno == EINTR) goto again; return -1; } if (!WIFEXITED(status)) return -1; return WEXITSTATUS(status); } /* negative return value -> some internal error * positive return value -> userns creation failed * 0 -> userns creation succeeded */ static int create_user_ns(void) { pid_t pid; pid = fork(); if (pid < 0) return -1; if (pid == 0) { if (unshare(CLONE_NEWUSER)) _exit(EXIT_FAILURE); _exit(EXIT_SUCCESS); } return wait_for_pid(pid); } Same difference since both codepaths hit the right spot in the kernel. 
> + > +static void test_userns_create_bpf(void) > +{ > + __u32 cap_mask = 1ULL << CAP_SYS_ADMIN; > + __u64 old_caps = 0; > + > + ASSERT_OK(create_new_user_ns(), "priv new user ns"); > + > + cap_disable_effective(cap_mask, &old_caps); > + > + ASSERT_EQ(create_new_user_ns(), EPERM, "unpriv new user ns"); > + > + if (cap_mask & old_caps) > + cap_enable_effective(cap_mask, NULL); > +} > + > +static void test_unpriv_userns_create_no_bpf(void) > +{ > + __u32 cap_mask = 1ULL << CAP_SYS_ADMIN; > + __u64 old_caps = 0; > + > + cap_disable_effective(cap_mask, &old_caps); > + > + ASSERT_OK(create_new_user_ns(), "no-bpf unpriv new user ns"); > + > + if (cap_mask & old_caps) > + cap_enable_effective(cap_mask, NULL); > +} > + > +void test_deny_namespace(void) > +{ > + struct test_deny_namespace *skel = NULL; > + int err; > + > + if (test__start_subtest("unpriv_userns_create_no_bpf")) > + test_unpriv_userns_create_no_bpf(); > + > + skel = test_deny_namespace__open_and_load(); > + if (!ASSERT_OK_PTR(skel, "skel load")) > + goto close_prog; > + > + err = test_deny_namespace__attach(skel); > + if (!ASSERT_OK(err, "attach")) > + goto close_prog; > + > + if (test__start_subtest("userns_create_bpf")) > + test_userns_create_bpf(); > + > + test_deny_namespace__detach(skel); > + > +close_prog: > + test_deny_namespace__destroy(skel); > +} > diff --git a/tools/testing/selftests/bpf/progs/test_deny_namespace.c b/tools/testing/selftests/bpf/progs/test_deny_namespace.c > new file mode 100644 > index 000000000000..9ec9dabc8372 > --- /dev/null > +++ b/tools/testing/selftests/bpf/progs/test_deny_namespace.c > @@ -0,0 +1,39 @@ > +// SPDX-License-Identifier: GPL-2.0 > +#include <linux/bpf.h> > +#include <bpf/bpf_helpers.h> > +#include <bpf/bpf_tracing.h> > +#include <errno.h> > +#include <linux/capability.h> > + > +struct kernel_cap_struct { > + __u32 cap[_LINUX_CAPABILITY_U32S_3]; > +} __attribute__((preserve_access_index)); > + > +struct cred { > + struct kernel_cap_struct cap_effective; > +} 
__attribute__((preserve_access_index)); > + > +char _license[] SEC("license") = "GPL"; > + > +SEC("lsm/userns_create") > +int BPF_PROG(test_userns_create, const struct cred *cred, int ret) > +{ > + struct kernel_cap_struct caps = cred->cap_effective; > + int cap_index = CAP_TO_INDEX(CAP_SYS_ADMIN); > + __u32 cap_mask = CAP_TO_MASK(CAP_SYS_ADMIN); > + > + if (ret) > + return 0; > + > + ret = -EPERM; > + if (caps.cap[cap_index] & cap_mask) > + return 0; > + > + return -EPERM; > +} Looks nice and simple. Acked-by: Christian Brauner (Microsoft) <brauner@xxxxxxxxxx>