Attached is a patch for refpolicy to define the new security classes and allow them for unconfined domains. Also attached is a sample patch for selinux-policy.spec to add the patch. So, to rebuild and install policy with this change, I did something like:
yumdownloader --source selinux-policy-targeted
rpm -ivh selinux-policy*.src.rpm
cp refpolicy-define-cap-userns.patch rpmbuild/SOURCES
cd rpmbuild/SPECS
patch -p1 < ~/selinux-policy.spec.patch
rpmbuild -bb selinux-policy.spec
rpm -Uvh ../RPMS/noarch/selinux-policy*.rpm
I can also send the actual .src.rpm and/or binary rpms separately if desired/needed, but not on the list. On Wed, 2016-04-06 at 09:02 -0700, Stephen Smalley wrote: > Add tests for the non-init user namespace capability checks. > The tests depend on the previously posted kernel patch and on > a patch for refpolicy to define the new security class. > > Signed-off-by: Stephen Smalley <sds@xxxxxxxxxxxxx> > --- > policy/Makefile | 2 +- > policy/test_cap_userns.te | 27 ++++ > tests/Makefile | 2 +- > tests/cap_userns/Makefile | 5 + > tests/cap_userns/test | 17 ++ > tests/cap_userns/userns_child_exec.c | 298 > +++++++++++++++++++++++++++++++++++ > 6 files changed, 349 insertions(+), 2 deletions(-) > create mode 100644 policy/test_cap_userns.te > create mode 100644 tests/cap_userns/Makefile > create mode 100755 tests/cap_userns/test > create mode 100644 tests/cap_userns/userns_child_exec.c > > diff --git a/policy/Makefile b/policy/Makefile > index 98fccbc..33f3458 100644 > --- a/policy/Makefile > +++ b/policy/Makefile > @@ -20,7 +20,7 @@ TARGETS = \ > test_task_create.te test_task_getpgid.te > test_task_getsched.te \ > test_task_getsid.te test_task_setpgid.te > test_task_setsched.te \ > test_transition.te test_inet_socket.te test_unix_socket.te \ > - test_wait.te test_mmap.te > + test_wait.te test_mmap.te test_cap_userns.te > > ifeq ($(shell [ $(POL_VERS) -ge 24 ] && echo true),true) > TARGETS += test_bounds.te > diff --git a/policy/test_cap_userns.te 
b/policy/test_cap_userns.te > new file mode 100644 > index 0000000..ab74325 > --- /dev/null > +++ b/policy/test_cap_userns.te > @@ -0,0 +1,27 @@ > +################################# > +# > +# Policy for testing non-init userns capability checking. > +# > + > +attribute capusernsdomain; > + > +# Domain for process that is allowed non-init userns capabilities > +type test_cap_userns_t; > +domain_type(test_cap_userns_t) > +unconfined_runs_test(test_cap_userns_t) > +typeattribute test_cap_userns_t testdomain; > +typeattribute test_cap_userns_t capusernsdomain; > + > +# This domain is allowed sys_admin on non-init userns for mount. > +allow test_cap_userns_t self:cap_userns sys_admin; > + > +# Domain for process that is not allowed non-init userns > capabilities > +type test_no_cap_userns_t; > +domain_type(test_no_cap_userns_t) > +unconfined_runs_test(test_no_cap_userns_t) > +typeattribute test_no_cap_userns_t testdomain; > +typeattribute test_no_cap_userns_t capusernsdomain; > + > +# Rules common to both domains. 
> +miscfiles_domain_entry_test_files(capusernsdomain) > +corecmd_exec_bin(capusernsdomain) > diff --git a/tests/Makefile b/tests/Makefile > index 7a9b39c..bf3f946 100644 > --- a/tests/Makefile > +++ b/tests/Makefile > @@ -5,7 +5,7 @@ DISTRO=$(shell ./os_detect) > > SUBDIRS_COMMON:=domain_trans entrypoint execshare exectrace > execute_no_trans fdreceive inherit link mkdir msg open ptrace > readlink relabel rename rxdir sem setattr setnice shm sigkill stat > sysctl task_create task_setnice task_setscheduler task_getscheduler > task_getsid task_getpgid task_setpgid wait file ioctl capable_file > capable_net capable_sys > > -SUBDIRS:= $(SUBDIRS_COMMON) dyntrans dyntrace bounds nnp mmap > unix_socket inet_socket > +SUBDIRS:= $(SUBDIRS_COMMON) dyntrans dyntrace bounds nnp mmap > unix_socket inet_socket cap_userns > > ifeq ($(DISTRO),RHEL4) > SUBDIRS:=$(SUBDIRS_COMMON) > diff --git a/tests/cap_userns/Makefile b/tests/cap_userns/Makefile > new file mode 100644 > index 0000000..27b4676 > --- /dev/null > +++ b/tests/cap_userns/Makefile > @@ -0,0 +1,5 @@ > +TARGETS=userns_child_exec > + > +all: $(TARGETS) > +clean: > + rm -f $(TARGETS) > diff --git a/tests/cap_userns/test b/tests/cap_userns/test > new file mode 100755 > index 0000000..5842ebd > --- /dev/null > +++ b/tests/cap_userns/test > @@ -0,0 +1,17 @@ > +#!/usr/bin/perl > + > +use Test; > +BEGIN { plan tests => 2} > + > +$basedir = $0; $basedir =~ s|(.*)/[^/]*|$1|; > + > +# Verify that test_cap_userns_t can mount proc within its own mount > namespace. > + > +$result = system ("runcon -t test_cap_userns_t -- > $basedir/userns_child_exec -p -m -U -M '0 0 1' -G '0 0 1' -- true > 2>&1"); > +ok($result, 0); > + > +# Verify that test_no_cap_userns_t cannot mount proc within its own > mount namespace. 
> + > +$result = system ("runcon -t test_no_cap_userns_t -- > $basedir/userns_child_exec -p -m -U -M '0 0 1' -G '0 0 1' -- true > 2>&1"); > +ok($result); > + > diff --git a/tests/cap_userns/userns_child_exec.c > b/tests/cap_userns/userns_child_exec.c > new file mode 100644 > index 0000000..26ea357 > --- /dev/null > +++ b/tests/cap_userns/userns_child_exec.c > @@ -0,0 +1,298 @@ > +/* Taken from the user_namespaces.7 man page */ > + > +/* userns_child_exec.c > + > + Licensed under GNU General Public License v2 or later > + > + Create a child process that executes a shell command in new > + namespace(s); allow UID and GID mappings to be specified when > + creating a user namespace. > +*/ > +#define _GNU_SOURCE > +#include <sched.h> > +#include <unistd.h> > +#include <stdlib.h> > +#include <sys/wait.h> > +#include <signal.h> > +#include <fcntl.h> > +#include <stdio.h> > +#include <string.h> > +#include <limits.h> > +#include <errno.h> > + > +/* A simple error-handling function: print an error message based > + on the value in 'errno' and terminate the calling process */ > + > +#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \ > + } while (0) > + > +struct child_args { > + char **argv; /* Command to be executed by child, with > args */ > + int pipe_fd[2]; /* Pipe used to synchronize parent and child > */ > +}; > + > +static int verbose; > + > +static void > +usage(char *pname) > +{ > + fprintf(stderr, "Usage: %s [options] cmd [arg...]\n\n", pname); > + fprintf(stderr, "Create a child process that executes a shell " > + "command in a new user namespace,\n" > + "and possibly also other new namespace(s).\n\n"); > + fprintf(stderr, "Options can be:\n\n"); > +#define fpe(str) fprintf(stderr, " %s", str); > + fpe("-i New IPC namespace\n"); > + fpe("-m New mount namespace\n"); > + fpe("-n New network namespace\n"); > + fpe("-p New PID namespace\n"); > + fpe("-u New UTS namespace\n"); > + fpe("-U New user namespace\n"); > + fpe("-M uid_map Specify UID map for user 
namespace\n"); > + fpe("-G gid_map Specify GID map for user namespace\n"); > + fpe("-z Map user's UID and GID to 0 in user > namespace\n"); > + fpe(" (equivalent to: -M '0 <uid> 1' -G '0 <gid> > 1'\n"); > + fpe("-v Display verbose messages\n"); > + fpe("\n"); > + fpe("If -z, -M, or -G is specified, -U is required.\n"); > + fpe("It is not permitted to specify both -z and either -M or > -G.\n"); > + fpe("\n"); > + fpe("Map strings for -M and -G consist of records of the > form:\n"); > + fpe("\n"); > + fpe(" ID-inside-ns ID-outside-ns len\n"); > + fpe("\n"); > + fpe("A map string can contain multiple records, separated" > + " by commas;\n"); > + fpe("the commas are replaced by newlines before writing" > + " to map files.\n"); > + > + exit(EXIT_FAILURE); > +} > + > +/* Update the mapping file 'map_file', with the value provided in > + 'mapping', a string that defines a UID or GID mapping. A UID or > + GID mapping consists of one or more newline-delimited records > + of the form: > + > + ID_inside-ns ID-outside-ns length > + > + Requiring the user to supply a string that contains newlines is > + of course inconvenient for command-line use. Thus, we permit the > + use of commas to delimit records in this string, and replace them > + with newlines before writing the string to the file. 
*/ > + > +static void > +update_map(char *mapping, char *map_file) > +{ > + int fd, j; > + size_t map_len; /* Length of 'mapping' */ > + > + /* Replace commas in mapping string with newlines */ > + > + map_len = strlen(mapping); > + for (j = 0; j < map_len; j++) > + if (mapping[j] == ',') > + mapping[j] = '\n'; > + > + fd = open(map_file, O_RDWR); > + if (fd == -1) { > + fprintf(stderr, "ERROR: open %s: %s\n", map_file, > + strerror(errno)); > + exit(EXIT_FAILURE); > + } > + > + if (write(fd, mapping, map_len) != map_len) { > + fprintf(stderr, "ERROR: write %s: %s\n", map_file, > + strerror(errno)); > + exit(EXIT_FAILURE); > + } > + > + close(fd); > +} > + > +/* Linux 3.19 made a change in the handling of setgroups(2) and the > + 'gid_map' file to address a security issue. The issue allowed > + *unprivileged* users to employ user namespaces in order to drop groups. > + The upshot of the 3.19 changes is that in order to update the > + 'gid_maps' file, use of the setgroups() system call in this > + user namespace must first be disabled by writing "deny" to one of > + the /proc/PID/setgroups files for this namespace. That is the > + purpose of the following function. */ > + > +static void > +proc_setgroups_write(pid_t child_pid, char *str) > +{ > + char setgroups_path[PATH_MAX]; > + int fd; > + > + snprintf(setgroups_path, PATH_MAX, "/proc/%ld/setgroups", > + (long) child_pid); > + > + fd = open(setgroups_path, O_RDWR); > + if (fd == -1) { > + > + /* We may be on a system that doesn't support > + /proc/PID/setgroups. In that case, the file won't exist, > + and the system won't impose the restrictions that Linux > 3.19 > + added. That's fine: we don't need to do anything in order > + to permit 'gid_map' to be updated. > + > + However, if the error from open() was something other > than > + the ENOENT error that is expected for that case, let the > + user know. 
*/ > + > + if (errno != ENOENT) > + fprintf(stderr, "ERROR: open %s: %s\n", setgroups_path, > + strerror(errno)); > + return; > + } > + > + if (write(fd, str, strlen(str)) == -1) > + fprintf(stderr, "ERROR: write %s: %s\n", setgroups_path, > + strerror(errno)); > + > + close(fd); > +} > + > +static int /* Start function for cloned child */ > +childFunc(void *arg) > +{ > + struct child_args *args = (struct child_args *) arg; > + char ch; > + > + /* Wait until the parent has updated the UID and GID mappings. > + See the comment in main(). We wait for end of file on a > + pipe that will be closed by the parent process once it has > + updated the mappings. */ > + > + close(args->pipe_fd[1]); /* Close our descriptor for the > write > + end of the pipe so that we see > EOF > + when parent closes its descriptor > */ > + if (read(args->pipe_fd[0], &ch, 1) != 0) { > + fprintf(stderr, > + "Failure in child: read from pipe returned != 0\n"); > + exit(EXIT_FAILURE); > + } > + > + /* Execute a shell command */ > + > + printf("About to exec %s\n", args->argv[0]); > + execvp(args->argv[0], args->argv); > + errExit("execvp"); > +} > + > +#define STACK_SIZE (1024 * 1024) > + > +static char child_stack[STACK_SIZE]; /* Space for child's stack > */ > + > +int > +main(int argc, char *argv[]) > +{ > + int flags, opt, map_zero; > + pid_t child_pid; > + struct child_args args; > + char *uid_map, *gid_map; > + const int MAP_BUF_SIZE = 100; > + char map_buf[MAP_BUF_SIZE]; > + char map_path[PATH_MAX]; > + > + /* Parse command-line options. The initial '+' character in > + the final getopt() argument prevents GNU-style permutation > + of command-line options. That's useful, since sometimes > + the 'command' to be executed by this program itself > + has command-line options. We don't want getopt() to treat > + those as options to this program. 
*/ > + > + flags = 0; > + verbose = 0; > + gid_map = NULL; > + uid_map = NULL; > + map_zero = 0; > + while ((opt = getopt(argc, argv, "+imnpuUM:G:zv")) != -1) { > + switch (opt) { > + case 'i': flags |= CLONE_NEWIPC; break; > + case 'm': flags |= CLONE_NEWNS; break; > + case 'n': flags |= CLONE_NEWNET; break; > + case 'p': flags |= CLONE_NEWPID; break; > + case 'u': flags |= CLONE_NEWUTS; break; > + case 'v': verbose = 1; break; > + case 'z': map_zero = 1; break; > + case 'M': uid_map = optarg; break; > + case 'G': gid_map = optarg; break; > + case 'U': flags |= CLONE_NEWUSER; break; > + default: usage(argv[0]); > + } > + } > + > + /* -M or -G without -U is nonsensical */ > + > + if (((uid_map != NULL || gid_map != NULL || map_zero) && > + !(flags & CLONE_NEWUSER)) || > + (map_zero && (uid_map != NULL || gid_map != NULL))) > + usage(argv[0]); > + > + args.argv = &argv[optind]; > + > + /* We use a pipe to synchronize the parent and child, in order > to > + ensure that the parent sets the UID and GID maps before the > child > + calls execve(). This ensures that the child maintains its > + capabilities during the execve() in the common case where we > + want to map the child's effective user ID to 0 in the new > user > + namespace. Without this synchronization, the child would lose > + its capabilities if it performed an execve() with nonzero > + user IDs (see the capabilities(7) man page for details of the > + transformation of a process's capabilities during execve()). 
> */ > + > + if (pipe(args.pipe_fd) == -1) > + errExit("pipe"); > + > + /* Create the child in new namespace(s) */ > + > + child_pid = clone(childFunc, child_stack + STACK_SIZE, > + flags | SIGCHLD, &args); > + if (child_pid == -1) > + errExit("clone"); > + > + /* Parent falls through to here */ > + > + if (verbose) > + printf("%s: PID of child created by clone() is %ld\n", > + argv[0], (long) child_pid); > + > + /* Update the UID and GID maps in the child */ > + > + if (uid_map != NULL || map_zero) { > + snprintf(map_path, PATH_MAX, "/proc/%ld/uid_map", > + (long) child_pid); > + if (map_zero) { > + snprintf(map_buf, MAP_BUF_SIZE, "0 %ld 1", (long) > getuid()); > + uid_map = map_buf; > + } > + update_map(uid_map, map_path); > + } > + > + if (gid_map != NULL || map_zero) { > + proc_setgroups_write(child_pid, "deny"); > + > + snprintf(map_path, PATH_MAX, "/proc/%ld/gid_map", > + (long) child_pid); > + if (map_zero) { > + snprintf(map_buf, MAP_BUF_SIZE, "0 %ld 1", (long) > getgid()); > + gid_map = map_buf; > + } > + update_map(gid_map, map_path); > + } > + > + /* Close the write end of the pipe, to signal to the child that > we > + have updated the UID and GID maps */ > + > + close(args.pipe_fd[1]); > + > + if (waitpid(child_pid, NULL, 0) == -1) /* Wait for child */ > + errExit("waitpid"); > + > + if (verbose) > + printf("%s: terminating\n", argv[0]); > + > + exit(EXIT_SUCCESS); > +} > -- > 2.8.0 >
diff -ru serefpolicy-3.13.1.orig/policy/flask/access_vectors serefpolicy-3.13.1/policy/flask/access_vectors --- serefpolicy-3.13.1.orig/policy/flask/access_vectors 2016-04-05 14:59:56.548712088 -0700 +++ serefpolicy-3.13.1/policy/flask/access_vectors 2016-04-05 20:29:57.981139727 -0700 @@ -925,3 +925,56 @@ { read } + +class cap_userns +{ + # The capabilities are defined in include/linux/capability.h + # Capabilities >= 32 are defined in the capability2 class. + # Care should be taken to ensure that these are consistent with + # those definitions. (Order matters) + + chown + dac_override + dac_read_search + fowner + fsetid + kill + setgid + setuid + setpcap + linux_immutable + net_bind_service + net_broadcast + net_admin + net_raw + ipc_lock + ipc_owner + sys_module + sys_rawio + sys_chroot + sys_ptrace + sys_pacct + sys_admin + sys_boot + sys_nice + sys_resource + sys_time + sys_tty_config + mknod + lease + audit_write + audit_control + setfcap +} + +class cap2_userns +{ + mac_override # unused by SELinux + mac_admin + syslog + wake_alarm + epolwakeup + block_suspend + compromise_kernel + audit_read +} diff -ru serefpolicy-3.13.1.orig/policy/flask/security_classes serefpolicy-3.13.1/policy/flask/security_classes --- serefpolicy-3.13.1.orig/policy/flask/security_classes 2016-04-05 14:59:56.548712088 -0700 +++ serefpolicy-3.13.1/policy/flask/security_classes 2016-04-05 20:29:57.981139727 -0700 @@ -149,5 +149,8 @@ # gssd services class proxy +# Capability checks when on a non-init user namespace +class cap_userns +class cap2_userns # FLASK diff -ru serefpolicy-3.13.1.orig/policy/modules/kernel/kernel.te serefpolicy-3.13.1/policy/modules/kernel/kernel.te --- serefpolicy-3.13.1.orig/policy/modules/kernel/kernel.te 2016-04-05 14:59:56.567712479 -0700 +++ serefpolicy-3.13.1/policy/modules/kernel/kernel.te 2016-04-05 21:57:44.634218241 -0700 @@ -491,6 +491,9 @@ # Rules for unconfined acccess to this module # +allow kern_unconfined self:cap_userns all_cap_userns_perms; 
+allow kern_unconfined self:cap2_userns all_cap2_userns_perms; + allow kern_unconfined proc_type:{ file } ~entrypoint; allow kern_unconfined proc_type:{ dir lnk_file } *;
--- selinux-policy.spec.orig 2016-04-05 15:11:01.345382448 -0700 +++ selinux-policy.spec 2016-04-05 20:28:54.912898878 -0700 @@ -19,7 +19,7 @@ Summary: SELinux policy configuration Name: selinux-policy Version: 3.13.1 -Release: 180%{?dist} +Release: 180%{?dist}.userns.1 License: GPLv2+ Group: System Environment/Base Source: serefpolicy-%{version}.tgz @@ -29,6 +29,7 @@ patch: policy-rawhide-base.patch patch1: policy-rawhide-contrib.patch patch2: policy-rawhide-base-cockpit.patch +patch3: refpolicy-define-cap-userns.patch Source1: modules-targeted-base.conf Source31: modules-targeted-contrib.conf Source2: booleans-targeted.conf @@ -324,6 +325,7 @@ %setup -n serefpolicy-%{version} -q %patch -p1 %patch2 -p1 +%patch3 -p1 refpolicy_path=`pwd` cp $contrib_path/* $refpolicy_path/policy/modules/contrib
_______________________________________________ Selinux mailing list Selinux@xxxxxxxxxxxxx To unsubscribe, send email to Selinux-leave@xxxxxxxxxxxxx. To get help, send an email containing "help" to Selinux-request@xxxxxxxxxxxxx.