Serge E. Hallyn wrote: > Quoting Daniel P. Berrange (berrange@xxxxxxxxxx): > >> This patch updates the LXC driver to make use of libcap-ng for managing >> process capabilities. Previously Ryota Ozaki had provided code to remove >> the CAP_BOOT capabilities inside the container, preventing host reboots. >> In addition to that one, I believe we should be removing ability to >> load kernel modules, change the system clock and changing audit/MAC. >> So this patch also clears the following: >> >> CAP_SYS_MODULE, /* No kernel module loading */ >> CAP_SYS_TIME, /* No changing the clock */ >> CAP_AUDIT_CONTROL, /* No messing with auditing */ >> CAP_AUDIT_WRITE, /* No messing with auditing */ >> CAP_MAC_ADMIN, /* No messing with LSM */ >> CAP_MAC_OVERRIDE, /* No messing with LSM */ >> What is going to run inside your container? Turning off the MAC capabilities can seriously limit the programs that can run inside it. If you can't drop CAP_DAC_OVERRIDE or CAP_KILL it's unlikely that it makes sense to drop CAP_MAC_OVERRIDE. Similarly, if you can't drop CAP_FOWNER or CAP_CHOWN you'll probably be ill advised to forgo CAP_MAC_ADMIN. > > Thanks, Daniel, this does look good. I wonder whether there is a more > appropriate list to email caps-related patches (including libcap-ng > itself) to. Not only does the code itself warrant discussion (for > instance, should capng_lock() also set CAP_NOSUID_FIXUP?), but such > discussions, in one place, about converting several programs to dropping > capabilities would help others to do the same with this code. > > I can't think of anything other than the LSM list, so I'm cc:ing it > here. > > >> We use libcap-ng's capng_updatev/apply functions to remove these from >> the permitted, inheritable, effective and bounding sets. Then we use >> capng_lock to set NOROOT and NOROOT_LOCKED in the process securebits >> to prevent them ever being re-acquired. >> >> The other thing I realized is that the 'libvirt_lxc' controller process >> does not need to keep any capabilities at all once it has spawned the >> container process, since all its doing is forwarding I/O between 2 open >> file descripts. So I also clear all capabilities from that. We should >> probably make it chuid/gid to a non-root user in future too. >> >> lxc_container.c | 66 +++++++++++++++++++++++++++++++++++++------------------ >> lxc_controller.c | 28 +++++++++++++++++++++++ >> 2 files changed, 73 insertions(+), 21 deletions(-) >> >> >> Regards, >> Daniel >> >> diff -r 7e766489c4a2 src/lxc_container.c >> --- a/src/lxc_container.c Tue Jun 23 11:13:45 2009 +0100 >> +++ b/src/lxc_container.c Tue Jun 23 11:54:10 2009 +0100 >> @@ -41,8 +41,9 @@ >> /* For MS_MOVE */ >> #include <linux/fs.h> >> >> -#include <sys/prctl.h> >> -#include <linux/capability.h> >> +#if HAVE_CAPNG >> +#include <cap-ng.h> >> +#endif >> >> #include "virterror_internal.h" >> #include "logging.h" >> @@ -642,27 +643,50 @@ static int lxcContainerSetupMounts(virDo >> return lxcContainerSetupExtraMounts(vmDef); >> } >> >> -static int lxcContainerDropCapabilities(virDomainDefPtr vmDef ATTRIBUTE_UNUSED) >> + >> +/* >> + * This is running as the 'init' process insid the container. >> + * It removes some capabilities that could be dangerous to >> + * host system, since they are not currently "containerized" >> + */ >> +static int lxcContainerDropCapabilities(void) >> { >> -#ifdef PR_CAPBSET_DROP >> - int i; >> - const struct { >> - int id; >> - const char *name; >> - } caps[] = { >> -#define ID_STRING(name) name, #name >> - { ID_STRING(CAP_SYS_BOOT) }, >> - }; >> +#if HAVE_CAPNG >> + int ret; >> >> - for (i = 0 ; i < ARRAY_CARDINALITY(caps) ; i++) { >> - if (prctl(PR_CAPBSET_DROP, caps[i].id, 0, 0, 0)) { >> - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, >> - _("failed to drop %s"), caps[i].name); >> - return -1; >> - } >> + capng_get_caps_process(); >> + >> + if ((ret = capng_updatev(CAPNG_DROP, >> + CAPNG_EFFECTIVE | CAPNG_PERMITTED | >> + CAPNG_INHERITABLE | CAPNG_BOUNDING_SET, >> + CAP_SYS_BOOT, /* No use of reboot */ >> + CAP_SYS_MODULE, /* No kernel module loading */ >> + CAP_SYS_TIME, /* No changing the clock */ >> + CAP_AUDIT_CONTROL, /* No messing with auditing */ >> + CAP_AUDIT_WRITE, /* No messing with auditing */ >> + CAP_MAC_ADMIN, /* No messing with LSM */ >> + CAP_MAC_OVERRIDE, /* No messing with LSM */ >> + -1 /* sentinal */)) < 0) { >> + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, >> + _("failed to remove capabilities %d"), ret); >> + return -1; >> } >> -#else /* ! PR_CAPBSET_DROP */ >> - VIR_WARN0(_("failed to drop capabilities PR_CAPBSET_DROP undefined")); >> + >> + if ((ret = capng_apply(CAPNG_SELECT_BOTH)) < 0) { >> + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, >> + _("failed to apply capabilities: %d"), ret); >> + return -1; >> + } >> > > The only problem offhand with this idiom is that you need CAP_SETPCAP to > set securebits and drop caps from bounding set, but I think a lot of > applications could stand to drop CAP_SETPCAP otherwise. So I don't know > if it would help to do the capng_lock() before capng_apply(). > > (To be clear, not bc you need to do so right here, but because others > may well look at your code as example code.) > > >> + /* Need to prevent them regaining any caps on exec */ >> + if ((ret = capng_lock()) < 0) { >> + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, >> + _("failed to lock capabilities: %d"), ret); >> + return -1; >> + } >> + >> +#else >> + VIR_WARN0(_("libcap-ng support not compiled in, unable to clear capabilities")); >> #endif >> return 0; >> } >> @@ -735,7 +759,7 @@ static int lxcContainerChild( void *data >> return -1; >> >> /* drop a set of root capabilities */ >> - if (lxcContainerDropCapabilities(vmDef) < 0) >> + if (lxcContainerDropCapabilities() < 0) >> return -1; >> >> /* this function will only return if an error occured */ >> diff -r 7e766489c4a2 src/lxc_controller.c >> --- a/src/lxc_controller.c Tue Jun 23 11:13:45 2009 +0100 >> +++ b/src/lxc_controller.c Tue Jun 23 11:54:10 2009 +0100 >> @@ -35,6 +35,10 @@ >> #include <getopt.h> >> #include <sys/mount.h> >> >> +#if HAVE_CAPNG >> +#include <cap-ng.h> >> +#endif >> + >> #include "virterror_internal.h" >> #include "logging.h" >> #include "util.h" >> @@ -210,6 +214,25 @@ cleanup: >> return rc; >> } >> >> + >> +static int lxcControllerClearCapabilities(void) >> +{ >> +#if HAVE_CAPNG >> + int ret; >> + >> + capng_clear(CAPNG_SELECT_BOTH); >> + >> + if ((ret = capng_apply(CAPNG_SELECT_BOTH)) < 0) { >> + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, >> + _("failed to apply capabilities: %d"), ret); >> + return -1; >> + } >> +#else >> + VIR_WARN0(_("libcap-ng support not compiled in, unable to clear capabilities")); >> +#endif >> + return 0; >> +} >> + >> typedef struct _lxcTtyForwardFd_t { >> int fd; >> int active; >> @@ -562,6 +585,11 @@ lxcControllerRun(virDomainDefPtr def, >> if (lxcContainerSendContinue(control[0]) < 0) >> goto cleanup; >> >> + /* Now the container is running, there's no need for us to keep >> + any elevated capabilities */ >> + if (lxcControllerClearCapabilities() < 0) >> + goto cleanup; >> + >> rc = lxcControllerMain(monitor, client, appPty, containerPty); >> >> cleanup: >> >> >> -- >> |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| >> |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| >> |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| >> |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :| >> >> -- >> Libvir-list mailing list >> Libvir-list@xxxxxxxxxx >> https://www.redhat.com/mailman/listinfo/libvir-list >> > -- > To unsubscribe from this list: send the line "unsubscribe linux-security-module" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html > > > -- Libvir-list mailing list Libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list