Quoting Daniel P. Berrange (berrange@xxxxxxxxxx): > This patch updates the LXC driver to make use of libcap-ng for managing > process capabilities. Previously Ryota Ozaki had provided code to remove > the CAP_BOOT capabilities inside the container, preventing host reboots. > In addition to that one, I believe we should be removing ability to > load kernel modules, change the system clock and changing audit/MAC. > So this patch also clears the following: > > CAP_SYS_MODULE, /* No kernel module loading */ > CAP_SYS_TIME, /* No changing the clock */ > CAP_AUDIT_CONTROL, /* No messing with auditing */ > CAP_AUDIT_WRITE, /* No messing with auditing */ > CAP_MAC_ADMIN, /* No messing with LSM */ > CAP_MAC_OVERRIDE, /* No messing with LSM */ Thanks, Daniel, this does look good. I wonder whether there is a more appropriate list to email caps-related patches (including libcap-ng itself) to. Not only does the code itself warrant discussion (for instance, should capng_lock() also set CAP_NOSUID_FIXUP?), but such discussions, in one place, about converting several programs to dropping capabilities would help others to do the same with this code. I can't think of anything other than the LSM list, so I'm cc:ing it here. > We use libcap-ng's capng_updatev/apply functions to remove these from > the permitted, inheritable, effective and bounding sets. Then we use > capng_lock to set NOROOT and NOROOT_LOCKED in the process securebits > to prevent them ever being re-acquired. > > The other thing I realized is that the 'libvirt_lxc' controller process > does not need to keep any capabilities at all once it has spawned the > container process, since all its doing is forwarding I/O between 2 open > file descripts. So I also clear all capabilities from that. We should > probably make it chuid/gid to a non-root user in future too. > > lxc_container.c | 66 +++++++++++++++++++++++++++++++++++++------------------ > lxc_controller.c | 28 +++++++++++++++++++++++ > 2 files changed, 73 insertions(+), 21 deletions(-) > > > Regards, > Daniel > > diff -r 7e766489c4a2 src/lxc_container.c > --- a/src/lxc_container.c Tue Jun 23 11:13:45 2009 +0100 > +++ b/src/lxc_container.c Tue Jun 23 11:54:10 2009 +0100 > @@ -41,8 +41,9 @@ > /* For MS_MOVE */ > #include <linux/fs.h> > > -#include <sys/prctl.h> > -#include <linux/capability.h> > +#if HAVE_CAPNG > +#include <cap-ng.h> > +#endif > > #include "virterror_internal.h" > #include "logging.h" > @@ -642,27 +643,50 @@ static int lxcContainerSetupMounts(virDo > return lxcContainerSetupExtraMounts(vmDef); > } > > -static int lxcContainerDropCapabilities(virDomainDefPtr vmDef ATTRIBUTE_UNUSED) > + > +/* > + * This is running as the 'init' process insid the container. > + * It removes some capabilities that could be dangerous to > + * host system, since they are not currently "containerized" > + */ > +static int lxcContainerDropCapabilities(void) > { > -#ifdef PR_CAPBSET_DROP > - int i; > - const struct { > - int id; > - const char *name; > - } caps[] = { > -#define ID_STRING(name) name, #name > - { ID_STRING(CAP_SYS_BOOT) }, > - }; > +#if HAVE_CAPNG > + int ret; > > - for (i = 0 ; i < ARRAY_CARDINALITY(caps) ; i++) { > - if (prctl(PR_CAPBSET_DROP, caps[i].id, 0, 0, 0)) { > - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, > - _("failed to drop %s"), caps[i].name); > - return -1; > - } > + capng_get_caps_process(); > + > + if ((ret = capng_updatev(CAPNG_DROP, > + CAPNG_EFFECTIVE | CAPNG_PERMITTED | > + CAPNG_INHERITABLE | CAPNG_BOUNDING_SET, > + CAP_SYS_BOOT, /* No use of reboot */ > + CAP_SYS_MODULE, /* No kernel module loading */ > + CAP_SYS_TIME, /* No changing the clock */ > + CAP_AUDIT_CONTROL, /* No messing with auditing */ > + CAP_AUDIT_WRITE, /* No messing with auditing */ > + CAP_MAC_ADMIN, /* No messing with LSM */ > + CAP_MAC_OVERRIDE, /* No messing with LSM */ > + -1 /* sentinal */)) < 0) { > + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, > + _("failed to remove capabilities %d"), ret); > + return -1; > } > -#else /* ! PR_CAPBSET_DROP */ > - VIR_WARN0(_("failed to drop capabilities PR_CAPBSET_DROP undefined")); > + > + if ((ret = capng_apply(CAPNG_SELECT_BOTH)) < 0) { > + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, > + _("failed to apply capabilities: %d"), ret); > + return -1; > + } The only problem offhand with this idiom is that you need CAP_SETPCAP to set securebits and drop caps from bounding set, but I think a lot of applications could stand to drop CAP_SETPCAP otherwise. So I don't know if it would help to do the capng_lock() before capng_apply(). (To be clear, not bc you need to do so right here, but because others may well look at your code as example code.) > + /* Need to prevent them regaining any caps on exec */ > + if ((ret = capng_lock()) < 0) { > + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, > + _("failed to lock capabilities: %d"), ret); > + return -1; > + } > + > +#else > + VIR_WARN0(_("libcap-ng support not compiled in, unable to clear capabilities")); > #endif > return 0; > } > @@ -735,7 +759,7 @@ static int lxcContainerChild( void *data > return -1; > > /* drop a set of root capabilities */ > - if (lxcContainerDropCapabilities(vmDef) < 0) > + if (lxcContainerDropCapabilities() < 0) > return -1; > > /* this function will only return if an error occured */ > diff -r 7e766489c4a2 src/lxc_controller.c > --- a/src/lxc_controller.c Tue Jun 23 11:13:45 2009 +0100 > +++ b/src/lxc_controller.c Tue Jun 23 11:54:10 2009 +0100 > @@ -35,6 +35,10 @@ > #include <getopt.h> > #include <sys/mount.h> > > +#if HAVE_CAPNG > +#include <cap-ng.h> > +#endif > + > #include "virterror_internal.h" > #include "logging.h" > #include "util.h" > @@ -210,6 +214,25 @@ cleanup: > return rc; > } > > + > +static int lxcControllerClearCapabilities(void) > +{ > +#if HAVE_CAPNG > + int ret; > + > + capng_clear(CAPNG_SELECT_BOTH); > + > + if ((ret = capng_apply(CAPNG_SELECT_BOTH)) < 0) { > + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, > + _("failed to apply capabilities: %d"), ret); > + return -1; > + } > +#else > + VIR_WARN0(_("libcap-ng support not compiled in, unable to clear capabilities")); > +#endif > + return 0; > +} > + > typedef struct _lxcTtyForwardFd_t { > int fd; > int active; > @@ -562,6 +585,11 @@ lxcControllerRun(virDomainDefPtr def, > if (lxcContainerSendContinue(control[0]) < 0) > goto cleanup; > > + /* Now the container is running, there's no need for us to keep > + any elevated capabilities */ > + if (lxcControllerClearCapabilities() < 0) > + goto cleanup; > + > rc = lxcControllerMain(monitor, client, appPty, containerPty); > > cleanup: > > > -- > |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| > |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| > |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| > |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :| > > -- > Libvir-list mailing list > Libvir-list@xxxxxxxxxx > https://www.redhat.com/mailman/listinfo/libvir-list -- Libvir-list mailing list Libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list