On Thu, Nov 14, 2013 at 11:35:06AM +0800, Gao feng wrote: > I met a problem that container blocked by seteuid/setegid > which is call in lxcContainerSetID on UP system and libvirt > compiled with --with-fuse=yes. > > I looked into the glibc's codes, and found setxid in glibc > calls futex() to wait for other threads to change their > setxid_futex to 0(see setxid_mark_thread in glibc). > > since the process created by clone system call will not > share the memory with the other threads and the context > of memory doesn't changed until we call execl.(COW) > > So if the process which created by clone is called before > fuse thread being stated, the new setxid_futex of fuse > thread will not be saw in this process, it will be blocked > forever. > > This patch makes sure the cloned process calls setxid first, > and then the lxc controller creates fuse thread. This is not required. All we need todo is ensure that the thread is started after clone(). This avoids the async signal safety requirement from fork()+exec(), because the parent is single threaded at the point memory is made to be copy-on-write. So there is no need to synchronize with any calls in the child proccess > diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c > index c000a82..27bdcc0 100644 > --- a/src/lxc/lxc_container.c > +++ b/src/lxc/lxc_container.c > @@ -1844,9 +1844,24 @@ static int lxcContainerChild(void *data) > cmd = lxcContainerBuildInitCmd(vmDef); > virCommandWriteArgLog(cmd, 1); > > + /* call setxid before libvirt controller creates fuse thread. */ > if (lxcContainerSetID(vmDef) < 0) > goto cleanup; > > + /* rename and enable interfaces */ > + if (lxcContainerRenameAndEnableInterfaces(!!(vmDef->features & > + (1 << VIR_DOMAIN_FEATURE_PRIVNET)), > + argv->nveths, > + argv->veths) < 0) { > + goto cleanup; > + } > + > + if (lxcContainerSendContinue(argv->handshakefd) < 0) { > + virReportSystemError(errno, "%s", > + _("Failed to send continue signal to controller")); > + goto cleanup; > + } Moving the SendContinue call means that we loose all error reporting from this point onwards. > + > root = virDomainGetRootFilesystem(vmDef); > > if (argv->nttyPaths) { > @@ -1886,24 +1901,10 @@ static int lxcContainerChild(void *data) > goto cleanup; > } > > - /* rename and enable interfaces */ > - if (lxcContainerRenameAndEnableInterfaces(!!(vmDef->features & > - (1 << VIR_DOMAIN_FEATURE_PRIVNET)), > - argv->nveths, > - argv->veths) < 0) { > - goto cleanup; > - } This patch wasn't created against current git master, since this code has been changed > - > /* drop a set of root capabilities */ > if (lxcContainerDropCapabilities(!!hasReboot) < 0) > goto cleanup; > > - if (lxcContainerSendContinue(argv->handshakefd) < 0) { > - virReportSystemError(errno, "%s", > - _("Failed to send continue signal to controller")); > - goto cleanup; > - } > - > VIR_DEBUG("Setting up security labeling"); > if (virSecurityManagerSetProcessLabel(argv->securityDriver, vmDef) < 0) > goto cleanup; In fact all these changes in this file are bogus. > diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c > index c8f68c0..5d1ec49 100644 > --- a/src/lxc/lxc_controller.c > +++ b/src/lxc/lxc_controller.c > @@ -1981,6 +1981,12 @@ virLXCControllerSetupFuse(virLXCControllerPtr ctrl) > } > > static int > +virLXCControllerStartFuse(virLXCControllerPtr ctrl) > +{ > + return lxcStartFuse(ctrl->fuse); > +} > + > +static int > virLXCControllerSetupConsoles(virLXCControllerPtr ctrl, > char **containerTTYPaths) > { > @@ -2197,7 +2203,9 @@ virLXCControllerRun(virLXCControllerPtr ctrl) > goto cleanup; > } > > - /* Now the container is fully setup... */ > + /* container has already called setxid, we can create thread now.*/ > + if (virLXCControllerStartFuse(ctrl) < 0) > + goto cleanup; > > /* ...and reduce our privileges */ > if (lxcControllerClearCapabilities() < 0) > diff --git a/src/lxc/lxc_fuse.c b/src/lxc/lxc_fuse.c > index 9d12832..88e122e 100644 > --- a/src/lxc/lxc_fuse.c > +++ b/src/lxc/lxc_fuse.c > @@ -322,12 +322,6 @@ int lxcSetupFuse(virLXCFusePtr *f, virDomainDefPtr def) > goto cleanup1; > } > > - if (virThreadCreate(&fuse->thread, false, lxcFuseRun, > - (void *)fuse) < 0) { > - lxcFuseDestroy(fuse); > - goto cleanup1; > - } > - > ret = 0; > cleanup: > fuse_opt_free_args(&args); > @@ -341,6 +335,17 @@ cleanup2: > goto cleanup; > } > > +int lxcStartFuse(virLXCFusePtr fuse) > +{ > + if (virThreadCreate(&fuse->thread, false, lxcFuseRun, > + (void *)fuse) < 0) { > + lxcFuseDestroy(fuse); > + return -1; > + } > + > + return 0; > +} > + > void lxcFreeFuse(virLXCFusePtr *f) > { > virLXCFusePtr fuse = *f; > @@ -364,6 +369,10 @@ int lxcSetupFuse(virLXCFusePtr *f ATTRIBUTE_UNUSED, > return 0; > } > > +int lxcStartFuse(virLXCFusePtr f ATTRIBUTE_UNUSED) > +{ > +} > + > void lxcFreeFuse(virLXCFusePtr *f ATTRIBUTE_UNUSED) > { > } > diff --git a/src/lxc/lxc_fuse.h b/src/lxc/lxc_fuse.h > index b3713af..d60492b 100644 > --- a/src/lxc/lxc_fuse.h > +++ b/src/lxc/lxc_fuse.h > @@ -58,6 +58,7 @@ struct virLXCFuse { > typedef struct virLXCFuse *virLXCFusePtr; > > extern int lxcSetupFuse(virLXCFusePtr *f, virDomainDefPtr def); > +extern int lxcStartFuse(virLXCFusePtr f); > extern void lxcFreeFuse(virLXCFusePtr *f); > I've copied you on a much simpler patch which avoids the race in my testing. Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :| -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list