On 6/25/24 12:03, hongmianquan via Devel wrote: > We need to ensure top lock is acquired before nested lock. Otherwise deadlock > issues may arise. We have the stack security driver, which internally manages > other security drivers, we call them "top" and "nested". > > We call virSecurityStackPreFork() to lock the top one, and it also locks > and then unlocks the nested drivers prior to fork. Then in qemuSecurityPostFork(), > it unlocks the top one, but not the nested ones. Thus, if one of the nested > drivers ("dac" or "selinux") is still locked, it will cause a deadlock. > We discovered this case: the nested list obtained through the qemuSecurityGetNested() > will be locked for subsequent use, such as in virQEMUDriverCreateCapabilities(), > where the nested list is locked using qemuSecurityGetDOI, but the top one is not locked beforehand. > > The problem stack is as follows: > > libvirtd thread1 libvirtd thread2 child libvirtd > | | | > | | | > virsh capabilities qemuProcessLanuch | > | | | > | lock top | > | | | > lock nested | | > | | | > | fork------------------->|(held nested lock) > | | | > | | | > unlock nested unlock top unlock top > | > | > qemuSecuritySetSocketLabel > | > | > lock nested (deadlock) > > In this commit, we ensure that the top lock is acquired before the nested lock, > so during fork, it's not possible for another task to acquire the nested lock. > > Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1303031 > > Signed-off-by: hongmianquan <hongmianquan@xxxxxxxxxxxxx> > --- > src/libvirt_private.syms | 3 ++- > src/qemu/qemu_conf.c | 9 ++++++++- > src/qemu/qemu_driver.c | 16 +++++++++------- > src/qemu/qemu_security.h | 2 ++ > src/security/security_manager.c | 22 ++++++++++++++++++++++ > src/security/security_manager.h | 2 ++ > 6 files changed, 45 insertions(+), 9 deletions(-) > > diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms > index bac4a8a366..39cdb90772 100644 > --- a/src/libvirt_private.syms > +++ b/src/libvirt_private.syms > @@ -1806,7 +1806,8 @@ virSecurityManagerTransactionAbort; > virSecurityManagerTransactionCommit; > virSecurityManagerTransactionStart; > virSecurityManagerVerify; > - > +virSecurityManagerStackLock; > +virSecurityManagerStackUnlock; > This needs to be ordered differently. I believe 'ninja test' complains about ordering. > # security/security_util.h > virSecurityXATTRNamespaceDefined; > diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c > index 4050a82341..21f0739fd5 100644 > --- a/src/qemu/qemu_conf.c > +++ b/src/qemu/qemu_conf.c > @@ -1380,6 +1380,9 @@ virCaps *virQEMUDriverCreateCapabilities(virQEMUDriver *driver) > return NULL; > } > > + /* Ensure top lock is acquired before nested locks */ > + qemuSecurityStackLock(driver->securityManager); > + > /* access sec drivers and create a sec model for each one */ > if (!(sec_managers = qemuSecurityGetNested(driver->securityManager))) > return NULL; > @@ -1402,8 +1405,10 @@ virCaps *virQEMUDriverCreateCapabilities(virQEMUDriver *driver) There's one early 'return' in between these two hunks. It'll also need to call qemuSecurityStackUnlock(). > lbl = qemuSecurityGetBaseLabel(sec_managers[i], virtTypes[j]); > type = virDomainVirtTypeToString(virtTypes[j]); > if (lbl && > - virCapabilitiesHostSecModelAddBaseLabel(sm, type, lbl) < 0) > + virCapabilitiesHostSecModelAddBaseLabel(sm, type, lbl) < 0) { > + qemuSecurityStackUnlock(driver->securityManager); > return NULL; > + } > } > > VIR_DEBUG("Initialized caps for security driver \"%s\" with " > @@ -1412,6 +1417,8 @@ virCaps *virQEMUDriverCreateCapabilities(virQEMUDriver *driver) > > caps->host.numa = virCapabilitiesHostNUMANewHost(); > caps->host.cpu = virQEMUDriverGetHostCPU(driver); > + > + qemuSecurityStackUnlock(driver->securityManager); I believe this can be moved before virCapabilitiesHostNUMANewHost() line. > return g_steal_pointer(&caps); > } > > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c > index fc1704f4fc..c980a0990f 100644 > --- a/src/qemu/qemu_driver.c > +++ b/src/qemu/qemu_driver.c > @@ -560,7 +560,6 @@ qemuStateInitialize(bool privileged, > bool autostart = true; > size_t i; > const char *defsecmodel = NULL; > - g_autofree virSecurityManager **sec_managers = NULL; > g_autoptr(virIdentity) identity = virIdentityGetCurrent(); > > qemu_driver = g_new0(virQEMUDriver, 1); > @@ -835,11 +834,8 @@ qemuStateInitialize(bool privileged, > if (!qemu_driver->qemuCapsCache) > goto error; > > - if (!(sec_managers = qemuSecurityGetNested(qemu_driver->securityManager))) > - goto error; > - > - if (sec_managers[0] != NULL) > - defsecmodel = qemuSecurityGetModel(sec_managers[0]); > + if (qemu_driver->securityManager != NULL) > + defsecmodel = qemuSecurityGetModel(qemu_driver->securityManager); I'd prefer this change to be a separate patch as it's semantically different change. > > if (!(qemu_driver->xmlopt = virQEMUDriverCreateXMLConf(qemu_driver, > defsecmodel))) > @@ -5663,7 +5659,12 @@ static int qemuDomainGetSecurityLabelList(virDomainPtr dom, > ret = 0; > } else { > int len = 0; > - virSecurityManager ** mgrs = qemuSecurityGetNested(driver->securityManager); > + virSecurityManager ** mgrs = NULL; > + > + /* Ensure top lock is acquired before nested locks */ > + qemuSecurityStackLock(driver->securityManager); > + > + mgrs = qemuSecurityGetNested(driver->securityManager); > if (!mgrs) > goto cleanup; > > @@ -5688,6 +5689,7 @@ static int qemuDomainGetSecurityLabelList(virDomainPtr dom, > } > > cleanup: > + qemuSecurityStackUnlock(driver->securityManager); This label is jumped onto also from other places which did not call qemuSecurityStackLock(). > virDomainObjEndAPI(&vm); > return ret; > } Otherwise makes sense. Michal