For https://bugzilla.redhat.com/show_bug.cgi?id=1066801 The nwfilter conf update mutex previously serialized updates to the internal data structures for firewall rules, and updates to the firewall itself. The latter was recently turned into a read/write lock, and filter instantiation was allowed to proceed in parallel. It was believed that this was ok, since each filter is created on a separate iptables/ebtables chain. It turns out that there is a subtle lock ordering problem on virNWFilterObjPtr instances. __virNWFilterInstantiateFilter will hold a lock on the virNWFilterObjPtr it is instantiating. This in turn invokes virNWFilterInstantiate which then invokes virNWFilterDetermineMissingVarsRec which then invokes virNWFilterObjFindByName. This iterates over every single virNWFilterObjPtr in the list, locking them and checking their name. So if 2 or more threads try to instantiate a filter in parallel, they'll each hold one lock at the top level in the __virNWFilterInstantiateFilter method, which will cause the other threads to deadlock in virNWFilterObjFindByName. The fix is to add an exclusive mutex to serialize the execution of __virNWFilterInstantiateFilter. Signed-off-by: Daniel P. 
Berrange <berrange@xxxxxxxxxx> --- src/nwfilter/nwfilter_driver.c | 6 ++++-- src/nwfilter/nwfilter_gentech_driver.c | 34 ++++++++++++++++++++++++++++++++-- src/nwfilter/nwfilter_gentech_driver.h | 2 +- 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/src/nwfilter/nwfilter_driver.c b/src/nwfilter/nwfilter_driver.c index 5908df7..2e89d07 100644 --- a/src/nwfilter/nwfilter_driver.c +++ b/src/nwfilter/nwfilter_driver.c @@ -200,7 +200,8 @@ nwfilterStateInitialize(bool privileged, if (virNWFilterDHCPSnoopInit() < 0) goto err_exit_learnshutdown; - virNWFilterTechDriversInit(privileged); + if (virNWFilterTechDriversInit(privileged) < 0) + goto err_dhcpsnoop_shutdown; if (virNWFilterConfLayerInit(virNWFilterDomainFWUpdateCB, driverState) < 0) @@ -251,6 +252,7 @@ error: err_techdrivers_shutdown: virNWFilterTechDriversShutdown(); +err_dhcpsnoop_shutdown: virNWFilterDHCPSnoopShutdown(); err_exit_learnshutdown: virNWFilterLearnShutdown(); @@ -327,10 +329,10 @@ nwfilterStateCleanup(void) { if (driverState->privileged) { virNWFilterConfLayerShutdown(); - virNWFilterTechDriversShutdown(); virNWFilterDHCPSnoopShutdown(); virNWFilterLearnShutdown(); virNWFilterIPAddrMapShutdown(); + virNWFilterTechDriversShutdown(); nwfilterDriverLock(driverState); diff --git a/src/nwfilter/nwfilter_gentech_driver.c b/src/nwfilter/nwfilter_gentech_driver.c index 8c5cd57..5144dce 100644 --- a/src/nwfilter/nwfilter_gentech_driver.c +++ b/src/nwfilter/nwfilter_gentech_driver.c @@ -55,15 +55,34 @@ static virNWFilterTechDriverPtr filter_tech_drivers[] = { NULL }; +/* Serializes instantiation of filters. This is neccessary + * to avoid lock ordering deadlocks. eg __virNWFilterInstantiateFilter + * will hold a lock on a virNWFilterObjPtr. This in turn invokes + * virNWFilterInstantiate which invokes virNWFilterDetermineMissingVarsRec + * which invokes virNWFilterObjFindByName. This iterates over every single + * virNWFilterObjPtr in the list. 
So if 2 threads try to instantiate a + * filter in parallel, they'll both hold 1 lock at the top level in + * __virNWFilterInstantiateFilter which will cause the other thread + * to dead lock in virNWFilterObjFindByName. + * + * XXX better long term solution is to make virNWFilterObjList use a + * hash table as is done for virDomainObjList. You can then get + * lockless lookup of objects by name. + */ +static virMutex updateMutex; -void virNWFilterTechDriversInit(bool privileged) { +int virNWFilterTechDriversInit(bool privileged) { size_t i = 0; VIR_DEBUG("Initializing NWFilter technology drivers"); + if (virMutexInitRecursive(&updateMutex) < 0) + return -1; + while (filter_tech_drivers[i]) { if (!(filter_tech_drivers[i]->flags & TECHDRV_FLAG_INITIALIZED)) filter_tech_drivers[i]->init(privileged); i++; } + return 0; } @@ -74,6 +93,7 @@ void virNWFilterTechDriversShutdown(void) { filter_tech_drivers[i]->shutdown(); i++; } + virMutexDestroy(&updateMutex); } @@ -935,6 +955,8 @@ _virNWFilterInstantiateFilter(virNWFilterDriverStatePtr driver, int ifindex; int rc; + virMutexLock(&updateMutex); + /* after grabbing the filter update lock check for the interface; if it's not there anymore its filters will be or are being removed (while holding the lock) and we don't want to build new ones */ @@ -962,6 +984,8 @@ _virNWFilterInstantiateFilter(virNWFilterDriverStatePtr driver, foundNewFilter); cleanup: + virMutexUnlock(&updateMutex); + return rc; } @@ -981,6 +1005,7 @@ virNWFilterInstantiateFilterLate(virNWFilterDriverStatePtr driver, bool foundNewFilter = false; virNWFilterReadLockFilterUpdates(); + virMutexLock(&updateMutex); rc = __virNWFilterInstantiateFilter(driver, vmuuid, @@ -1006,6 +1031,7 @@ virNWFilterInstantiateFilterLate(virNWFilterDriverStatePtr driver, } virNWFilterUnlockFilterUpdates(); + virMutexUnlock(&updateMutex); return rc; } @@ -1129,7 +1155,11 @@ _virNWFilterTeardownFilter(const char *ifname) int virNWFilterTeardownFilter(const virDomainNetDef *net) { - 
return _virNWFilterTeardownFilter(net->ifname); + int ret; + virMutexLock(&updateMutex); + ret = _virNWFilterTeardownFilter(net->ifname); + virMutexUnlock(&updateMutex); + return ret; } diff --git a/src/nwfilter/nwfilter_gentech_driver.h b/src/nwfilter/nwfilter_gentech_driver.h index f4789e1..d72e040 100644 --- a/src/nwfilter/nwfilter_gentech_driver.h +++ b/src/nwfilter/nwfilter_gentech_driver.h @@ -31,7 +31,7 @@ virNWFilterTechDriverPtr virNWFilterTechDriverForName(const char *name); int virNWFilterRuleInstAddData(virNWFilterRuleInstPtr res, void *data); -void virNWFilterTechDriversInit(bool privileged); +int virNWFilterTechDriversInit(bool privileged); void virNWFilterTechDriversShutdown(void); enum instCase { -- 1.8.5.3 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list