Deadlock scenario: 1. domain is being started and domain driver takes read lock for nwfilter update lock thru virNWFilterReadLockFilterUpdates. 2. firewalld restarted and event loop hangs trying to take write lock for nwfilter update lock in nwfilterStateReload. This is deadlock because for thread 1 to proceed we need a response from qemu/timeout but whole event loop is stuck by 2. Let's just offload nwfilter driver reload to a distinct thread. Besides resolving a deadlock this change offloads rather heavy operation (many hypervisor drivers X many domains for a driver) off the event loop. --- Concrete stacktrace. Thread 17 (Thread 0x7fbd5a22d880 (LWP 7329)): 0 0x00007fbd56bb903e in pthread_rwlock_wrlock () from /lib64/libpthread.so.0 1 0x00007fbd3d511297 in nwfilterStateReload () from /usr/lib64/libvirt/connection-driver/libvirt_driver_nwfilter.so 2 0x00007fbd3d511355 in nwfilterFirewalldDBusFilter () from /usr/lib64/libvirt/connection-driver/libvirt_driver_nwfilter.so 3 0x00007fbd57bc5dfe in dbus_connection_dispatch () from /lib64/libdbus-1.so.3 4 0x00007fbd5953e641 in virDBusWatchCallback () from /lib64/libvirt.so.0 5 0x00007fbd5954868e in virEventPollRunOnce () from /lib64/libvirt.so.0 6 0x00007fbd59547942 in virEventRunDefaultImpl () from /lib64/libvirt.so.0 7 0x00007fbd596a94ed in virNetDaemonRun () from /lib64/libvirt.so.0 8 0x00007fbd5a28b07c in main () Thread 3 (Thread 0x7fbd49553700 (LWP 8029)): 0 0x00007fbd56bb96d5 in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 1 0x00007fbd59596816 in virCondWait () from /lib64/libvirt.so.0 2 0x00007fbd3d0183db in qemuMonitorSend () from /usr/lib64/libvirt/connection-driver/libvirt_driver_qemu.so 3 0x00007fbd3d02c8d0 in qemuMonitorJSONCommandWithFd () from /usr/lib64/libvirt/connection-driver/libvirt_driver_qemu.so 4 0x00007fbd3d02e1a1 in qemuMonitorJSONSetCapabilities () from /usr/lib64/libvirt/connection-driver/libvirt_driver_qemu.so 5 0x00007fbd3cff965c in qemuConnectMonitor () from /usr/lib64/libvirt/connection-driver/libvirt_driver_qemu.so 6 0x00007fbd3cffcb70 in qemuProcessWaitForMonitor () from /usr/lib64/libvirt/connection-driver/libvirt_driver_qemu.so 7 0x00007fbd3d004668 in qemuProcessLaunch () from /usr/lib64/libvirt/connection-driver/libvirt_driver_qemu.so 8 0x00007fbd3d0062a8 in qemuProcessStart () from /usr/lib64/libvirt/connection-driver/libvirt_driver_qemu.so 9 0x00007fbd3d0679cb in qemuDomainObjStart.constprop.50 () from /usr/lib64/libvirt/connection-driver/libvirt_driver_qemu.so 10 0x00007fbd3d068106 in qemuDomainCreateWithFlags () from /usr/lib64/libvirt/connection-driver/libvirt_driver_qemu.so 11 0x00007fbd5964247d in virDomainCreateWithFlags () from /lib64/libvirt.so.0 12 0x00007fbd5a2a3b81 in remoteDispatchDomainCreateWithFlagsHelper () 13 0x00007fbd596af3c3 in virNetServerProgramDispatch () from /lib64/libvirt.so.0 14 0x00007fbd5a2c56cd in virNetServerHandleJob () 15 0x00007fbd59597221 in virThreadPoolWorker () from /lib64/libvirt.so.0 16 0x00007fbd595965a8 in virThreadHelper () from /lib64/libvirt.so.0 17 0x00007fbd56bb5dc5 in start_thread () from /lib64/libpthread.so.0 18 0x00007fbd568e473d in clone () from /lib64/libc.so.6 src/nwfilter/nwfilter_driver.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/nwfilter/nwfilter_driver.c b/src/nwfilter/nwfilter_driver.c index 2f9a51c..bcb4400 100644 --- a/src/nwfilter/nwfilter_driver.c +++ b/src/nwfilter/nwfilter_driver.c @@ -81,6 +81,12 @@ static void nwfilterDriverUnlock(void) #if HAVE_FIREWALLD +static void +nwfilterStateReloadAdapter(void *opaque ATTRIBUTE_UNUSED) +{ + nwfilterStateReload(); +} + static DBusHandlerResult nwfilterFirewalldDBusFilter(DBusConnection *connection ATTRIBUTE_UNUSED, DBusMessage *message, @@ -90,8 +96,12 @@ nwfilterFirewalldDBusFilter(DBusConnection *connection ATTRIBUTE_UNUSED, "NameOwnerChanged") || dbus_message_is_signal(message, "org.fedoraproject.FirewallD1", "Reloaded")) { + virThread thread; + VIR_DEBUG("Reload in nwfilter_driver because of firewalld."); - nwfilterStateReload(); + if (virThreadCreate(&thread, false, nwfilterStateReloadAdapter, NULL) < 0) + VIR_ERROR(_("Could not create thread. Network filter " + "driver reload failed")); } return DBUS_HANDLER_RESULT_NOT_YET_HANDLED; -- 1.8.3.1 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list