On Tue, Aug 28, 2018 at 04:40:16PM +0800, Yi Wang wrote:
When doing some job holding state lock for a long time, we may come across error: "Timed out during operation: cannot acquire state change lock" Well, sometimes it's not a problem and users wanner continue
s/wanner/want to/
to wait, and this patch allow users decide how long time they can wait the state lock. Signed-off-by: Yi Wang <wang.yi59@xxxxxxxxxx> Reviewed-by: Xi Xu <xu.xi8@xxxxxxxxxx> --- changes in v4: - fox syntax-check error changes in v3: - add user-friendly description and nb of state lock - check validity of stateLockTimeout changes in v2: - change default value to 30 in qemu.conf - set the default value in virQEMUDriverConfigNew() v4 Signed-off-by: Yi Wang <wang.yi59@xxxxxxxxxx> --- src/qemu/libvirtd_qemu.aug | 1 + src/qemu/qemu.conf | 10 ++++++++++ src/qemu/qemu_conf.c | 14 ++++++++++++++ src/qemu/qemu_conf.h | 2 ++ src/qemu/qemu_domain.c | 5 +---- src/qemu/test_libvirtd_qemu.aug.in | 1 + 6 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/qemu/libvirtd_qemu.aug b/src/qemu/libvirtd_qemu.aug index ddc4bbf..f7287ae 100644 --- a/src/qemu/libvirtd_qemu.aug +++ b/src/qemu/libvirtd_qemu.aug @@ -93,6 +93,7 @@ module Libvirtd_qemu = | limits_entry "max_core" | bool_entry "dump_guest_core" | str_entry "stdio_handler" + | int_entry "state_lock_timeout"
here you add the option at the end of the 'process_entry' group
let device_entry = bool_entry "mac_filter" | bool_entry "relaxed_acs_check" diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf index cd57b3c..8920a1a 100644 --- a/src/qemu/qemu.conf +++ b/src/qemu/qemu.conf @@ -667,6 +667,16 @@ # #max_queued = 0 + +# When two or more threads want to work with the same domain they use a +# job lock to mutually exclude each other. However, waiting for the lock +# is limited up to state_lock_timeout seconds. +# NB, strong recommendation to set the timeout longer than 30 seconds. +# +# Default is 30 +# +#state_lock_timeout = 60
But here in qemu.conf, you add it between the rpc entries. It seems we did not follow the structure with 'stdio_handler', but adding it either right after 'dump_guest_core' or at the end of file would be better than squeezing it between rpc entries.
+ ################################################################### # Keepalive protocol: # This allows qemu driver to detect broken connections to remote diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index a4f545e..c761cae 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -129,6 +129,9 @@ void qemuDomainCmdlineDefFree(qemuDomainCmdlineDefPtr def) #endif +/* Give up waiting for mutex after 30 seconds */ +#define QEMU_JOB_WAIT_TIME (1000ull * 30) +
Here the constant is multiplied by 1000 to convert from seconds to milliseconds.
virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool privileged) { virQEMUDriverConfigPtr cfg; @@ -346,6 +349,8 @@ virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool privileged) cfg->glusterDebugLevel = 4; cfg->stdioLogD = true; + cfg->stateLockTimeout = QEMU_JOB_WAIT_TIME; + if (!(cfg->namespaces = virBitmapNew(QEMU_DOMAIN_NS_LAST))) goto error; @@ -863,6 +868,9 @@ int virQEMUDriverConfigLoadFile(virQEMUDriverConfigPtr cfg, if (virConfGetValueUInt(conf, "keepalive_count", &cfg->keepAliveCount) < 0) goto cleanup; + if (virConfGetValueInt(conf, "state_lock_timeout", &cfg->stateLockTimeout) < 0) + goto cleanup; +
But the parsed value is passed directly here, so '60' in the config file would result in 60 ms.
if (virConfGetValueInt(conf, "seccomp_sandbox", &cfg->seccompSandbox) < 0) goto cleanup; @@ -1055,6 +1063,12 @@ virQEMUDriverConfigValidate(virQEMUDriverConfigPtr cfg) return -1; } + if (cfg->stateLockTimeout <= 0) { + virReportError(VIR_ERR_CONF_SYNTAX, "%s", + _("state_lock_timeout should larger than zero")); + return -1; + } + return 0; } diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index a8d84ef..97cf2e1 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -190,6 +190,8 @@ struct _virQEMUDriverConfig { int keepAliveInterval; unsigned int keepAliveCount; + int stateLockTimeout; + int seccompSandbox; char *migrateHost; diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 886e3fb..5a2ca52 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -6652,9 +6652,6 @@ qemuDomainObjCanSetJob(qemuDomainObjPrivatePtr priv, priv->job.agentActive == QEMU_AGENT_JOB_NONE)); } -/* Give up waiting for mutex after 30 seconds */ -#define QEMU_JOB_WAIT_TIME (1000ull * 30) - /** * qemuDomainObjBeginJobInternal: * @driver: qemu driver @@ -6714,7 +6711,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver, } priv->jobs_queued++; - then = now + QEMU_JOB_WAIT_TIME; + then = now + cfg->stateLockTimeout; retry: if ((!async && job != QEMU_JOB_DESTROY) && diff --git a/src/qemu/test_libvirtd_qemu.aug.in b/src/qemu/test_libvirtd_qemu.aug.in index f1e8806..dc5de96 100644 --- a/src/qemu/test_libvirtd_qemu.aug.in +++ b/src/qemu/test_libvirtd_qemu.aug.in @@ -105,3 +105,4 @@ module Test_libvirtd_qemu = { "pr_helper" = "/usr/bin/qemu-pr-helper" } { "swtpm_user" = "tss" } { "swtpm_group" = "tss" } +{ "state_lock_timeout" = "60" }
This needs to be ordered properly - install the 'augeas' tool to see where 'make check' fails. Jano
Attachment:
signature.asc
Description: Digital signature
-- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list