On 11/13/19 11:06 PM, Jonathon Jongsma wrote: > Some layered products such as oVirt have requested a way to avoid being > blocked by guest agent commands when querying a loaded vm. For example, > many guest agent commands are polled periodically to monitor changes, > and rather than blocking the calling process, they'd prefer to simply > time out when an agent query is taking too long. > > This patch adds a way for the user to specify a custom agent timeout > that is applied to all agent commands. > > One special case to note here is the 'guest-sync' command. 'guest-sync' > is issued internally prior to calling any other command. (For example, > when libvirt wants to call 'guest-get-fsinfo', we first call > 'guest-sync' and then call 'guest-get-fsinfo'). > > Previously, the 'guest-sync' command used a 5-second timeout > (VIR_DOMAIN_QEMU_AGENT_COMMAND_DEFAULT), whereas the actual command that > followed always blocked indefinitely > (VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK). As part of this patch, if a > custom timeout is specified that is shorter than > 5 seconds, this new timeout is also used for 'guest-sync'. If there is > no custom timeout or if the custom timeout is longer than 5 seconds, we > will continue to use the 5-second timeout. > > Signed-off-by: Jonathon Jongsma <jjongsma@xxxxxxxxxx> > --- > Changes in v3: > - Changed enum names per Daniel's suggestion > - incorporated Michal's review and fixup patches > - As discussed on the mailing list, this patch does not acquire any jobs, but > simply uses mutex locking for data integrity > - libvirt release versions updated > - format and parse private agentTimeout data in status xml > > include/libvirt/libvirt-domain.h | 10 +++ > include/libvirt/libvirt-qemu.h | 8 +-- > src/driver-hypervisor.h | 6 ++ > src/libvirt-domain.c | 49 +++++++++++++ > src/libvirt_public.syms | 5 ++ > src/qemu/qemu_agent.c | 70 ++++++++++--------- > src/qemu/qemu_agent.h | 3 + > src/qemu/qemu_domain.c | 6 ++ > src/qemu/qemu_domain.h | 1 + > src/qemu/qemu_driver.c | 47 +++++++++++++ > src/remote/remote_driver.c | 1 + > src/remote/remote_protocol.x | 18 ++++- > src/remote_protocol-structs | 9 +++ > .../blockjob-blockdev-in.xml | 1 + > .../blockjob-mirror-in.xml | 1 + > .../disk-secinfo-upgrade-out.xml | 1 + > .../migration-in-params-in.xml | 1 + > .../migration-out-nbd-out.xml | 1 + > .../migration-out-nbd-tls-out.xml | 1 + > .../migration-out-params-in.xml | 1 + > tests/qemustatusxml2xmldata/modern-in.xml | 1 + > .../qemustatusxml2xmldata/vcpus-multi-in.xml | 1 + > tools/virsh-domain.c | 52 ++++++++++++++ > 23 files changed, 257 insertions(+), 37 deletions(-) Close enough :-) > diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c > index 54dde34f15..86358341d5 100644 > --- a/src/qemu/qemu_domain.c > +++ b/src/qemu/qemu_domain.c > @@ -2093,6 +2093,8 @@ qemuDomainObjPrivateAlloc(void *opaque) > if (!(priv->dbusVMStates = virHashCreate(5, dbusVMStateHashFree))) > goto error; > > + /* agent commands block by default, user can choose different behavior */ > + priv->agentTimeout = VIR_DOMAIN_AGENT_RESPONSE_TIMEOUT_BLOCK; > priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX; > priv->driver = opaque; > > @@ -2875,6 +2877,8 @@ qemuDomainObjPrivateXMLFormat(virBufferPtr buf, > if (qemuDomainObjPrivateXMLFormatSlirp(buf, vm) < 0) > return -1; > > + virBufferAsprintf(buf, "<agentTimeout>%i</agentTimeout>\n", priv->agentTimeout); > + > return 0; > } > > @@ -3672,6 +3676,8 @@ qemuDomainObjPrivateXMLParse(xmlXPathContextPtr ctxt, > > priv->memPrealloc = virXPathBoolean("boolean(./memPrealloc)", ctxt) == 1; > > + virXPathInt("string(./agentTimeout)", ctxt, &priv->agentTimeout); This is not safe. If the value in status XML is not valid number, ignoring the error is not good. > + > return 0; > > error: > diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h > index ab00b25789..98a9540275 100644 > --- a/src/qemu/qemu_domain.h > +++ b/src/qemu/qemu_domain.h > @@ -293,6 +293,7 @@ struct _qemuDomainObjPrivate { > virDomainChrSourceDefPtr monConfig; > bool monError; > unsigned long long monStart; > + int agentTimeout; > > qemuAgentPtr agent; > bool agentError; > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c > index c969a3d463..77a26ccf2e 100644 > --- a/src/qemu/qemu_driver.c > +++ b/src/qemu/qemu_driver.c > @@ -22666,6 +22666,52 @@ qemuDomainGetGuestInfo(virDomainPtr dom, > return ret; > } > > + > +static int > +qemuDomainAgentSetResponseTimeout(virDomainPtr dom, > + int timeout, > + unsigned int flags) > +{ > + virDomainObjPtr vm = NULL; > + int ret = -1; > + > + virCheckFlags(0, -1); > + > + if (timeout < VIR_DOMAIN_QEMU_AGENT_COMMAND_MIN) { > + virReportError(VIR_ERR_INVALID_ARG, > + _("guest agent timeout '%d' is " > + "less than the minimum '%d'"), > + timeout, VIR_DOMAIN_QEMU_AGENT_COMMAND_MIN); > + return -1; > + } > + > + if (!(vm = qemuDomainObjFromDomain(dom))) > + return -1; > + > + if (virDomainAgentSetResponseTimeoutEnsureACL(dom->conn, vm->def) < 0) > + goto cleanup; > + > + /* If domain has an agent, change its timeout. Otherwise just save the > + * request so that we can set the timeout when the agent appears */ > + if (qemuDomainAgentAvailable(vm, false)) { > + /* We don't need to acquire a job since we're not interacting with the > + * agent or the qemu monitor. We're only setting a struct member, so > + * just acquire the mutex lock. Worst case, any in-process agent > + * commands will use the newly-set agent timeout. */ > + virObjectLock(QEMU_DOMAIN_PRIVATE(vm)->agent); > + qemuAgentSetResponseTimeout(QEMU_DOMAIN_PRIVATE(vm)->agent, timeout); > + virObjectUnlock(QEMU_DOMAIN_PRIVATE(vm)->agent); > + } > + > + QEMU_DOMAIN_PRIVATE(vm)->agentTimeout = timeout; This is the point where we need to save status XML if the domain is running. > + ret = 0; > + > + cleanup: > + virDomainObjEndAPI(&vm); > + return ret; > +} > + > + > diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c > index 0feb21ef17..def4107d27 100644 > --- a/tools/virsh-domain.c > +++ b/tools/virsh-domain.c > @@ -9832,6 +9832,52 @@ cmdQemuAgentCommand(vshControl *ctl, const vshCmd *cmd) > > return ret; > } > +/* > + * "guest-agent-timeout" command > + */ > +static const vshCmdInfo info_guest_agent_timeout[] = { > + {.name = "help", > + .data = N_("Set the guest agent timeout") > + }, > + {.name = "desc", > + .data = N_("Set the number of seconds to wait for a response from the guest agent.") > + }, > + {.name = NULL} > +}; > + > +static const vshCmdOptDef opts_guest_agent_timeout[] = { > + VIRSH_COMMON_OPT_DOMAIN_FULL(0), > + {.name = "timeout", > + .type = VSH_OT_INT, > + .flags = VSH_OFLAG_REQ_OPT, > + .help = N_("timeout seconds.") > + }, > + {.name = NULL} > +}; > + > +static bool > +cmdGuestAgentTimeout(vshControl *ctl, const vshCmd *cmd) > +{ > + virDomainPtr dom = NULL; > + int timeout; > + const unsigned int flags = 0; > + bool ret = false; > + > + dom = virshCommandOptDomain(ctl, cmd, NULL); > + if (dom == NULL) > + goto cleanup; > + > + if (vshCommandOptInt(ctl, cmd, "timeout", &timeout) < 0) > + goto cleanup; > + > + if (virDomainAgentSetResponseTimeout(dom, timeout, flags) < 0) > + goto cleanup; > + > + ret = true; > + cleanup: > + virshDomainFree(dom); > + return ret; > +} > > /* > * "lxc-enter-namespace" namespace > @@ -14492,6 +14538,12 @@ const vshCmdDef domManagementCmds[] = { > .info = info_qemu_agent_command, > .flags = 0 > }, > + {.name = "guest-agent-timeout", > + .handler = cmdGuestAgentTimeout, > + .opts = opts_guest_agent_timeout, > + .info = info_guest_agent_timeout, > + .flags = 0 > + }, > {.name = "reboot", > .handler = cmdReboot, > .opts = opts_reboot, > We need to document new virsh command. I'm fixing all the isues I've raised, ACKing and pushing. Can you please post a follow up patch that updates news.xml? I thinkg it's worth recording in the release notes. Reviewed-by: Michal Privoznik <mprivozn@xxxxxxxxxx> Michal -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list