The qemu implementation for virDomainGetBlockJobInfo() has a minor bug: it grabs the qemu job with intent to QEMU_JOB_MODIFY, which means it cannot be run in parallel with any other domain-modifying command. Among others, virDomainBlockJobAbort() is such a modifying command, and it defaults to being synchronous, and can wait as long as several seconds to ensure that the job has actually finished. Due to the job rules, this means a user cannot obtain status about the job during that timeframe, even though we know management code is using a polling loop on status to see when a job finishes. This bug has been present ever since blockpull support was first introduced (commit b976165, v0.9.4 in Jul 2011), all because we stupidly tried to cram too much multiplexing through a single helper routine. It's time to disentangle some of that mess, and in the process relax block job query to use QEMU_JOB_QUERY, since it can safely be used in parallel with any long running modify command. Technically, there is one case where getting block job info can modify domain XML - we do snooping to see if a 2-phase job has transitioned into the second phase, for an optimization in the case of old qemu that lacked an event for the transition. But I think that optimization is safe; and if it proves to be a problem, we could use the difference between QEMU_CAPS_BLOCKJOB_{ASYNC,SYNC} to determine whether we even need snooping, and request a modifying job in that case. * src/qemu/qemu_driver.c (qemuDomainBlockJobImpl): Move info handling... (qemuDomainGetBlockJobInfo): ...here, and relax job type. (qemuDomainBlockJobAbort, qemuDomainBlockJobSetSpeed) (qemuDomainBlockRebase, qemuDomainBlockPull): Adjust callers. Signed-off-by: Eric Blake <eblake@xxxxxxxxxx> --- src/qemu/qemu_driver.c | 86 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 66 insertions(+), 20 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 177d3e4..bedccc6 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -14994,7 +14994,7 @@ static int qemuDomainBlockJobImpl(virDomainObjPtr vm, virConnectPtr conn, const char *path, const char *base, - unsigned long bandwidth, virDomainBlockJobInfoPtr info, + unsigned long bandwidth, int mode, unsigned int flags) { virQEMUDriverPtr driver = conn->privateData; @@ -15125,25 +15125,14 @@ qemuDomainBlockJobImpl(virDomainObjPtr vm, qemuDomainObjEnterMonitor(driver, vm); ret = qemuMonitorBlockJob(priv->mon, device, basePath, backingPath, - bandwidth, info, mode, async); + bandwidth, NULL, mode, async); qemuDomainObjExitMonitor(driver, vm); - if (info && info->type == VIR_DOMAIN_BLOCK_JOB_TYPE_COMMIT && - disk->mirrorJob == VIR_DOMAIN_BLOCK_JOB_TYPE_ACTIVE_COMMIT) - info->type = disk->mirrorJob; if (ret < 0) { if (mode == BLOCK_JOB_ABORT && disk->mirror) disk->mirrorState = VIR_DOMAIN_DISK_MIRROR_STATE_NONE; goto endjob; } - /* Snoop block copy operations, so future cancel operations can - * avoid checking if pivot is safe. */ - if (mode == BLOCK_JOB_INFO && ret == 1 && disk->mirror && - info->cur == info->end && !disk->mirrorState) { - disk->mirrorState = VIR_DOMAIN_DISK_MIRROR_STATE_READY; - save = true; - } - waitjob: /* If we have made changes to XML due to a copy job, make a best * effort to save it now. But we can ignore failure, since there @@ -15239,15 +15228,22 @@ qemuDomainBlockJobAbort(virDomainPtr dom, const char *path, unsigned int flags) return -1; } - return qemuDomainBlockJobImpl(vm, dom->conn, path, NULL, 0, NULL, BLOCK_JOB_ABORT, - flags); + return qemuDomainBlockJobImpl(vm, dom->conn, path, NULL, 0, + BLOCK_JOB_ABORT, flags); } static int qemuDomainGetBlockJobInfo(virDomainPtr dom, const char *path, virDomainBlockJobInfoPtr info, unsigned int flags) { + virQEMUDriverPtr driver = dom->conn->privateData; + qemuDomainObjPrivatePtr priv; virDomainObjPtr vm; + char *device = NULL; + int idx; + virDomainDiskDefPtr disk; + int ret = -1; + virCheckFlags(0, -1); if (!(vm = qemuDomObjFromDomain(dom))) @@ -15258,8 +15254,58 @@ qemuDomainGetBlockJobInfo(virDomainPtr dom, const char *path, return -1; } - return qemuDomainBlockJobImpl(vm, dom->conn, path, NULL, 0, info, BLOCK_JOB_INFO, - flags); + priv = vm->privateData; + if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKJOB_ASYNC) && + !virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKJOB_SYNC)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("block jobs not supported with this QEMU binary")); + goto cleanup; + } + + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0) + goto cleanup; + + if (!virDomainObjIsActive(vm)) { + virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("domain is not running")); + goto endjob; + } + + device = qemuDiskPathToAlias(vm, path, &idx); + if (!device) + goto endjob; + disk = vm->def->disks[idx]; + + qemuDomainObjEnterMonitor(driver, vm); + ret = qemuMonitorBlockJob(priv->mon, device, NULL, NULL, 0, + info, BLOCK_JOB_INFO, true); + qemuDomainObjExitMonitor(driver, vm); + if (ret < 0) + goto endjob; + + if (info->type == VIR_DOMAIN_BLOCK_JOB_TYPE_COMMIT && + disk->mirrorJob == VIR_DOMAIN_BLOCK_JOB_TYPE_ACTIVE_COMMIT) + info->type = disk->mirrorJob; + + /* Snoop block copy operations, so future cancel operations can + * avoid checking if pivot is safe. Save the change to XML, but + * we can ignore failure because it is only an optimization. */ + if (ret == 1 && disk->mirror && + info->cur == info->end && !disk->mirrorState) { + virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); + + disk->mirrorState = VIR_DOMAIN_DISK_MIRROR_STATE_READY; + ignore_value(virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm)); + virObjectUnref(cfg); + } + endjob: + if (!qemuDomainObjEndJob(driver, vm)) + vm = NULL; + + cleanup: + if (vm) + virObjectUnlock(vm); + return ret; } static int @@ -15277,7 +15323,7 @@ qemuDomainBlockJobSetSpeed(virDomainPtr dom, const char *path, return -1; } - return qemuDomainBlockJobImpl(vm, dom->conn, path, NULL, bandwidth, NULL, + return qemuDomainBlockJobImpl(vm, dom->conn, path, NULL, bandwidth, BLOCK_JOB_SPEED, flags); } @@ -15486,7 +15532,7 @@ qemuDomainBlockRebase(virDomainPtr dom, const char *path, const char *base, * everything, including vm cleanup. */ if (!(flags & VIR_DOMAIN_BLOCK_REBASE_COPY)) return qemuDomainBlockJobImpl(vm, dom->conn, path, base, bandwidth, - NULL, BLOCK_JOB_PULL, flags); + BLOCK_JOB_PULL, flags); /* If we got here, we are doing a block copy rebase. */ if (flags & VIR_DOMAIN_BLOCK_REBASE_COPY_RAW) @@ -15527,7 +15573,7 @@ qemuDomainBlockPull(virDomainPtr dom, const char *path, unsigned long bandwidth, return -1; } - return qemuDomainBlockJobImpl(vm, dom->conn, path, NULL, bandwidth, NULL, + return qemuDomainBlockJobImpl(vm, dom->conn, path, NULL, bandwidth, BLOCK_JOB_PULL, flags); } -- 1.9.3 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list