On Wed, Apr 11, 2012 at 05:40:38PM -0600, Eric Blake wrote: > In my testing, I was able to provoke an odd block pull failure: > > $ virsh blockpull dom vda --bandwidth 10000 > error: Requested operation is not valid: No active operation on device: drive-virtio-disk0 > > merely by using gdb to artifically wait to do the block job set speed > until after the pull had already finished. But in reality, that should > be a success, since the pull finished before we had a chance to set > speed. Furthermore, using a double job lock is annoying; we can alter > the speed as part of the same job that started the block pull for less > likelihood of hitting the speed failure in the first place. > > * src/qemu/qemu_monitor.h (BLOCK_JOB_CMD): Add new mode. > * src/qemu/qemu_driver.c (qemuDomainBlockRebase): Move secondary > job call... > (qemuDomainBlockJobImpl): ...here, for fewer locks. > * src/qemu/qemu_monitor_json.c (qemuMonitorJSONBlockJob): Change > return value on new internal mode. > --- > src/qemu/qemu_driver.c | 13 +++++-------- > src/qemu/qemu_monitor.h | 3 ++- > src/qemu/qemu_monitor_json.c | 31 ++++++++++++++++++++----------- > 3 files changed, 27 insertions(+), 20 deletions(-) > > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c > index bb6089b..e31f407 100644 > --- a/src/qemu/qemu_driver.c > +++ b/src/qemu/qemu_driver.c > @@ -11654,6 +11654,9 @@ qemuDomainBlockJobImpl(virDomainPtr dom, const char *path, const char *base, > * relying on qemu to do this. */ > ret = qemuMonitorBlockJob(priv->mon, device, base, bandwidth, info, mode, > async); > + if (ret == 0 && mode == BLOCK_JOB_PULL && bandwidth) > + ret = qemuMonitorBlockJob(priv->mon, device, NULL, bandwidth, NULL, > + BLOCK_JOB_SPEED_INTERNAL, async); > qemuDomainObjExitMonitorWithDriver(driver, vm); > if (ret < 0) > goto endjob; > @@ -11750,15 +11753,9 @@ static int > qemuDomainBlockRebase(virDomainPtr dom, const char *path, const char *base, > unsigned long bandwidth, unsigned int flags) > { > - int ret; > - > virCheckFlags(0, -1); > - ret = qemuDomainBlockJobImpl(dom, path, base, bandwidth, NULL, > - BLOCK_JOB_PULL, flags); > - if (ret == 0 && bandwidth != 0) > - ret = qemuDomainBlockJobImpl(dom, path, NULL, bandwidth, NULL, > - BLOCK_JOB_SPEED, flags); > - return ret; > + return qemuDomainBlockJobImpl(dom, path, base, bandwidth, NULL, > + BLOCK_JOB_PULL, flags); > } > > static int > diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h > index dc02964..f3cdcdd 100644 > --- a/src/qemu/qemu_monitor.h > +++ b/src/qemu/qemu_monitor.h > @@ -530,7 +530,8 @@ typedef enum { > BLOCK_JOB_ABORT = 0, > BLOCK_JOB_INFO = 1, > BLOCK_JOB_SPEED = 2, > - BLOCK_JOB_PULL = 3, > + BLOCK_JOB_SPEED_INTERNAL = 3, > + BLOCK_JOB_PULL = 4, > } BLOCK_JOB_CMD; > > int qemuMonitorBlockJob(qemuMonitorPtr mon, > diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c > index 242889c..26e41ac 100644 > --- a/src/qemu/qemu_monitor_json.c > +++ b/src/qemu/qemu_monitor_json.c > @@ -3452,6 +3452,7 @@ qemuMonitorJSONBlockJob(qemuMonitorPtr mon, > cmd = qemuMonitorJSONMakeCommand(cmd_name, NULL); > break; > case BLOCK_JOB_SPEED: > + case BLOCK_JOB_SPEED_INTERNAL: > cmd_name = async ? "block-job-set-speed" : "block_job_set_speed"; > cmd = qemuMonitorJSONMakeCommand(cmd_name, "s:device", > device, "U:value", > @@ -3473,22 +3474,30 @@ qemuMonitorJSONBlockJob(qemuMonitorPtr mon, > ret = qemuMonitorJSONCommand(mon, cmd, &reply); > > if (ret == 0 && virJSONValueObjectHasKey(reply, "error")) { > - if (qemuMonitorJSONHasError(reply, "DeviceNotActive")) > - qemuReportError(VIR_ERR_OPERATION_INVALID, > - _("No active operation on device: %s"), device); > - else if (qemuMonitorJSONHasError(reply, "DeviceInUse")) > + ret = -1; > + if (qemuMonitorJSONHasError(reply, "DeviceNotActive")) { > + /* If a job completes before we get a chance to set the > + * speed, we don't want to fail the original command. */ > + if (mode == BLOCK_JOB_SPEED_INTERNAL) > + ret = 0; > + else > + qemuReportError(VIR_ERR_OPERATION_INVALID, > + _("No active operation on device: %s"), > + device); > + } else if (qemuMonitorJSONHasError(reply, "DeviceInUse")){ > qemuReportError(VIR_ERR_OPERATION_FAILED, > - _("Device %s in use"), device); > - else if (qemuMonitorJSONHasError(reply, "NotSupported")) > + _("Device %s in use"), device); > + } else if (qemuMonitorJSONHasError(reply, "NotSupported")) { > qemuReportError(VIR_ERR_OPERATION_INVALID, > - _("Operation is not supported for device: %s"), device); > - else if (qemuMonitorJSONHasError(reply, "CommandNotFound")) > + _("Operation is not supported for device: %s"), > + device); > + } else if (qemuMonitorJSONHasError(reply, "CommandNotFound")) { > qemuReportError(VIR_ERR_OPERATION_INVALID, > - _("Command '%s' is not found"), cmd_name); > - else > + _("Command '%s' is not found"), cmd_name); > + } else { > qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", > _("Unexpected error")); > - ret = -1; > + } > } > > if (ret == 0 && mode == BLOCK_JOB_INFO) ACK, a race we can't really avoid at that level and that's the proper handling. Again I'm wondering how we could automate testing for this ... Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@xxxxxxxxxxxx | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/ -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list