Re: [PATCH 5/5] blockjob: allow for fast-finishing job

Daniel Veillard <veillard@xxxxxxxxxx> · Thu, 12 Apr 2012 09:29:17 +0800

On Wed, Apr 11, 2012 at 05:40:38PM -0600, Eric Blake wrote:
> In my testing, I was able to provoke an odd block pull failure:
> 
> $ virsh blockpull dom vda --bandwidth 10000
> error: Requested operation is not valid: No active operation on device: drive-virtio-disk0
> 
> merely by using gdb to artifically wait to do the block job set speed
> until after the pull had already finished.  But in reality, that should
> be a success, since the pull finished before we had a chance to set
> speed.  Furthermore, using a double job lock is annoying; we can alter
> the speed as part of the same job that started the block pull for less
> likelihood of hitting the speed failure in the first place.
> 
> * src/qemu/qemu_monitor.h (BLOCK_JOB_CMD): Add new mode.
> * src/qemu/qemu_driver.c (qemuDomainBlockRebase): Move secondary
> job call...
> (qemuDomainBlockJobImpl): ...here, for fewer locks.
> * src/qemu/qemu_monitor_json.c (qemuMonitorJSONBlockJob): Change
> return value on new internal mode.
> ---
>  src/qemu/qemu_driver.c       |   13 +++++--------
>  src/qemu/qemu_monitor.h      |    3 ++-
>  src/qemu/qemu_monitor_json.c |   31 ++++++++++++++++++++-----------
>  3 files changed, 27 insertions(+), 20 deletions(-)
> 
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index bb6089b..e31f407 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -11654,6 +11654,9 @@ qemuDomainBlockJobImpl(virDomainPtr dom, const char *path, const char *base,
>       * relying on qemu to do this.  */
>      ret = qemuMonitorBlockJob(priv->mon, device, base, bandwidth, info, mode,
>                                async);
> +    if (ret == 0 && mode == BLOCK_JOB_PULL && bandwidth)
> +        ret = qemuMonitorBlockJob(priv->mon, device, NULL, bandwidth, NULL,
> +                                  BLOCK_JOB_SPEED_INTERNAL, async);
>      qemuDomainObjExitMonitorWithDriver(driver, vm);
>      if (ret < 0)
>          goto endjob;
> @@ -11750,15 +11753,9 @@ static int
>  qemuDomainBlockRebase(virDomainPtr dom, const char *path, const char *base,
>                        unsigned long bandwidth, unsigned int flags)
>  {
> -    int ret;
> -
>      virCheckFlags(0, -1);
> -    ret = qemuDomainBlockJobImpl(dom, path, base, bandwidth, NULL,
> -                                 BLOCK_JOB_PULL, flags);
> -    if (ret == 0 && bandwidth != 0)
> -        ret = qemuDomainBlockJobImpl(dom, path, NULL, bandwidth, NULL,
> -                                     BLOCK_JOB_SPEED, flags);
> -    return ret;
> +    return qemuDomainBlockJobImpl(dom, path, base, bandwidth, NULL,
> +                                  BLOCK_JOB_PULL, flags);
>  }
> 
>  static int
> diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
> index dc02964..f3cdcdd 100644
> --- a/src/qemu/qemu_monitor.h
> +++ b/src/qemu/qemu_monitor.h
> @@ -530,7 +530,8 @@ typedef enum {
>      BLOCK_JOB_ABORT = 0,
>      BLOCK_JOB_INFO = 1,
>      BLOCK_JOB_SPEED = 2,
> -    BLOCK_JOB_PULL = 3,
> +    BLOCK_JOB_SPEED_INTERNAL = 3,
> +    BLOCK_JOB_PULL = 4,
>  } BLOCK_JOB_CMD;
> 
>  int qemuMonitorBlockJob(qemuMonitorPtr mon,
> diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c
> index 242889c..26e41ac 100644
> --- a/src/qemu/qemu_monitor_json.c
> +++ b/src/qemu/qemu_monitor_json.c
> @@ -3452,6 +3452,7 @@ qemuMonitorJSONBlockJob(qemuMonitorPtr mon,
>          cmd = qemuMonitorJSONMakeCommand(cmd_name, NULL);
>          break;
>      case BLOCK_JOB_SPEED:
> +    case BLOCK_JOB_SPEED_INTERNAL:
>          cmd_name = async ? "block-job-set-speed" : "block_job_set_speed";
>          cmd = qemuMonitorJSONMakeCommand(cmd_name, "s:device",
>                                           device, "U:value",
> @@ -3473,22 +3474,30 @@ qemuMonitorJSONBlockJob(qemuMonitorPtr mon,
>      ret = qemuMonitorJSONCommand(mon, cmd, &reply);
> 
>      if (ret == 0 && virJSONValueObjectHasKey(reply, "error")) {
> -        if (qemuMonitorJSONHasError(reply, "DeviceNotActive"))
> -            qemuReportError(VIR_ERR_OPERATION_INVALID,
> -                _("No active operation on device: %s"), device);
> -        else if (qemuMonitorJSONHasError(reply, "DeviceInUse"))
> +        ret = -1;
> +        if (qemuMonitorJSONHasError(reply, "DeviceNotActive")) {
> +            /* If a job completes before we get a chance to set the
> +             * speed, we don't want to fail the original command.  */
> +            if (mode == BLOCK_JOB_SPEED_INTERNAL)
> +                ret = 0;
> +            else
> +                qemuReportError(VIR_ERR_OPERATION_INVALID,
> +                                _("No active operation on device: %s"),
> +                                device);
> +        } else if (qemuMonitorJSONHasError(reply, "DeviceInUse")){
>              qemuReportError(VIR_ERR_OPERATION_FAILED,
> -                _("Device %s in use"), device);
> -        else if (qemuMonitorJSONHasError(reply, "NotSupported"))
> +                            _("Device %s in use"), device);
> +        } else if (qemuMonitorJSONHasError(reply, "NotSupported")) {
>              qemuReportError(VIR_ERR_OPERATION_INVALID,
> -                _("Operation is not supported for device: %s"), device);
> -        else if (qemuMonitorJSONHasError(reply, "CommandNotFound"))
> +                            _("Operation is not supported for device: %s"),
> +                            device);
> +        } else if (qemuMonitorJSONHasError(reply, "CommandNotFound")) {
>              qemuReportError(VIR_ERR_OPERATION_INVALID,
> -                _("Command '%s' is not found"), cmd_name);
> -        else
> +                            _("Command '%s' is not found"), cmd_name);
> +        } else {
>              qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>                  _("Unexpected error"));
> -        ret = -1;
> +        }
>      }
> 
>      if (ret == 0 && mode == BLOCK_JOB_INFO)

  ACK, a race we can't really avoid at that level and that's the proper
handling. Again I'm wondering how we could automate testing for this ...

Daniel

-- 
Daniel Veillard      | libxml Gnome XML XSLT toolkit  http://xmlsoft.org/
daniel@xxxxxxxxxxxx  | Rpmfind RPM search engine http://rpmfind.net/
http://veillard.com/ | virtualization library  http://libvirt.org/

--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list