Re: [PATCH v2 04/22] qemu: Use domain condition for synchronous block jobs

Peter Krempa <pkrempa@xxxxxxxxxx> · Tue, 9 Jun 2015 18:11:09 +0200

On Tue, Jun 02, 2015 at 14:34:09 +0200, Jiri Denemark wrote:
> By switching block jobs to use domain conditions, we can drop some
> pretty complicated code in NBD storage migration. Moreover, we are
> getting ready for migration events (to replace our 50ms polling on
> query-migrate). The ultimate goal is to have a single loop waiting
> (virDomainObjWait) for any migration related event (changed status of a
> migration, disk mirror events, internal abort requests, ...). This patch
> makes NBD storage migration ready for this: first we call a QMP command
> to start or cancel drive mirror on all disks we are interested in and
> then we wait for a single condition which is signaled on any event
> related to any of the mirrors.
> 
> Signed-off-by: Jiri Denemark <jdenemar@xxxxxxxxxx>
> ---
> 
> Notes:
>     Version 2:
>     - slightly modified to use domain conditions
> 
>  po/POTFILES.in            |   1 -
>  src/qemu/qemu_blockjob.c  | 137 ++-------------------
>  src/qemu/qemu_blockjob.h  |  12 +-
>  src/qemu/qemu_domain.c    |  17 +--
>  src/qemu/qemu_domain.h    |   1 -
>  src/qemu/qemu_driver.c    |  24 ++--
>  src/qemu/qemu_migration.c | 299 ++++++++++++++++++++++++++--------------------
>  src/qemu/qemu_process.c   |  13 +-
>  8 files changed, 197 insertions(+), 307 deletions(-)
> 

...

> diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
> index 93e29e7..61b3e34 100644
> --- a/src/qemu/qemu_migration.c
> +++ b/src/qemu/qemu_migration.c

...

> @@ -1733,111 +1733,148 @@ qemuMigrationStopNBDServer(virQEMUDriverPtr driver,
>   *        -1 on error.
>   */
>  static int
> -qemuMigrationCheckDriveMirror(virQEMUDriverPtr driver,
> +qemuMigrationDriveMirrorReady(virQEMUDriverPtr driver,
>                                virDomainObjPtr vm)
>  {
>      size_t i;
> -    int ret = 1;
> +    size_t notReady = 0;
> +    int status;
>  
>      for (i = 0; i < vm->def->ndisks; i++) {
>          virDomainDiskDefPtr disk = vm->def->disks[i];
>          qemuDomainDiskPrivatePtr diskPriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
>  
> -        if (!diskPriv->migrating || !diskPriv->blockJobSync)
> +        if (!diskPriv->migrating)
>              continue;
>  
> -        /* process any pending event */
> -        if (qemuBlockJobSyncWaitWithTimeout(driver, vm, disk,
> -                                            0ull, NULL) < 0)
> -            return -1;
> -
> -        switch (disk->mirrorState) {
> -        case VIR_DOMAIN_DISK_MIRROR_STATE_NONE:
> -            ret = 0;
> -            break;
> -        case VIR_DOMAIN_DISK_MIRROR_STATE_ABORT:
> +        status = qemuBlockJobUpdate(driver, vm, disk);
> +        if (status == VIR_DOMAIN_BLOCK_JOB_FAILED) {
>              virReportError(VIR_ERR_OPERATION_FAILED,
>                             _("migration of disk %s failed"),
>                             disk->dst);
>              return -1;
>          }
> +
> +        if (disk->mirrorState != VIR_DOMAIN_DISK_MIRROR_STATE_READY)
> +            notReady++;
>      }
>  
> -    return ret;
> +    if (notReady) {
> +        VIR_DEBUG("Waiting for %zu disk mirrors to get ready", notReady);
> +        return 0;
> +    } else {
> +        VIR_DEBUG("All disk mirrors are ready");
> +        return 1;
> +    }
>  }
>  
>  
> -/**
> - * qemuMigrationCancelOneDriveMirror:
> - * @driver: qemu driver
> - * @vm: domain
> +/*
> + * If @failed is not NULL, the function will report an error and set @failed
> + * to true in case a block job fails. This way we can properly abort migration
> + * in case some block job failed once all memory has already been transferred.
>   *
> - * Cancel all drive-mirrors started by qemuMigrationDriveMirror.
> - * Any pending block job events for the mirrored disks will be
> - * processed.
> - *
> - * Returns 0 on success, -1 otherwise.
> + * Returns 1 if all mirrors are gone,
> + *         0 if some mirrors are still active,
> + *         -1 on error.

The code below doesn't ever return -1. Perhaps you could use it instead
of passing the pointer.

>   */
>  static int
> -qemuMigrationCancelOneDriveMirror(virQEMUDriverPtr driver,
> +qemuMigrationDriveMirrorCancelled(virQEMUDriverPtr driver,
>                                    virDomainObjPtr vm,
> -                                  virDomainDiskDefPtr disk)
> +                                  bool *failed)
>  {
> -    qemuDomainObjPrivatePtr priv = vm->privateData;
> -    char *diskAlias = NULL;
> -    int ret = -1;
> +    size_t i;
> +    size_t active = 0;
> +    int status;
>  
> -    /* No need to cancel if mirror already aborted */
> -    if (disk->mirrorState == VIR_DOMAIN_DISK_MIRROR_STATE_ABORT) {
> -        ret = 0;
> -    } else {
> -        virConnectDomainEventBlockJobStatus status = -1;
> +    for (i = 0; i < vm->def->ndisks; i++) {
> +        virDomainDiskDefPtr disk = vm->def->disks[i];
> +        qemuDomainDiskPrivatePtr diskPriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
>  
> -        if (virAsprintf(&diskAlias, "%s%s",
> -                        QEMU_DRIVE_HOST_PREFIX, disk->info.alias) < 0)
> -            goto cleanup;
> +        if (!diskPriv->migrating)
> +            continue;
>  
> -        if (qemuDomainObjEnterMonitorAsync(driver, vm,
> -                                           QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
> -            goto endjob;
> -        ret = qemuMonitorBlockJobCancel(priv->mon, diskAlias, true);
> -        if (qemuDomainObjExitMonitor(driver, vm) < 0)
> -            goto endjob;
> -
> -        if (ret < 0) {
> -            virDomainBlockJobInfo info;
> -
> -            /* block-job-cancel can fail if QEMU simultaneously
> -             * aborted the job; probe for it again to detect this */
> -            if (qemuMonitorBlockJobInfo(priv->mon, diskAlias,
> -                                        &info, NULL) == 0) {
> -                ret = 0;
> -            } else {
> +        status = qemuBlockJobUpdate(driver, vm, disk);
> +        switch (status) {
> +        case VIR_DOMAIN_BLOCK_JOB_FAILED:
> +            if (failed) {
>                  virReportError(VIR_ERR_OPERATION_FAILED,
> -                               _("could not cancel migration of disk %s"),
> +                               _("migration of disk %s failed"),
>                                 disk->dst);
> +                *failed = true;
>              }
> +            /* fallthrough */
> +        case VIR_DOMAIN_BLOCK_JOB_CANCELED:
> +        case VIR_DOMAIN_BLOCK_JOB_COMPLETED:
> +            qemuBlockJobSyncEnd(driver, vm, disk);
> +            diskPriv->migrating = false;
> +            break;
>  
> -            goto endjob;
> +        default:
> +            active++;
>          }
> +    }
>  
> -        /* Mirror may become ready before cancellation takes
> -         * effect; loop if we get that event first */
> -        do {
> -            ret = qemuBlockJobSyncWait(driver, vm, disk, &status);
> -            if (ret < 0) {
> -                VIR_WARN("Unable to wait for block job on %s to cancel",
> -                         diskAlias);
> -                goto endjob;
> -            }
> -        } while (status == VIR_DOMAIN_BLOCK_JOB_READY);
> +    if (active) {
> +        VIR_DEBUG("Waiting for %zu disk mirrors to finish", active);
> +        return 0;
> +    } else {
> +        if (failed && *failed)
> +            VIR_DEBUG("All disk mirrors are gone; some of them failed");
> +        else
> +            VIR_DEBUG("All disk mirrors are gone");
> +        return 1;
>      }
> +}
> +
> +
...

> @@ -2467,6 +2515,10 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver,
>          if (qemuMigrationUpdateJobStatus(driver, vm, job, asyncJob) == -1)
>              goto error;
>  
> +        if (storage &&
> +            qemuMigrationDriveMirrorReady(driver, vm) < 0)
> +            break;

I think this hunk and the related changes should be in a separate patch.
This code preparses for changing to a event based migration finish,
while the rest of the patch touches mostly other parts that don't deal
with the big picture of migration.

> +
>          /* cancel migration if disk I/O error is emitted while migrating */
>          if (abort_on_error &&
>              virDomainObjGetState(vm, &pauseReason) == VIR_DOMAIN_PAUSED &&

...

> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index 9c5d0f4..a945401 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -1001,10 +1001,10 @@ qemuProcessHandleBlockJob(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
>      diskPriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
>  
>      if (diskPriv->blockJobSync) {
> +        /* We have a SYNC API waiting for this event, dispatch it back */
>          diskPriv->blockJobType = type;
>          diskPriv->blockJobStatus = status;
> -        /* We have an SYNC API waiting for this event, dispatch it back */
> -        virCondSignal(&diskPriv->blockJobSyncCond);
> +        virDomainObjSignal(vm);

Once there will be a possibility to have more waiting threads for a
certain events we might need to revisit these.

>      } else {
>          /* there is no waiting SYNC API, dispatch the update to a thread */
>          if (VIR_ALLOC(processEvent) < 0)

Looks good but I'd rather see a version that avoids doing both the
refactor to the new condition and preparation to use events for
migration.

Peter
Attachment:
signature.asc

Description: Digital signature
--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list