On Fri, Jan 11, 2013 at 17:52:21 +0100, Michal Privoznik wrote:
> This function does the source part of NBD magic.
> It invokes drive-mirror on each non shared disk
> and wait till the mirroring process completes.
> When it does we can proceed with migration.
>
> Currently, an active waiting is done: every 50ms
> libvirt asks qemu if block-job is finished or not.
> However, once the job finishes, qemu doesn't
> report its progress so we can only assume if
> the job finished successfully or not. The better
> solution would be to listen to the event which
> is sent as soon as the job finishes. The event
> does contain the result of job.
> ---
>  src/qemu/qemu_migration.c | 179 +++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 178 insertions(+), 1 deletion(-)
>
> diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
> index fc410c0..1f9a880 100644
> --- a/src/qemu/qemu_migration.c
> +++ b/src/qemu/qemu_migration.c
> @@ -1175,6 +1175,177 @@ cleanup:
>      return ret;
>  }
>
> +/**
> + * qemuMigrationDiskMirror:
> + * @driver: qemu driver
> + * @vm: domain
> + * @mig: migration cookie
> + * @migrate_flags: migrate monitor command flags
> + *
> + * Run drive-mirror to feed NBD server running on dst and
> + * wait till the process completes. On success, update
> + * @migrate_flags so we don't tell 'migrate' command to
> + * do the very same operation.
> + *
> + * Returns 0 on success (@migrate_flags updated),
> + *         -1 otherwise.
> + */
> +static int
> +qemuMigrationDriveMirror(virQEMUDriverPtr driver,
> +                         virDomainObjPtr vm,
> +                         qemuMigrationCookiePtr mig,
> +                         const char *host,
> +                         unsigned long speed,
> +                         unsigned int *migrate_flags)
> +{
> +    int ret = -1;
> +    int mon_ret;
> +    qemuDomainObjPrivatePtr priv = vm->privateData;
> +    size_t ndisks = 0, i;
> +    char **disks = NULL;
> +    unsigned int mirror_flags = VIR_DOMAIN_BLOCK_REBASE_REUSE_EXT;
> +    char *nbd_dest = NULL;
> +
> +    if (*migrate_flags & QEMU_MONITOR_MIGRATE_NON_SHARED_DISK) {
> +        /* dummy */
> +    } else if (*migrate_flags & QEMU_MONITOR_MIGRATE_NON_SHARED_INC) {
> +        mirror_flags |= VIR_DOMAIN_BLOCK_REBASE_SHALLOW;
> +    } else {
> +        /* Nothing to be done here. Claim success */
> +        return 0;
> +    }

I find this hard to read, I'd rather rewrite this as:

    if (*migrate_flags & QEMU_MONITOR_MIGRATE_NON_SHARED_INC)
        mirror_flags |= VIR_DOMAIN_BLOCK_REBASE_SHALLOW;
    else if (!(*migrate_flags & QEMU_MONITOR_MIGRATE_NON_SHARED_DISK))
        return 0;

or (even easier but redundant):

    if (!(*migrate_flags & (QEMU_MONITOR_MIGRATE_NON_SHARED_DISK |
                            QEMU_MONITOR_MIGRATE_NON_SHARED_INC)))
        return 0;

    if (*migrate_flags & QEMU_MONITOR_MIGRATE_NON_SHARED_INC)
        mirror_flags |= VIR_DOMAIN_BLOCK_REBASE_SHALLOW;

> +
> +    for (i = 0; i < vm->def->ndisks; i++) {
> +        virDomainDiskDefPtr disk = vm->def->disks[i];
> +
> +        /* skip shared disks */
> +        if (disk->shared)
> +            continue;
> +
> +        if (VIR_REALLOC_N(disks, ndisks + 1) < 0) {
> +            virReportOOMError();
> +            goto cleanup;
> +        }
> +
> +        if (virAsprintf(&disks[ndisks++], "%s%s",
> +                        QEMU_DRIVE_HOST_PREFIX, disk->info.alias) < 0) {
> +            virReportOOMError();
> +            goto cleanup;
> +        }
> +    }
> +
> +    if (!ndisks) {
> +        /* Hooray! Nothing to care about */
> +        ret = 0;
> +        goto cleanup;
> +    }

This is just duplicated code from qemuMigrationStartNBDServer. Should we
move it to a separate function?
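Something along these lines could then be shared by both callers. This is
just a rough, untested sketch; the name qemuMigrationCollectDiskAliases and
its exact signature are made up here:

    static int
    qemuMigrationCollectDiskAliases(virDomainDefPtr def,
                                    char ***aliasesRet,
                                    size_t *naliasesRet)
    {
        char **aliases = NULL;
        size_t naliases = 0;
        size_t i;

        for (i = 0; i < def->ndisks; i++) {
            virDomainDiskDefPtr disk = def->disks[i];

            /* only non-shared disks need to be mirrored/exported */
            if (disk->shared)
                continue;

            if (VIR_REALLOC_N(aliases, naliases + 1) < 0) {
                virReportOOMError();
                goto error;
            }

            if (virAsprintf(&aliases[naliases++], "%s%s",
                            QEMU_DRIVE_HOST_PREFIX, disk->info.alias) < 0) {
                virReportOOMError();
                goto error;
            }
        }

        *aliasesRet = aliases;
        *naliasesRet = naliases;
        return 0;

    error:
        for (i = 0; i < naliases; i++)
            VIR_FREE(aliases[i]);
        VIR_FREE(aliases);
        return -1;
    }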
> +
> +    if (!mig->nbd) {
> +        /* Destination doesn't support NBD server.
> +         * Fall back to previous implementation.
> +         * XXX Or should we report an error here?
> +         */
> +        VIR_DEBUG("Destination doesn't support NBD server "
> +                  "Falling back to previous implementation.");
> +        ret = 0;
> +        goto cleanup;
> +    }

Ah, this is the check I was looking for in one of the previous patches.

> +
> +    for (i = 0; i < ndisks; i++) {
> +        virDomainBlockJobInfo info;
> +        VIR_FREE(nbd_dest);
> +        if (virAsprintf(&nbd_dest, "nbd:%s:%u:exportname=%s",
> +                        host, mig->nbd->port, disks[i]) < 0) {
> +            virReportOOMError();
> +            goto error;
> +        }

Hmm, looks like this is not IPv6 ready at all or does it allow "[fec0::1]"
IPv6 addresses to be passed as host?
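If qemu's nbd: syntax takes the bracketed form (I haven't verified that, it
would need checking), the URI construction could be wrapped in a small helper
roughly like this (the name qemuMigrationBuildNBDURI is just made up for
illustration; needs strchr from <string.h>, which qemu_migration.c already
pulls in):

    static char *
    qemuMigrationBuildNBDURI(const char *host,
                             unsigned int port,
                             const char *diskAlias)
    {
        char *uri = NULL;
        int rc;

        /* A colon in the host string is a good enough hint that it is a
         * numeric IPv6 address and has to be wrapped in brackets. */
        if (strchr(host, ':'))
            rc = virAsprintf(&uri, "nbd:[%s]:%u:exportname=%s",
                             host, port, diskAlias);
        else
            rc = virAsprintf(&uri, "nbd:%s:%u:exportname=%s",
                             host, port, diskAlias);

        if (rc < 0)
            virReportOOMError();

        return uri;
    }

with the loop then doing something like
nbd_dest = qemuMigrationBuildNBDURI(host, mig->nbd->port, disks[i]);
and jumping to error when it returns NULL.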
> +
> +        if (qemuDomainObjEnterMonitorAsync(driver, vm,
> +                                           QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
> +            goto error;
> +        mon_ret = qemuMonitorDriveMirror(priv->mon, disks[i], nbd_dest,
> +                                         NULL, speed, mirror_flags);
> +        qemuDomainObjExitMonitorWithDriver(driver, vm);
> +
> +        if (mon_ret < 0)
> +            goto error;
> +
> +        /* wait for completion */
> +        while (true) {
> +            /* Poll every 50ms for progress & to allow cancellation */
> +            struct timespec ts = { .tv_sec = 0, .tv_nsec = 50 * 1000 * 1000ull };
> +            if (qemuDomainObjEnterMonitorAsync(driver, vm,
> +                                               QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
> +                goto error;
> +            if (priv->job.asyncAbort) {
> +                /* explicitly do this *after* we entered the monitor,
> +                 * as this is a critical section so we are guaranteed
> +                 * priv->job.asyncAbort will not change */
> +                qemuDomainObjExitMonitorWithDriver(driver, vm);
> +                virReportError(VIR_ERR_OPERATION_ABORTED, _("%s: %s"),
> +                               qemuDomainAsyncJobTypeToString(priv->job.asyncJob),
> +                               _("canceled by client"));
> +                goto cleanup;

I believe this should jump to the error label and cancel all started block
jobs.

> +            }
> +            mon_ret = qemuMonitorBlockJob(priv->mon, disks[i], NULL, 0,
> +                                          &info, BLOCK_JOB_INFO, true);
> +            qemuDomainObjExitMonitorWithDriver(driver, vm);
> +
> +            if (mon_ret < 0) {
> +                /* qemu doesn't report finished jobs */
> +                VIR_WARN("Unable to query drive-mirror job status. "
> +                         "Stop polling on '%s' cur:%llu end:%llu",
> +                         disks[i], info.cur, info.end);
> +                break;

Isn't this fatal? If not, I think the comment above is too short.

> +            }
> +
> +            if (info.cur == info.end) {
> +                VIR_DEBUG("Drive mirroring of '%s' completed", disks[i]);
> +                break;
> +            }
> +
> +            /* XXX Frankly speaking, we should listen to the events,
> +             * instead of doing this. But this works for now and we
> +             * are doing something similar in migration itself anyway */

Well, migration itself does not send any events. But I agree we can do
polling here and change to events at the same time we do it for migration
(if we ever get that event from qemu).

> +
> +            virDomainObjUnlock(vm);
> +            qemuDriverUnlock(driver);
> +
> +            nanosleep(&ts, NULL);
> +
> +            qemuDriverLock(driver);
> +            virDomainObjLock(vm);
> +
> +        }
> +    }
> +
> +    /* okay, copied. modify migrate_flags */
> +    *migrate_flags &= ~(QEMU_MONITOR_MIGRATE_NON_SHARED_DISK |
> +                        QEMU_MONITOR_MIGRATE_NON_SHARED_INC);
> +    ret = 0;
> +
> +cleanup:
> +    for (i = 0; i < ndisks; i++)
> +        VIR_FREE(disks[i]);
> +    VIR_FREE(disks);
> +    VIR_FREE(nbd_dest);
> +    return ret;
> +
> +error:
> +    /* cancel any outstanding jobs */
> +    if (qemuDomainObjEnterMonitorAsync(driver, vm,
> +                                       QEMU_ASYNC_JOB_MIGRATION_OUT) == 0) {
> +        while (i) {
> +            if (qemuMonitorBlockJob(priv->mon, disks[i], NULL, 0,
> +                                    NULL, BLOCK_JOB_ABORT, true) < 0)
> +                VIR_WARN("Unable to cancel block-job on '%s'", disks[i]);
> +            i--;
> +        }
> +        qemuDomainObjExitMonitorWithDriver(driver, vm);
> +    } else {
> +        VIR_WARN("Unable to enter monitor. No block job cancelled");
> +    }
> +    goto cleanup;
> +}
>
>  /* Validate whether the domain is safe to migrate. If vm is NULL,
>   * then this is being run in the v2 Prepare stage on the destination
...

Jirka

--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list