On Mon, Feb 18, 2013 at 15:38:40 +0100, Michal Privoznik wrote: > This function does the source part of NBD magic. It > invokes drive-mirror on each non shared and RW disk with > a source and wait till the mirroring process completes. > When it does we can proceed with migration. > > Currently, an active waiting is done: every 500ms libvirt > asks qemu if block-job is finished or not. However, once > the job finishes, qemu doesn't report its progress so we > can only assume if the job finished successfully or not. > The better solution would be to listen to the event which > is sent as soon as the job finishes. The event does > contain the result of job. > --- > src/qemu/qemu_migration.c | 188 +++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 187 insertions(+), 1 deletion(-) > > diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c > index 876e81b..e2c7804 100644 > --- a/src/qemu/qemu_migration.c > +++ b/src/qemu/qemu_migration.c > @@ -1171,6 +1171,186 @@ cleanup: > return ret; > } > > +/** > + * qemuMigrationDiskMirror: Name mismatch. > + * @driver: qemu driver > + * @vm: domain > + * @mig: migration cookie > + * @host: where are we migrating to > + * @speed: how much should the copying be limited > + * @migrate_flags: migrate monitor command flags > + * > + * Run drive-mirror to feed NBD server running on dst and > + * wait till the process completes. On success, update > + * @migrate_flags so we don't tell 'migrate' command to > + * do the very same operation. I think the comment should mention that the process does not actually complete until the end of migration; it just switches into another phase where writes go simultaneously to both source and destination. And this switch is what we are waiting for before proceeding with the next disk. It can apparently confuse people looking at the code that shuts down all drive-mirror jobs. > + * > + * Returns 0 on success (@migrate_flags updated), > + * -1 otherwise. 
> + */ > +static int > +qemuMigrationDriveMirror(virQEMUDriverPtr driver, > + virDomainObjPtr vm, > + qemuMigrationCookiePtr mig, > + const char *host, > + unsigned long speed, > + unsigned int *migrate_flags) > +{ > + qemuDomainObjPrivatePtr priv = vm->privateData; > + int ret = -1; > + int mon_ret; > + int port; > + ssize_t i; > + char *diskAlias = NULL; > + char *nbd_dest = NULL; > + unsigned int mirror_flags = VIR_DOMAIN_BLOCK_REBASE_REUSE_EXT; > + virErrorPtr err = NULL; > + > + if (!(*migrate_flags & (QEMU_MONITOR_MIGRATE_NON_SHARED_DISK | > + QEMU_MONITOR_MIGRATE_NON_SHARED_INC))) > + return 0; > + > + if (!mig->nbd) { > + /* Destination doesn't support NBD server. > + * Fall back to previous implementation. > + * XXX Or should we report an error here? */ No, we don't want to report an error here since the feature was enabled automatically. We would only want to fail the process if we had a flag that apps/users may use to explicitly request NBD. > + VIR_DEBUG("Destination doesn't support NBD server " > + "Falling back to previous implementation."); OK, this addresses one of my comments to 2/12. > + ret = 0; > + goto cleanup; You may just return 0 as you do a few lines above but it's not a big deal. 
> + } > + > + /* steal NBD port and thus prevent it's propagation back to destination */ > + port = mig->nbd->port; > + mig->nbd->port = 0; > + > + if (*migrate_flags & QEMU_MONITOR_MIGRATE_NON_SHARED_INC) > + mirror_flags |= VIR_DOMAIN_BLOCK_REBASE_SHALLOW; > + > + for (i = 0; i < vm->def->ndisks; i++) { > + virDomainDiskDefPtr disk = vm->def->disks[i]; > + virDomainBlockJobInfo info; > + > + /* skip shared, RO and source-less disks */ > + if (disk->shared || disk->readonly || !disk->src) > + continue; > + > + VIR_FREE(diskAlias); > + VIR_FREE(nbd_dest); > + if ((virAsprintf(&diskAlias, "%s%s", > + QEMU_DRIVE_HOST_PREFIX, disk->info.alias) < 0) || > + (virAsprintf(&nbd_dest, "nbd:%s:%u:exportname=%s", > + host, port, diskAlias) < 0)) { > + virReportOOMError(); > + goto error; > + } > + > + if (qemuDomainObjEnterMonitorAsync(driver, vm, > + QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) > + goto error; > + mon_ret = qemuMonitorDriveMirror(priv->mon, diskAlias, nbd_dest, > + NULL, speed, mirror_flags); > + qemuDomainObjExitMonitor(driver, vm); > + > + if (mon_ret < 0) > + goto error; > + > + /* wait for completion */ > + while (true) { > + /* Poll every 500ms for progress & to allow cancellation */ > + struct timespec ts = { .tv_sec = 0, .tv_nsec = 500 * 1000 * 1000ull }; > + > + memset(&info, 0, sizeof(info)); > + > + if (qemuDomainObjEnterMonitorAsync(driver, vm, > + QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) > + goto error; > + if (priv->job.asyncAbort) { > + /* explicitly do this *after* we entered the monitor, > + * as this is a critical section so we are guaranteed > + * priv->job.asyncAbort will not change */ > + qemuDomainObjExitMonitor(driver, vm); > + virReportError(VIR_ERR_OPERATION_ABORTED, _("%s: %s"), > + qemuDomainAsyncJobTypeToString(priv->job.asyncJob), > + _("canceled by client")); > + goto error; > + } > + mon_ret = qemuMonitorBlockJob(priv->mon, diskAlias, NULL, 0, > + &info, BLOCK_JOB_INFO, true); > + qemuDomainObjExitMonitor(driver, vm); > + > + if (mon_ret < 0) { 
> + /* If there's not any job running on a block device, > + * qemu won't report anything, so it is not fatal > + * if we fail to query status as job may have finished > + * while we were sleeping. */ > + virReportError(VIR_ERR_OPERATION_FAILED, > + _("Unable to query drive-mirror job status. " > + "Stop polling on '%s' cur:%llu end:%llu"), > + diskAlias, info.cur, info.end); Again, don't overwrite errors from lower layers. Anyway, I don't understand the code at all. You're saying failure is not fatal but you still report an error and abort the process. Not to mention that even if some failures are not fatal, some of them definitely are. Thus you either want to remove the comment altogether or make it more clear and distinguish fatal and non-fatal errors. > + goto error; > + } > + > + if (info.cur == info.end) { > + VIR_DEBUG("Drive mirroring of '%s' completed", diskAlias); > + break; > + } > + > + /* XXX Frankly speaking, we should listen to the events, > + * instead of doing this. But this works for now and we > + * are doing something similar in migration itself anyway */ > + > + virObjectUnlock(vm); > + > + nanosleep(&ts, NULL); > + > + virObjectLock(vm); > + } > + } ... Jirka -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list