On 02/06/2013 08:52 AM, Michal Privoznik wrote: > With new NBD storage migration approach there are several > requirements that need to be meet for successful use of the > feature. One of them is - the file representing a disk, needs to > have at least same size as on the source. Hence, we must transfer > a list of pairs [disk target, size] and check on destination that > this requirement is met and/or take actions to meet it. > --- > src/qemu/qemu_migration.c | 302 +++++++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 288 insertions(+), 14 deletions(-) > > diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c > index defad6b..d10c5b5 100644 > --- a/src/qemu/qemu_migration.c > +++ b/src/qemu/qemu_migration.c > @@ -29,6 +29,9 @@ > #endif > #include <fcntl.h> > #include <poll.h> > +#include <unistd.h> > +#include <sys/types.h> > +#include <sys/stat.h> > > #include "qemu_migration.h" > #include "qemu_monitor.h" > @@ -134,6 +137,15 @@ struct _qemuMigrationCookieNBD { > int port; /* on which port does NBD server listen for incoming data. > Zero value has special meaning - it is there just to let > destination know we (the source) do support NBD. */ > + > + /* The list of pairs [disk, size] (in Bytes). > + * This is needed because the same disk size is one of > + * prerequisites for NBD storage migration. */ > + size_t ndisks; > + struct { > + char *target; > + size_t bytes; > + } *disk; > }; > > typedef struct _qemuMigrationCookie qemuMigrationCookie; > @@ -196,6 +208,22 @@ qemuMigrationCookieNetworkFree(qemuMigrationCookieNetworkPtr network) > } > > > +static void > +qemuMigrationCookieNBDFree(qemuMigrationCookieNBDPtr nbd) > +{ > + size_t i; > + > + if (!nbd) > + return; > + > + for (i = 0; i < nbd->ndisks; i++) > + VIR_FREE(nbd->disk[i].target); > + > + VIR_FREE(nbd->disk); > + VIR_FREE(nbd); > +} > + > + > static void qemuMigrationCookieFree(qemuMigrationCookiePtr mig) > { > if (!mig) > @@ -207,12 +235,14 @@ static void qemuMigrationCookieFree(qemuMigrationCookiePtr mig) > if (mig->flags & QEMU_MIGRATION_COOKIE_NETWORK) > qemuMigrationCookieNetworkFree(mig->network); > > + if (mig->flags & QEMU_MIGRATION_COOKIE_NBD) > + qemuMigrationCookieNBDFree(mig->nbd); > + > VIR_FREE(mig->localHostname); > VIR_FREE(mig->remoteHostname); > VIR_FREE(mig->name); > VIR_FREE(mig->lockState); > VIR_FREE(mig->lockDriver); > - VIR_FREE(mig->nbd); > VIR_FREE(mig); > } > > @@ -525,22 +555,71 @@ qemuMigrationCookieAddNetwork(qemuMigrationCookiePtr mig, > > static int > qemuMigrationCookieAddNBD(qemuMigrationCookiePtr mig, > - virQEMUDriverPtr driver ATTRIBUTE_UNUSED, > + virQEMUDriverPtr driver, > virDomainObjPtr vm) > { > qemuDomainObjPrivatePtr priv = vm->privateData; > + int ret = -1; > + size_t i; > > /* It is not a bug if there already is a NBD data */ > if (!mig->nbd && > VIR_ALLOC(mig->nbd) < 0) { > virReportOOMError(); > - return -1; > + return ret; > + } > + > + /* in Begin phase add info about disks */ > + if (priv->job.phase == QEMU_MIGRATION_PHASE_BEGIN3 && > + vm->def->ndisks) { > + if (VIR_ALLOC_N(mig->nbd->disk, vm->def->ndisks) < 0) { > + virReportOOMError(); > + goto cleanup; > + } Could be a lot of wasted space potentially, right? I've seen other code make use of VIR_REALLOC_N > + > + for (i = 0; i < vm->def->ndisks; i++) { > + virDomainDiskDefPtr disk = vm->def->disks[i]; > + virDomainBlockInfo info; > + int format = disk->format; > + > + /* Add only non-shared RW disks with source */ > + if (!disk->src || disk->shared || disk->readonly) > + continue; > + > + memset(&info, 0, sizeof(info)); > + > + if (qemuDomainGetDiskBlockInfo(driver, vm, disk, &info) < 0) > + goto cleanup; > + > + if (format != VIR_STORAGE_FILE_RAW && > + format != VIR_STORAGE_FILE_QCOW && > + format != VIR_STORAGE_FILE_QCOW2) { > + virReportError(VIR_ERR_OPERATION_UNSUPPORTED, > + _("Cannot pre-create storage of %s format"), > + virStorageFileFormatTypeToString(format)); > + goto cleanup; > + } > + > + if (!(mig->nbd->disk[mig->nbd->ndisks].target = strdup(disk->dst))) { > + virReportOOMError(); > + goto cleanup; > + } > + > + mig->nbd->disk[mig->nbd->ndisks].bytes = info.capacity; > + mig->nbd->ndisks++; > + } > } > > mig->nbd->port = priv->nbdPort; > mig->flags |= QEMU_MIGRATION_COOKIE_NBD; > > - return 0; > + ret = 0; > + > +cleanup: > + if (ret < 0) > + qemuMigrationCookieNBDFree(mig->nbd); > + > + return ret; > } > > > @@ -650,7 +729,16 @@ qemuMigrationCookieXMLFormat(virQEMUDriverPtr driver, > virBufferAddLit(buf, " <nbd"); > if (mig->nbd->port) > virBufferAsprintf(buf, " port='%d'", mig->nbd->port); > - virBufferAddLit(buf, "/>\n"); > + if (mig->nbd->ndisks) { > + virBufferAddLit(buf, ">\n"); > + for (i = 0; i < mig->nbd->ndisks; i++) > + virBufferAsprintf(buf, " <disk target='%s' size='%zu'/>\n", > + mig->nbd->disk[i].target, > + mig->nbd->disk[i].bytes); > + virBufferAddLit(buf, " </nbd>\n"); > + } else { > + virBufferAddLit(buf, "/>\n"); > + } > } > > virBufferAddLit(buf, "</qemu-migration>\n"); > @@ -942,20 +1030,56 @@ qemuMigrationCookieXMLParse(qemuMigrationCookiePtr mig, > virXPathBoolean("boolean(./nbd)", ctxt)) { > char *port; > > + if (VIR_ALLOC(mig->nbd) < 0) { > + virReportOOMError(); > + goto error; > + } > + > port = virXPathString("string(./nbd/@port)", ctxt); > - if (port) { > - if (VIR_ALLOC(mig->nbd) < 0) { > + if (port && virStrToLong_i(port, NULL, 10, &mig->nbd->port) < 0) { > + VIR_FREE(port); > + virReportError(VIR_ERR_INTERNAL_ERROR, > + _("Malformed nbd port '%s'"), > + port); > + goto error; > + } > + VIR_FREE(port); > + > + if ((n = virXPathNodeSet("./nbd/disk", ctxt, &nodes)) > 0) { > + xmlNodePtr oldNode = ctxt->node; > + if (VIR_ALLOC_N(mig->nbd->disk, n) < 0) { > virReportOOMError(); > goto error; > } > - if (virStrToLong_i(port, NULL, 10, &mig->nbd->port) < 0) { > - VIR_FREE(port); > - virReportError(VIR_ERR_INTERNAL_ERROR, > - _("Malformed nbd port '%s'"), > - port); > - goto error; > + mig->nbd->ndisks = n; > + > + for (i = 0; i < n; i++) { > + ctxt->node = nodes[i]; > + > + tmp = virXPathString("string(./@target)", ctxt); > + if (!tmp) { > + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > + _("Malformed target attribute")); > + goto error; > + } > + mig->nbd->disk[i].target = tmp; > + /* deliberately don't free @tmp here, as the > + * cookie has the reference now and it is > + * responsible for freeing it later */ > + > + tmp = virXPathString("string(./@size)", ctxt); > + if (virStrToLong_ull(tmp, NULL, 10, (unsigned long long *) > + &mig->nbd->disk[i].bytes) < 0) { > + virReportError(VIR_ERR_INTERNAL_ERROR, > + _("Malformed size attribute '%s'"), > + tmp); > + VIR_FREE(tmp); > + goto error; > + } > + VIR_FREE(tmp); > } > - VIR_FREE(port); > + VIR_FREE(nodes); > + ctxt->node = oldNode; > } > } > > @@ -1348,6 +1472,152 @@ error: > goto cleanup; > } > > +static int > +qemuMigrationPreCreateStorageRaw(virDomainDiskDefPtr disk, > + size_t bytes) > +{ > + int ret = -1; > + struct stat sb; > + int fd = -1; > + > + if ((fd = virFileOpenAs(disk->src, O_RDWR | O_CREAT, 0660, > + -1, -1, VIR_FILE_OPEN_NOFORK)) < 0) { > + virReportSystemError(errno, _("Unable to create '%s'"), disk->src); > + goto cleanup; > + } > + > + if (fstat(fd, &sb) < 0) { > + virReportSystemError(errno, _("Unable to stat '%s'"), disk->src); > + goto unlink; > + } > + > + VIR_DEBUG("File '%s' is %zuB big. Required %zuB", disk->src, sb.st_size, bytes); > + if (sb.st_size < bytes) { > + if (ftruncate(fd, bytes) < 0) { > + virReportSystemError(errno, _("Unable to ftruncate '%s'"), disk->src); > + goto unlink; > + } > + > + if (fsync(fd) < 0) { > + ret = -errno; > + virReportSystemError(errno, _("cannot sync data to file '%s'"), > + disk->src); > + goto unlink; > + } > + } > + > + ret = 0; > + > +cleanup: > + VIR_FORCE_CLOSE(fd); > + return ret; > + > +unlink: > + if (unlink(disk->src) < 0) > + VIR_WARN("Unable to unlink '%s': %d", disk->src, errno); > + goto cleanup; > +} > + > +static int > +qemuMigrationPreCreateStorageQCOW(virQEMUDriverPtr driver, > + virDomainDiskDefPtr disk, > + int format, > + size_t bytes) > +{ > + int ret = -1; > + const char *imgBinary = qemuFindQemuImgBinary(driver); > + virCommandPtr cmd = NULL; > + size_t size_arg; > + > + if (!imgBinary) > + return ret; > + > + /* Size in KB */ > + size_arg = VIR_DIV_UP(bytes, 1024); > + > + cmd = virCommandNewArgList(imgBinary, "create", "-f", > + virStorageFileFormatTypeToString(format), > + "-o", "preallocation=metadata", > + disk->src, NULL); > + > + virCommandAddArgFormat(cmd, "%zuK", size_arg); > + > + if (virCommandRun(cmd, NULL) < 0) > + goto unlink; > + > + ret = 0; > + > +cleanup: Doesn't there need to be a virCommandFree(cmd) here? > + return ret; > + > +unlink: > + if (unlink(disk->src) < 0) > + VIR_WARN("Unable to unlink '%s': %d", disk->src, errno); > + goto cleanup; > +} > + > +static int > +qemuMigrationPreCreateStorage(virQEMUDriverPtr driver, > + virDomainObjPtr vm, > + qemuMigrationCookiePtr mig) > +{ > + int ret = -1; > + size_t i = 0; > + > + if (!mig->nbd || !mig->nbd->ndisks) { > + /* nothing to do here */ > + return 0; > + } > + > + for (i = 0; i < mig->nbd->ndisks; i++) { > + virDomainDiskDefPtr disk; > + int format; > + size_t bytes; > + int index; > + > + index = virDomainDiskIndexByName(vm->def, mig->nbd->disk[i].target, false); > + if (index < 0) { > + virReportError(VIR_ERR_INTERNAL_ERROR, > + _("No such disk '%s"), > + mig->nbd->disk[i].target); > + goto cleanup; > + } > + > + disk = vm->def->disks[i]; Huh? Is 'i' supposed to be 'index'? > + format = disk->format; > + bytes = mig->nbd->disk[i].bytes; > + > + VIR_DEBUG("Checking '%s' of %s format for its size (requested %zuB)", > + disk->src, virStorageFileFormatTypeToString(format), bytes); > + switch (format) { > + case VIR_STORAGE_FILE_RAW: > + if (qemuMigrationPreCreateStorageRaw(disk, bytes) < 0) > + goto cleanup; > + break; > + case VIR_STORAGE_FILE_QCOW: > + case VIR_STORAGE_FILE_QCOW2: > + if (qemuMigrationPreCreateStorageQCOW(driver, disk, > + format, bytes) < 0) > + goto cleanup; > + break; > + default: > + virReportError(VIR_ERR_OPERATION_UNSUPPORTED, > + _("Cannot pre-create disks of %s format"), > + virStorageFileFormatTypeToString(format)); > + goto cleanup; > + } > + > + } > + > + ret = 0; > +cleanup: > + /* free from migration data to prevent > + * infinite sending from src to dst and back */ > + VIR_FREE(mig->nbd->disk); > + mig->nbd->ndisks = 0; > + return ret; > +} > + > /* Validate whether the domain is safe to migrate. If vm is NULL, > * then this is being run in the v2 Prepare stage on the destination > * (where we only have the target xml); if vm is provided, then this > @@ -2003,6 +2273,10 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver, > QEMU_MIGRATION_COOKIE_NBD))) > goto cleanup; > > + /* pre-create all storage */ > + if (qemuMigrationPreCreateStorage(driver, vm, mig) < 0) > + goto cleanup; > + > if (qemuMigrationJobStart(driver, vm, QEMU_ASYNC_JOB_MIGRATION_IN) < 0) > goto cleanup; > qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_PREPARE); > -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list