On Mon, Feb 18, 2013 at 15:38:42 +0100, Michal Privoznik wrote: > With new NBD storage migration approach there are several > requirements that need to be meet for successful use of the s/meet/met/ > feature. One of them is - the file representing a disk, needs to > have at least same size as on the source. Hence, we must transfer > a list of pairs [disk target, size] and check on destination that > this requirement is met and/or take actions to meet it. > --- > src/qemu/qemu_migration.c | 302 +++++++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 288 insertions(+), 14 deletions(-) > > diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c > index e2c7804..9b38a0c 100644 > --- a/src/qemu/qemu_migration.c > +++ b/src/qemu/qemu_migration.c ... > @@ -525,22 +555,71 @@ qemuMigrationCookieAddNetwork(qemuMigrationCookiePtr mig, > > static int > qemuMigrationCookieAddNBD(qemuMigrationCookiePtr mig, > - virQEMUDriverPtr driver ATTRIBUTE_UNUSED, > + virQEMUDriverPtr driver, > virDomainObjPtr vm) > { > qemuDomainObjPrivatePtr priv = vm->privateData; > + int ret = -1; > + size_t i; > > /* It is not a bug if there already is a NBD data */ > if (!mig->nbd && > VIR_ALLOC(mig->nbd) < 0) { > virReportOOMError(); > - return -1; > + return ret; > + } > + > + /* in Begin phase add info about disks */ > + if (priv->job.phase == QEMU_MIGRATION_PHASE_BEGIN3 && > + vm->def->ndisks) { > + if (VIR_ALLOC_N(mig->nbd->disk, vm->def->ndisks) < 0) { > + virReportOOMError(); > + goto cleanup; > + } > + > + for (i = 0; i < vm->def->ndisks; i++) { > + virDomainDiskDefPtr disk = vm->def->disks[i]; > + virDomainBlockInfo info; > + int format = disk->format; > + > + /* Add only non-shared RW disks with source */ > + if (!disk->src || disk->shared || disk->readonly) > + continue; > + > + memset(&info, 0, sizeof(info)); > + > + if (qemuDomainGetDiskBlockInfo(driver, vm, disk, &info) < 0) > + goto cleanup; Hmm, I see solving the need to enter a job for getting disk block info won't be exactly easy. > + > + if (format != VIR_STORAGE_FILE_RAW && > + format != VIR_STORAGE_FILE_QCOW && > + format != VIR_STORAGE_FILE_QCOW2) { > + virReportError(VIR_ERR_OPERATION_UNSUPPORTED, > + _("Cannot pre-create storage of %s format"), > + virStorageFileFormatTypeToString(format)); > + goto cleanup; > + } > + > + if (!(mig->nbd->disk[mig->nbd->ndisks].target = strdup(disk->dst))) { > + virReportOOMError(); > + goto cleanup; > + } > + > + mig->nbd->disk[mig->nbd->ndisks].bytes = info.capacity; > + mig->nbd->ndisks++; > + } > } > > mig->nbd->port = priv->nbdPort; > mig->flags |= QEMU_MIGRATION_COOKIE_NBD; > > - return 0; > + ret = 0; > + > +cleanup: > + if (ret < 0) > + qemuMigrationCookieNBDFree(mig->nbd); > + > + return ret; > } > > ... > @@ -946,20 +1034,56 @@ qemuMigrationCookieXMLParse(qemuMigrationCookiePtr mig, > virXPathBoolean("boolean(./nbd)", ctxt)) { > char *port; > > + if (VIR_ALLOC(mig->nbd) < 0) { > + virReportOOMError(); > + goto error; > + } > + > port = virXPathString("string(./nbd/@port)", ctxt); > - if (port) { > - if (VIR_ALLOC(mig->nbd) < 0) { > + if (port && virStrToLong_i(port, NULL, 10, &mig->nbd->port) < 0) { > + VIR_FREE(port); > + virReportError(VIR_ERR_INTERNAL_ERROR, > + _("Malformed nbd port '%s'"), > + port); Oops, I believe you wanted to swap these two lines :-) > + goto error; > + } > + VIR_FREE(port); > + > + if ((n = virXPathNodeSet("./nbd/disk", ctxt, &nodes)) > 0) { > + xmlNodePtr oldNode = ctxt->node; > + if (VIR_ALLOC_N(mig->nbd->disk, n) < 0) { > virReportOOMError(); > goto error; > } > - if (virStrToLong_i(port, NULL, 10, &mig->nbd->port) < 0) { > - VIR_FREE(port); > - virReportError(VIR_ERR_INTERNAL_ERROR, > - _("Malformed nbd port '%s'"), > - port); > - goto error; > + mig->nbd->ndisks = n; > + > + for (i = 0; i < n; i++) { > + ctxt->node = nodes[i]; When accessing just attributes, it's usually easier to just call virXMLPropString(nodes[i], "name") instead of using XPath machinery. > + > + tmp = virXPathString("string(./@target)", ctxt); > + if (!tmp) { > + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > + _("Malformed target attribute")); I think this should rather be "Malformed NBD disk target attribute". > + goto error; > + } > + mig->nbd->disk[i].target = tmp; > + /* deliberately don't free @tmp here, as the > + * cookie has the reference now and it is > + * responsible for freeing it later */ > + > + tmp = virXPathString("string(./@size)", ctxt); > + if (virStrToLong_ull(tmp, NULL, 10, (unsigned long long *) > + &mig->nbd->disk[i].bytes) < 0) { > + virReportError(VIR_ERR_INTERNAL_ERROR, > + _("Malformed size attribute '%s'"), You should mention NBD disk here as well. > + tmp); > + VIR_FREE(tmp); > + goto error; > + } > + VIR_FREE(tmp); > } > - VIR_FREE(port); > + VIR_FREE(nodes); > + ctxt->node = oldNode; > } > } > > @@ -1352,6 +1476,152 @@ error: > goto cleanup; > } > > +static int > +qemuMigrationPreCreateStorageRaw(virDomainDiskDefPtr disk, > + size_t bytes) > +{ > + int ret = -1; > + struct stat sb; > + int fd = -1; > + > + if ((fd = virFileOpenAs(disk->src, O_RDWR | O_CREAT, 0660, > + -1, -1, VIR_FILE_OPEN_NOFORK)) < 0) { > + virReportSystemError(errno, _("Unable to create '%s'"), disk->src); > + goto cleanup; > + } > + > + if (fstat(fd, &sb) < 0) { > + virReportSystemError(errno, _("Unable to stat '%s'"), disk->src); > + goto unlink; > + } > + > + VIR_DEBUG("File '%s' is %zuB big. Required %zuB", disk->src, sb.st_size, bytes); > + if (sb.st_size < bytes) { > + if (ftruncate(fd, bytes) < 0) { > + virReportSystemError(errno, _("Unable to ftruncate '%s'"), disk->src); > + goto unlink; > + } Shouldn't we rather call posix_fallocate or something similar? > + > + if (fsync(fd) < 0) { > + ret = -errno; No need to pass errno to the caller. > + virReportSystemError(errno, _("cannot sync data to file '%s'"), > + disk->src); > + goto unlink; > + } > + } > + > + ret = 0; > + > +cleanup: > + VIR_FORCE_CLOSE(fd); You should really call VIR_CLOSE(fd) here and check for errors. > + return ret; > + > +unlink: > + if (unlink(disk->src) < 0) > + VIR_WARN("Unable to unlink '%s': %d", disk->src, errno); > + goto cleanup; What if the file was already there but it was just too small and making it larger failed. I don't think you want to unlink it in this case. > +} > + > +static int > +qemuMigrationPreCreateStorageQCOW(virQEMUDriverPtr driver, > + virDomainDiskDefPtr disk, > + int format, > + size_t bytes) > +{ > + int ret = -1; > + const char *imgBinary = qemuFindQemuImgBinary(driver); > + virCommandPtr cmd = NULL; > + size_t size_arg; > + > + if (!imgBinary) > + return ret; > + > + /* Size in KB */ > + size_arg = VIR_DIV_UP(bytes, 1024); > + > + cmd = virCommandNewArgList(imgBinary, "create", "-f", > + virStorageFileFormatTypeToString(format), > + "-o", "preallocation=metadata", > + disk->src, NULL); Wrong indentation. > + > + virCommandAddArgFormat(cmd, "%zuK", size_arg); Any particular reason why we need to round up the size when we can just use bytes? virCommandAddArgFormat(cmd, "%zu", bytes); > + > + if (virCommandRun(cmd, NULL) < 0) > + goto unlink; What happens if the QCOW is already there? > + > + ret = 0; > + > +cleanup: > + return ret; > + > +unlink: > + if (unlink(disk->src) < 0) > + VIR_WARN("Unable to unlink '%s': %d", disk->src, errno); > + goto cleanup; > +} Anyway, I feel like all this storage creation code should be placed in a storage driver somewhere. > + > +static int > +qemuMigrationPreCreateStorage(virQEMUDriverPtr driver, > + virDomainObjPtr vm, > + qemuMigrationCookiePtr mig) > +{ > + int ret = -1; > + size_t i = 0; > + > + if (!mig->nbd || !mig->nbd->ndisks) { > + /* nothing to do here */ > + return 0; > + } > + > + for (i = 0; i < mig->nbd->ndisks; i++) { > + virDomainDiskDefPtr disk; > + int format; > + size_t bytes; > + int index; This fails to compile: error: declaration of 'index' shadows a global declaration > + > + index = virDomainDiskIndexByName(vm->def, mig->nbd->disk[i].target, false); > + if (index < 0) { > + virReportError(VIR_ERR_INTERNAL_ERROR, > + _("No such disk '%s"), > + mig->nbd->disk[i].target); > + goto cleanup; > + } > + > + disk = vm->def->disks[i]; As John already said, I think you want vm->def->disks[index] here. > + format = disk->format; > + bytes = mig->nbd->disk[i].bytes; > + > + VIR_DEBUG("Checking '%s' of %s format for its size (requested %zuB)", > + disk->src, virStorageFileFormatTypeToString(format), bytes); > + switch (format) { > + case VIR_STORAGE_FILE_RAW: > + if (qemuMigrationPreCreateStorageRaw(disk, bytes) < 0) > + goto cleanup; > + break; > + case VIR_STORAGE_FILE_QCOW: > + case VIR_STORAGE_FILE_QCOW2: > + if (qemuMigrationPreCreateStorageQCOW(driver, disk, > + format, bytes) < 0) > + goto cleanup; > + break; > + default: > + virReportError(VIR_ERR_OPERATION_UNSUPPORTED, > + _("Cannot pre-create disks of %s format"), > + virStorageFileFormatTypeToString(format)); > + goto cleanup; > + } > + > + } > + > + ret = 0; > +cleanup: > + /* free from migration data to prevent > + * infinite sending from src to dst and back */ > + VIR_FREE(mig->nbd->disk); This leaks all disk targets. > + mig->nbd->ndisks = 0; > + return ret; > +} > + > /* Validate whether the domain is safe to migrate. If vm is NULL, > * then this is being run in the v2 Prepare stage on the destination > * (where we only have the target xml); if vm is provided, then this > @@ -2014,6 +2284,10 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver, > QEMU_MIGRATION_COOKIE_NBD))) > goto cleanup; > > + /* pre-create all storage */ > + if (qemuMigrationPreCreateStorage(driver, vm, mig) < 0) > + goto cleanup; > + What happens if the disks we are pre-creating exist and are used by another domain on the destination host? :-P Normally, migration would fail if locking manager is in use but with your code, we'd just happily overwrite all disks, right? :-) I'm afraid we should deal with lock manager somehow to avoid doing that :-/ > if (qemuMigrationJobStart(driver, vm, QEMU_ASYNC_JOB_MIGRATION_IN) < 0) > goto cleanup; > qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_PREPARE); Jirka -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list