On Fri, Sep 13, 2013 at 1:31 AM, Michal Privoznik <mprivozn@xxxxxxxxxx> wrote: > With new NBD storage migration approach there are several > requirements that need to be meet for successful use of the > feature. One of them is - the file representing a disk, needs to > have at least same size as on the source. Hence, we must transfer > a list of pairs [disk target, size] and check on destination that Would a better word be "tuple" instead of "pairs"? > this requirement is met and/or take actions to meet it. > > Signed-off-by: Michal Privoznik <mprivozn@xxxxxxxxxx> > --- > src/qemu/qemu_migration.c | 286 +++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 281 insertions(+), 5 deletions(-) > > diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c > index 69a9013..5080b0a 100644 > --- a/src/qemu/qemu_migration.c > +++ b/src/qemu/qemu_migration.c > @@ -31,6 +31,9 @@ > #endif > #include <fcntl.h> > #include <poll.h> > +#include <unistd.h> > +#include <sys/types.h> > +#include <sys/stat.h> > > #include "qemu_migration.h" > #include "qemu_monitor.h" > @@ -135,6 +138,15 @@ typedef struct _qemuMigrationCookieNBD qemuMigrationCookieNBD; > typedef qemuMigrationCookieNBD *qemuMigrationCookieNBDPtr; > struct _qemuMigrationCookieNBD { > int port; /* on which port does NBD server listen for incoming data */ > + > + /* The list of pairs [disk, size] (in Bytes). Same here. > + * This is needed because the same disk size is one of > + * prerequisites for NBD storage migration. */ > + size_t ndisks; > + struct { > + char *target; > + size_t bytes; > + } *disk; > }; > > typedef struct _qemuMigrationCookie qemuMigrationCookie; > @@ -197,6 +209,22 @@ qemuMigrationCookieNetworkFree(qemuMigrationCookieNetworkPtr network) > } > > > +static void > +qemuMigrationCookieNBDFree(qemuMigrationCookieNBDPtr nbd) > +{ > + size_t i; > + > + if (!nbd) > + return; > + > + for (i = 0; i < nbd->ndisks; i++) > + VIR_FREE(nbd->disk[i].target); > + > + VIR_FREE(nbd->disk); > + VIR_FREE(nbd); > +} > + > + > static void qemuMigrationCookieFree(qemuMigrationCookiePtr mig) > { > if (!mig) > @@ -208,12 +236,14 @@ static void qemuMigrationCookieFree(qemuMigrationCookiePtr mig) > if (mig->flags & QEMU_MIGRATION_COOKIE_NETWORK) > qemuMigrationCookieNetworkFree(mig->network); > > + if (mig->flags & QEMU_MIGRATION_COOKIE_NBD) > + qemuMigrationCookieNBDFree(mig->nbd); > + > VIR_FREE(mig->localHostname); > VIR_FREE(mig->remoteHostname); > VIR_FREE(mig->name); > VIR_FREE(mig->lockState); > VIR_FREE(mig->lockDriver); > - VIR_FREE(mig->nbd); > VIR_FREE(mig); > } > > @@ -518,20 +548,58 @@ qemuMigrationCookieAddNetwork(qemuMigrationCookiePtr mig, > > static int > qemuMigrationCookieAddNBD(qemuMigrationCookiePtr mig, > - virQEMUDriverPtr driver ATTRIBUTE_UNUSED, > + virQEMUDriverPtr driver, > virDomainObjPtr vm) > { > qemuDomainObjPrivatePtr priv = vm->privateData; > + int ret = -1; > + size_t i; > > /* It is not a bug if there already is a NBD data */ > if (!mig->nbd && > VIR_ALLOC(mig->nbd) < 0) > - return -1; > + return ret; > + > + /* in Begin phase add info about disks */ > + if (priv->job.phase == QEMU_MIGRATION_PHASE_BEGIN3 && > + vm->def->ndisks) { > + if (VIR_ALLOC_N(mig->nbd->disk, vm->def->ndisks) < 0) > + goto cleanup; > + > + for (i = 0; i < vm->def->ndisks; i++) { > + virDomainDiskDefPtr disk = vm->def->disks[i]; > + virDomainBlockInfo info; > + > + /* Add only non-shared RW disks with source */ > + if (!disk->src || disk->shared || disk->readonly) > + continue; > + > + memset(&info, 0, sizeof(info)); > + > + if (qemuDomainGetDiskBlockInfo(driver, vm, disk, &info) < 0) > + goto cleanup; > + > + /* Explicitly not checking which formats can be pre-created here, > + * as we might be talking to newer libvirt which knows more than we > + * do now in here. Just let the destination libvirt decide. */ > + if (VIR_STRDUP(mig->nbd->disk[mig->nbd->ndisks].target, disk->dst) < 0) > + goto cleanup; > + > + mig->nbd->disk[mig->nbd->ndisks].bytes = info.capacity; > + mig->nbd->ndisks++; > + } > + } > > mig->nbd->port = priv->nbdPort; > mig->flags |= QEMU_MIGRATION_COOKIE_NBD; > > - return 0; > + ret = 0; > + > +cleanup: > + if (ret < 0) > + qemuMigrationCookieNBDFree(mig->nbd); > + > + return ret; > } > > > @@ -641,7 +709,16 @@ qemuMigrationCookieXMLFormat(virQEMUDriverPtr driver, > virBufferAddLit(buf, " <nbd"); > if (mig->nbd->port) > virBufferAsprintf(buf, " port='%d'", mig->nbd->port); > - virBufferAddLit(buf, "/>\n"); > + if (mig->nbd->ndisks) { > + virBufferAddLit(buf, ">\n"); > + for (i = 0; i < mig->nbd->ndisks; i++) > + virBufferAsprintf(buf, " <disk target='%s' size='%zu'/>\n", > + mig->nbd->disk[i].target, > + mig->nbd->disk[i].bytes); > + virBufferAddLit(buf, " </nbd>\n"); > + } else { > + virBufferAddLit(buf, "/>\n"); > + } > } > > virBufferAddLit(buf, "</qemu-migration>\n"); > @@ -941,6 +1018,44 @@ qemuMigrationCookieXMLParse(qemuMigrationCookiePtr mig, > goto error; > } > VIR_FREE(port); > + > + if ((n = virXPathNodeSet("./nbd/disk", ctxt, &nodes)) > 0) { > + xmlNodePtr oldNode = ctxt->node; > + if (VIR_ALLOC_N(mig->nbd->disk, n) < 0) { > + virReportOOMError(); Doesn't VIR_ALLOC_N() handle the virReportOOMError() now? > + goto error; > + } > + mig->nbd->ndisks = n; > + > + for (i = 0; i < n; i++) { > + ctxt->node = nodes[i]; > + > + tmp = virXPathString("string(./@target)", ctxt); > + if (!tmp) { > + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > + _("Malformed target attribute")); > + goto error; > + } > + mig->nbd->disk[i].target = tmp; > + /* deliberately don't free @tmp here, as the > + * cookie has the reference now and it is > + * responsible for freeing it later */ > + > + tmp = virXPathString("string(./@size)", ctxt); > + if (virStrToLong_ull(tmp, NULL, 10, (unsigned long long *) > + &mig->nbd->disk[i].bytes) < 0) { > + virReportError(VIR_ERR_INTERNAL_ERROR, > + _("Malformed size attribute '%s'"), > + tmp); > + VIR_FREE(tmp); > + goto error; > + } > + VIR_FREE(tmp); > + } > + VIR_FREE(nodes); > + ctxt->node = oldNode; > + } > + VIR_FREE(port); Why's this added here? Its already at the start of this hunk and I don't see port used. > } > > virObjectUnref(caps); > @@ -1389,6 +1504,163 @@ cleanup: > return; > } > > +static int > +qemuMigrationPreCreateStorageRaw(virQEMUDriverPtr driver, > + virDomainObjPtr vm, > + virDomainDiskDefPtr disk, > + size_t bytes) > +{ > + int ret = -1; > + struct stat sb; > + int fd = -1; > + bool need_unlink = false; > + > + if ((fd = qemuOpenFile(driver, vm, disk->src, O_RDWR | O_CREAT, > + &need_unlink, NULL)) < 0) > + goto cleanup; > + > + if (fstat(fd, &sb) < 0) { > + virReportSystemError(errno, _("Unable to stat '%s'"), disk->src); > + goto cleanup; > + } > + > + VIR_DEBUG("File '%s' is %zuB big. Required %zuB", disk->src, sb.st_size, bytes); > + if (sb.st_size != bytes && > + posix_fallocate(fd, 0, bytes) < 0) { > + virReportSystemError(errno, _("Unable to fallocate '%s'"), disk->src); > + goto cleanup; > + } > + > + ret = 0; > + > +cleanup: > + VIR_FORCE_CLOSE(fd); > + if (ret < 0 && need_unlink && unlink(disk->src)) > + VIR_WARN("Unable to unlink '%s': %d", disk->src, errno); > + return ret; > +} > + > +static int > +qemuMigrationPreCreateStorageQCOW(virQEMUDriverPtr driver, > + virDomainDiskDefPtr disk, > + int format, > + size_t bytes) > +{ > + int ret = -1; > + const char *imgBinary = qemuFindQemuImgBinary(driver); > + virCommandPtr cmd = NULL; > + size_t size_arg; > + > + if (!imgBinary) > + return ret; > + > + /* Size in KB */ > + size_arg = VIR_DIV_UP(bytes, 1024); > + > + cmd = virCommandNewArgList(imgBinary, "create", "-f", > + virStorageFileFormatTypeToString(format), > + "-o", "preallocation=metadata", > + disk->src, NULL); > + > + virCommandAddArgFormat(cmd, "%zuK", size_arg); > + > + if (virCommandRun(cmd, NULL) < 0) > + goto unlink; > + > + ret = 0; > + > +cleanup: > + return ret; > + > +unlink: > + if (unlink(disk->src) < 0) > + VIR_WARN("Unable to unlink '%s': %d", disk->src, errno); > + goto cleanup; > +} > + > +static int > +qemuMigrationPreCreateStorage(virQEMUDriverPtr driver, > + virDomainObjPtr vm, > + qemuMigrationCookiePtr mig) > +{ > + int ret = -1; > + size_t i = 0; > + > + if (!mig->nbd || !mig->nbd->ndisks) { > + /* nothing to do here */ > + return 0; > + } > + > + for (i = 0; i < mig->nbd->ndisks; i++) { > + virDomainDiskDefPtr disk; > + int format; > + size_t bytes; > + int index; > + > + index = virDomainDiskIndexByName(vm->def, mig->nbd->disk[i].target, false); > + if (index < 0) { > + virReportError(VIR_ERR_INTERNAL_ERROR, > + _("No such disk '%s"), > + mig->nbd->disk[i].target); > + goto cleanup; > + } > + > + disk = vm->def->disks[i]; > + format = disk->format; > + bytes = mig->nbd->disk[i].bytes; > + > + VIR_DEBUG("Checking '%s' of %s format for its size (requested %zuB)", > + disk->src, virStorageFileFormatTypeToString(format), bytes); > + switch ((enum virStorageFileFormat) format) { > + /* These below we know how to pre-create. */ > + case VIR_STORAGE_FILE_RAW: > + if (qemuMigrationPreCreateStorageRaw(driver, vm, disk, bytes) < 0) > + goto cleanup; > + break; > + case VIR_STORAGE_FILE_QCOW: > + case VIR_STORAGE_FILE_QCOW2: > + if (qemuMigrationPreCreateStorageQCOW(driver, disk, > + format, bytes) < 0) > + goto cleanup; > + break; > + > + /* While these we don't know yet. */ > + case VIR_STORAGE_FILE_AUTO_SAFE: > + case VIR_STORAGE_FILE_AUTO: > + case VIR_STORAGE_FILE_NONE: > + case VIR_STORAGE_FILE_DIR: > + case VIR_STORAGE_FILE_BOCHS: > + case VIR_STORAGE_FILE_CLOOP: > + case VIR_STORAGE_FILE_COW: > + case VIR_STORAGE_FILE_DMG: > + case VIR_STORAGE_FILE_ISO: > + case VIR_STORAGE_FILE_QED: > + case VIR_STORAGE_FILE_VMDK: > + case VIR_STORAGE_FILE_VPC: > + case VIR_STORAGE_FILE_FAT: > + case VIR_STORAGE_FILE_VHD: > + case VIR_STORAGE_FILE_VDI: > + case VIR_STORAGE_FILE_LAST: > + /* Should we error here? As long as user has no control over which > + * disks are copied (currently there is no way specifying only a > + * set of disks to copy) we can't error here. What we can do is > + * leave users with old migration prerequisite: You (users) are > + * responsible for creating the storage on the destination. */ > + VIR_WARN("Don't know how to pre-create disks of %s type ('%s')", > + virStorageFileFormatTypeToString(format), disk->src); I think this is reasonable. We can support a few more format types with qemu-img so we could flesh that out. > + break; > + } > + } > + > + ret = 0; > +cleanup: > + /* free from migration data to prevent > + * infinite sending from src to dst and back */ > + VIR_FREE(mig->nbd->disk); Aren't we leaking each of the disk's target here? > + mig->nbd->ndisks = 0; > + return ret; > +} > + > /* Validate whether the domain is safe to migrate. If vm is NULL, > * then this is being run in the v2 Prepare stage on the destination > * (where we only have the target xml); if vm is provided, then this > @@ -2305,6 +2577,10 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver, > QEMU_MIGRATION_COOKIE_NBD))) > goto cleanup; > > + /* pre-create all storage */ > + if (qemuMigrationPreCreateStorage(driver, vm, mig) < 0) > + goto cleanup; > + > if (qemuMigrationJobStart(driver, vm, QEMU_ASYNC_JOB_MIGRATION_IN) < 0) > goto cleanup; > qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_PREPARE); > -- > 1.8.1.5 > > -- > libvir-list mailing list > libvir-list@xxxxxxxxxx > https://www.redhat.com/mailman/listinfo/libvir-list -- Doug Goldstein -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list