On 10/26/2016 07:33 AM, Bob Liu wrote: > Tunnelled migration doesn't require any extra network connections beside the > libvirt daemon. > It's capable of strong encryption and is the default option in openstack-nova. > > This patch add the tunnelled migration(Tunnel3params) support to libxl. > The data flow in the src side is: > * libxlDoMigrateSend() -> pipe > * libxlTunnel3MigrationFunc() poll pipe out and then write to dest stream. > > While in the dest side: > Stream -> pipe -> 'recvfd of libxlDomainStartRestore' > > The usage is the same as p2p migration, execpt adding one more '--tunnelled' to ^^^^^^ except > the libvirt p2p migration command. > > Signed-off-by: Bob Liu <bob.liu@xxxxxxxxxx> Nice :) Now openstack no longer needs to have tunnelled flag removed on nova to get migration working. See some comments below, its a first review as I would still like to test it. > --- > src/libxl/libxl_driver.c | 58 ++++++++++- > src/libxl/libxl_migration.c | 241 +++++++++++++++++++++++++++++++++++++++++--- > src/libxl/libxl_migration.h | 9 ++ > 3 files changed, 292 insertions(+), 16 deletions(-) > > diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c > index b66cb1f..a01bbff 100644 > --- a/src/libxl/libxl_driver.c > +++ b/src/libxl/libxl_driver.c > @@ -5918,6 +5918,61 @@ libxlDomainMigrateBegin3Params(virDomainPtr domain, > } > > static int > +libxlDomainMigratePrepareTunnel3Params(virConnectPtr dconn, > + virStreamPtr st, > + virTypedParameterPtr params, > + int nparams, > + const char *cookiein, > + int cookieinlen, > + char **cookieout ATTRIBUTE_UNUSED, > + int *cookieoutlen ATTRIBUTE_UNUSED, > + unsigned int flags) > +{ > + libxlDriverPrivatePtr driver = dconn->privateData; > + virDomainDefPtr def = NULL; > + const char *dom_xml = NULL; > + const char *dname = NULL; > + const char *uri_in = NULL; > + > +#ifdef LIBXL_HAVE_NO_SUSPEND_RESUME > + virReportUnsupportedError(); > + return -1; > +#endif > + > + virCheckFlags(LIBXL_MIGRATION_FLAGS, -1); > + if (virTypedParamsValidate(params, nparams, LIBXL_MIGRATION_PARAMETERS) < 0) > + goto error; > + > + if (virTypedParamsGetString(params, nparams, > + VIR_MIGRATE_PARAM_DEST_XML, > + &dom_xml) < 0 || > + virTypedParamsGetString(params, nparams, > + VIR_MIGRATE_PARAM_DEST_NAME, > + &dname) < 0 || > + virTypedParamsGetString(params, nparams, > + VIR_MIGRATE_PARAM_URI, > + &uri_in) < 0) > + > + goto error; > + > + if (!(def = libxlDomainMigrationPrepareDef(driver, dom_xml, dname))) > + goto error; > + > + if (virDomainMigratePrepareTunnel3ParamsEnsureACL(dconn, def) < 0) > + goto error; > + > + if (libxlDomainMigrationPrepareTunnel3(dconn, st, &def, cookiein, > + cookieinlen, flags) < 0) > + goto error; > + > + return 0; > + > + error: > + virDomainDefFree(def); > + return -1; > +} > + > +static int > libxlDomainMigratePrepare3Params(virConnectPtr dconn, > virTypedParameterPtr params, > int nparams, > @@ -6017,7 +6072,7 @@ libxlDomainMigratePerform3Params(virDomainPtr dom, > if (virDomainMigratePerform3ParamsEnsureACL(dom->conn, vm->def) < 0) > goto cleanup; > > - if (flags & VIR_MIGRATE_PEER2PEER) { > + if ((flags & (VIR_MIGRATE_TUNNELLED | VIR_MIGRATE_PEER2PEER))) { > if (libxlDomainMigrationPerformP2P(driver, vm, dom->conn, dom_xml, > dconnuri, uri, dname, flags) < 0) > goto cleanup; > @@ -6501,6 +6556,7 @@ static virHypervisorDriver libxlHypervisorDriver = { > .nodeDeviceReset = libxlNodeDeviceReset, /* 1.2.3 */ > .domainMigrateBegin3Params = libxlDomainMigrateBegin3Params, /* 1.2.6 */ > .domainMigratePrepare3Params = libxlDomainMigratePrepare3Params, /* 1.2.6 */ > + .domainMigratePrepareTunnel3Params = libxlDomainMigratePrepareTunnel3Params, /* 2.3.1 */ The version here is incorrect. It should be the next one to be tagged (after the ongoing freeze). Which means 2.5.0. Note that the versioning used has changed a bit: major number is incremented per year, minor per month and bugfix number for -maint releases. > .domainMigratePerform3Params = libxlDomainMigratePerform3Params, /* 1.2.6 */ > .domainMigrateFinish3Params = libxlDomainMigrateFinish3Params, /* 1.2.6 */ > .domainMigrateConfirm3Params = libxlDomainMigrateConfirm3Params, /* 1.2.6 */ > diff --git a/src/libxl/libxl_migration.c b/src/libxl/libxl_migration.c > index 534abb8..88c9bb8 100644 > --- a/src/libxl/libxl_migration.c > +++ b/src/libxl/libxl_migration.c > @@ -44,6 +44,7 @@ > #include "libxl_migration.h" > #include "locking/domain_lock.h" > #include "virtypedparam.h" > +#include "fdstream.h" > > #define VIR_FROM_THIS VIR_FROM_LIBXL > > @@ -484,6 +485,90 @@ libxlDomainMigrationPrepareDef(libxlDriverPrivatePtr driver, > } > > int > +libxlDomainMigrationPrepareTunnel3(virConnectPtr dconn, > + virStreamPtr st, > + virDomainDefPtr *def, > + const char *cookiein, > + int cookieinlen, > + unsigned int flags) > +{ > + libxlMigrationCookiePtr mig = NULL; > + libxlDriverPrivatePtr driver = dconn->privateData; > + virDomainObjPtr vm = NULL; > + libxlMigrationDstArgs *args = NULL; > + virThread thread; > + int dataFD[2] = { -1, -1 }; > + int ret = 0; The general codestyle I usually see is to have this initialized to -1, and then set ret = 0 on success (before the goto done) mentioned further below. In other words, assume error and set it to 0 if all good, as you have more error paths than the sole success one. > + > + if (libxlMigrationEatCookie(cookiein, cookieinlen, &mig) < 0) > + goto error; > + > + if (mig->xenMigStreamVer > LIBXL_SAVE_VERSION) { > + virReportError(VIR_ERR_OPERATION_UNSUPPORTED, > + _("Xen migration stream version '%d' is not supported on this host"), > + mig->xenMigStreamVer); > + goto error; > + } > + > + if (!(vm = virDomainObjListAdd(driver->domains, *def, > + driver->xmlopt, > + VIR_DOMAIN_OBJ_LIST_ADD_LIVE | > + VIR_DOMAIN_OBJ_LIST_ADD_CHECK_LIVE, > + NULL))) > + goto error; > + > + /* > + * The data flow of tunnel3 migration in the dest side: > + * stream -> pipe -> recvfd of libxlDomainStartRestore > + */ > + if (pipe(dataFD) < 0) > + goto error; > + > + /* Stream data will be written to pipeIn */ > + if (virFDStreamOpen(st, dataFD[1]) < 0) > + goto error; We probably need to add this (same as qemu): dataFD[1] = -1; /* 'st' owns the FD now & will close it */ > + > + if (libxlMigrationDstArgsInitialize() < 0) > + goto error; > + > + if (!(args = virObjectNew(libxlMigrationDstArgsClass))) > + goto error; > + > + args->conn = dconn; > + args->vm = vm; > + args->flags = flags; > + args->migcookie = mig; > + /* Receive from pipeOut */ > + args->recvfd = dataFD[0]; > + args->nsocks = 0; > + if (virThreadCreate(&thread, false, libxlDoMigrateReceive, args) < 0) { > + virReportError(VIR_ERR_OPERATION_FAILED, "%s", > + _("Failed to create thread for receiving migration data")); > + goto error; > + } > + > + goto done; Have ret = 0 before "goto done". > + > + error: > + VIR_FORCE_CLOSE(dataFD[1]); > + VIR_FORCE_CLOSE(dataFD[0]); > + virObjectUnref(args); > + /* Remove virDomainObj from domain list */ > + if (vm) { > + virDomainObjListRemove(driver->domains, vm); > + vm = NULL; > + } > + ret = -1; And removing this one. Albeit the three comments about the style are just nitpick and could be ignored if others are ok with it. > + > + done: > + /* Nobody will close dataFD[1]? */ virFDStreamOpen(st, dataFD[1]) will set virStream private data which stores this file descriptor. A later call to virStreamFinish would result in a call to virFDStreamClose(...) which closes the file descriptor. So unless I misunderstood, I think it's reasonable to remove this comment here, assuming you set dataFD[1] = -1 as commented above. > + if (vm) > + virObjectUnlock(vm); > + > + return ret; > +} > + > +int > libxlDomainMigrationPrepare(virConnectPtr dconn, > virDomainDefPtr *def, > const char *uri_in, > @@ -710,9 +795,90 @@ libxlDomainMigrationPrepare(virConnectPtr dconn, > return ret; > } > > -/* This function is a simplification of virDomainMigrateVersion3Full > - * excluding tunnel support and restricting it to migration v3 > - * with params since it was the first to be introduced in libxl. > +typedef struct _libxlTunnelMigrationThread libxlTunnelMigrationThread; > +struct _libxlTunnelMigrationThread { > + virThread thread; > + virStreamPtr st; > + int srcFD; > +}; > +#define TUNNEL_SEND_BUF_SIZE 65536 > + > +/* > + * The data flow of tunnel3 migration in the src side: > + * libxlDoMigrateSend() -> pipe > + * libxlTunnel3MigrationFunc() polls pipe out and then write to dest stream. > + */ > +static void libxlTunnel3MigrationFunc(void *arg) > +{ > + libxlTunnelMigrationThread *data = (libxlTunnelMigrationThread *)arg; > + char *buffer = NULL; > + struct pollfd fds[1]; > + int timeout = -1; > + > + if (VIR_ALLOC_N(buffer, TUNNEL_SEND_BUF_SIZE) < 0) { > + virReportError(errno, "%s", _("poll failed in migration tunnel")); > + return; > + } > + > + fds[0].fd = data->srcFD; > + for (;;) { > + int ret; > + > + fds[0].events = POLLIN; > + fds[0].revents = 0; > + ret = poll(fds, ARRAY_CARDINALITY(fds), timeout); > + if (ret < 0) { > + if (errno == EAGAIN || errno == EINTR) > + continue; > + virReportError(errno, "%s", > + _("poll failed in libxlTunnel3MigrationFunc")); > + goto abrt; > + } > + > + if (ret == 0) { > + VIR_DEBUG("poll got timeout"); > + break; > + } > + > + if (fds[0].revents & (POLLIN | POLLERR | POLLHUP)) { > + int nbytes; > + > + nbytes = read(data->srcFD, buffer, TUNNEL_SEND_BUF_SIZE); > + if (nbytes > 0) { > + /* Write to dest stream */ > + if (virStreamSend(data->st, buffer, nbytes) < 0) { > + virReportError(errno, "%s", > + _("tunnelled migration failed to send to virStream")); > + goto abrt; > + } > + } else if (nbytes < 0) { > + virReportError(errno, "%s", > + _("tunnelled migration failed to read from xen side")); > + goto abrt; > + } else { > + /* EOF; transferred all data */ > + break; > + } > + } > + } > + > + if (virStreamFinish(data->st) < 0) > + virReportError(errno, "%s", > + _("tunnelled migration failed to finish stream")); > + > + cleanup: > + VIR_FREE(buffer); > + > + return; > + > + abrt: Do you mean 'abort' here? Ahh, but there aren't any 'abort' labels in the whole source code, so I assume you used it to be in line with the rest. > + virStreamAbort(data->st); > + goto cleanup; > +} > + > +/* This function is a simplification of virDomainMigrateVersion3Full and > + * restricting it to migration v3 with params since it was the first to be > + * introduced in libxl. > */ > static int > libxlDoMigrateP2P(libxlDriverPrivatePtr driver, > @@ -737,6 +903,10 @@ libxlDoMigrateP2P(libxlDriverPrivatePtr driver, > bool cancelled = true; > virErrorPtr orig_err = NULL; > int ret = -1; > + /* For tunnel migration */ > + virStreamPtr st = NULL; > + libxlTunnelMigrationThread *libxlTunnelMigationThreadPtr = NULL; This variable could probably be smaller, like "tnlthread". This would allow you to avoid both the long name, plus the typo you have in variable name. > + int dataFD[2] = { -1, -1 }; > > dom_xml = libxlDomainMigrationBegin(sconn, vm, xmlin, > &cookieout, &cookieoutlen); > @@ -764,29 +934,62 @@ libxlDoMigrateP2P(libxlDriverPrivatePtr driver, > > VIR_DEBUG("Prepare3"); > virObjectUnlock(vm); > - ret = dconn->driver->domainMigratePrepare3Params > - (dconn, params, nparams, cookieout, cookieoutlen, NULL, NULL, &uri_out, destflags); > + if (flags & VIR_MIGRATE_TUNNELLED) { > + if (!(st = virStreamNew(dconn, 0))) > + goto cleanup; > + ret = dconn->driver->domainMigratePrepareTunnel3Params > + (dconn, st, params, nparams, cookieout, cookieoutlen, NULL, NULL, destflags); > + } else { > + ret = dconn->driver->domainMigratePrepare3Params > + (dconn, params, nparams, cookieout, cookieoutlen, NULL, NULL, &uri_out, destflags); > + } > virObjectLock(vm); > > if (ret == -1) > goto cleanup; > > - if (uri_out) { > - if (virTypedParamsReplaceString(¶ms, &nparams, > - VIR_MIGRATE_PARAM_URI, uri_out) < 0) { > - orig_err = virSaveLastError(); > + if (!(flags & VIR_MIGRATE_TUNNELLED)) { > + if (uri_out) { > + if (virTypedParamsReplaceString(¶ms, &nparams, > + VIR_MIGRATE_PARAM_URI, uri_out) < 0) { > + orig_err = virSaveLastError(); > + goto finish; > + } > + } else { > + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > + _("domainMigratePrepare3 did not set uri")); > goto finish; > } > - } else { > - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > - _("domainMigratePrepare3 did not set uri")); > - goto finish; > } > > VIR_DEBUG("Perform3 uri=%s", NULLSTR(uri_out)); > - ret = libxlDomainMigrationPerform(driver, vm, NULL, NULL, > - uri_out, NULL, flags); > + if (flags & VIR_MIGRATE_TUNNELLED) { > + if (VIR_ALLOC(libxlTunnelMigationThreadPtr) < 0) > + goto cleanup; > + if (pipe(dataFD) < 0) { > + virReportError(errno, "%s", _("Unable to make pipes")); > + goto cleanup; > + } > + /* Read from pipe */ > + libxlTunnelMigationThreadPtr->srcFD = dataFD[0]; > + /* Write to dest stream */ > + libxlTunnelMigationThreadPtr->st = st; > + if (virThreadCreate(&libxlTunnelMigationThreadPtr->thread, true, > + libxlTunnel3MigrationFunc, > + libxlTunnelMigationThreadPtr) < 0) { > + virReportError(errno, "%s", > + _("Unable to create tunnel migration thread")); > + goto cleanup; > + } I still wonder how this chunk above inside VIR_MIGRATE_TUNNELLED and ... > > + virObjectUnlock(vm); > + /* Send data to pipe */ > + ret = libxlDoMigrateSend(driver, vm, flags, dataFD[1]); > + virObjectLock(vm); > + } else { > + ret = libxlDomainMigrationPerform(driver, vm, NULL, NULL, > + uri_out, NULL, flags); > + } > if (ret < 0) > orig_err = virSaveLastError(); > > @@ -824,6 +1027,14 @@ libxlDoMigrateP2P(libxlDriverPrivatePtr driver, > vm->def->name); > > cleanup: > + if (libxlTunnelMigationThreadPtr) { > + virThreadCancel(&libxlTunnelMigationThreadPtr->thread); > + VIR_FREE(libxlTunnelMigationThreadPtr); > + } > + VIR_FORCE_CLOSE(dataFD[0]); > + VIR_FORCE_CLOSE(dataFD[1]); ... and this one could be turned into helpers like libxlMigrationStartTunnel and libxlMigrationStopTunnel? The latter you probably add a dstFD such that you can also cleanup both descriptors in StopTunnel. I sort of understand the way you propose is to consolidate cleanup. But it would help the reader of the P2P migration function (which is considerably big) to move the big portions of tunnel-specific codepaths into helpers, such that the migration flow is clearer. This is just a suggestion though, unless others feel strongly about it. One other issue is that virThreadCancel is asynchronous and then you close the file descriptors after requesting the cancellation request, which sounds erroneous prone to me IIUC. It's not guaranteed the thread has closed but that after returning the request for canceling was successful. So probably you would need a virThreadJoin after that, such that you can actually know the thread really has cancelled? Then it's probably safe to close both descriptors afterwards. Qemu driver also seems to write a single byte signalling the Tunnel thread to abort/finish. Joao -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list