On 10/26/2016 07:24 PM, Joao Martins wrote: > On 10/26/2016 07:33 AM, Bob Liu wrote: >> Tunnelled migration doesn't require any extra network connections beside the >> libvirt daemon. >> It's capable of strong encryption and is the default option in openstack-nova. >> >> This patch add the tunnelled migration(Tunnel3params) support to libxl. >> The data flow in the src side is: >> * libxlDoMigrateSend() -> pipe >> * libxlTunnel3MigrationFunc() poll pipe out and then write to dest stream. >> >> While in the dest side: >> Stream -> pipe -> 'recvfd of libxlDomainStartRestore' >> >> The usage is the same as p2p migration, execpt adding one more '--tunnelled' to > ^^^^^^ except >> the libvirt p2p migration command. >> >> Signed-off-by: Bob Liu <bob.liu@xxxxxxxxxx> > Nice :) Now openstack no longer needs to have tunnelled flag removed on nova to > get migration working. > > See some comments below, its a first review as I would still like to test it. > Will take all of your suggestions. But let's wait for more reviews before I post a updated version. Thanks, Bob >> --- >> src/libxl/libxl_driver.c | 58 ++++++++++- >> src/libxl/libxl_migration.c | 241 +++++++++++++++++++++++++++++++++++++++++--- >> src/libxl/libxl_migration.h | 9 ++ >> 3 files changed, 292 insertions(+), 16 deletions(-) >> >> diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c >> index b66cb1f..a01bbff 100644 >> --- a/src/libxl/libxl_driver.c >> +++ b/src/libxl/libxl_driver.c >> @@ -5918,6 +5918,61 @@ libxlDomainMigrateBegin3Params(virDomainPtr domain, >> } >> >> static int >> +libxlDomainMigratePrepareTunnel3Params(virConnectPtr dconn, >> + virStreamPtr st, >> + virTypedParameterPtr params, >> + int nparams, >> + const char *cookiein, >> + int cookieinlen, >> + char **cookieout ATTRIBUTE_UNUSED, >> + int *cookieoutlen ATTRIBUTE_UNUSED, >> + unsigned int flags) >> +{ >> + libxlDriverPrivatePtr driver = dconn->privateData; >> + virDomainDefPtr def = NULL; >> + const char *dom_xml = NULL; >> + const char *dname = NULL; >> + const char *uri_in = NULL; >> + >> +#ifdef LIBXL_HAVE_NO_SUSPEND_RESUME >> + virReportUnsupportedError(); >> + return -1; >> +#endif >> + >> + virCheckFlags(LIBXL_MIGRATION_FLAGS, -1); >> + if (virTypedParamsValidate(params, nparams, LIBXL_MIGRATION_PARAMETERS) < 0) >> + goto error; >> + >> + if (virTypedParamsGetString(params, nparams, >> + VIR_MIGRATE_PARAM_DEST_XML, >> + &dom_xml) < 0 || >> + virTypedParamsGetString(params, nparams, >> + VIR_MIGRATE_PARAM_DEST_NAME, >> + &dname) < 0 || >> + virTypedParamsGetString(params, nparams, >> + VIR_MIGRATE_PARAM_URI, >> + &uri_in) < 0) >> + >> + goto error; >> + >> + if (!(def = libxlDomainMigrationPrepareDef(driver, dom_xml, dname))) >> + goto error; >> + >> + if (virDomainMigratePrepareTunnel3ParamsEnsureACL(dconn, def) < 0) >> + goto error; >> + >> + if (libxlDomainMigrationPrepareTunnel3(dconn, st, &def, cookiein, >> + cookieinlen, flags) < 0) >> + goto error; >> + >> + return 0; >> + >> + error: >> + virDomainDefFree(def); >> + return -1; >> +} >> + >> +static int >> libxlDomainMigratePrepare3Params(virConnectPtr dconn, >> virTypedParameterPtr params, >> int nparams, >> @@ -6017,7 +6072,7 @@ libxlDomainMigratePerform3Params(virDomainPtr dom, >> if (virDomainMigratePerform3ParamsEnsureACL(dom->conn, vm->def) < 0) >> goto cleanup; >> >> - if (flags & VIR_MIGRATE_PEER2PEER) { >> + if ((flags & (VIR_MIGRATE_TUNNELLED | VIR_MIGRATE_PEER2PEER))) { >> if (libxlDomainMigrationPerformP2P(driver, vm, dom->conn, dom_xml, >> dconnuri, uri, dname, flags) < 0) >> goto cleanup; >> @@ -6501,6 +6556,7 @@ static virHypervisorDriver libxlHypervisorDriver = { >> .nodeDeviceReset = libxlNodeDeviceReset, /* 1.2.3 */ >> .domainMigrateBegin3Params = libxlDomainMigrateBegin3Params, /* 1.2.6 */ >> .domainMigratePrepare3Params = libxlDomainMigratePrepare3Params, /* 1.2.6 */ >> + .domainMigratePrepareTunnel3Params = libxlDomainMigratePrepareTunnel3Params, /* 2.3.1 */ > The version here is incorrect. It should be the next one to be tagged (after the > ongoing freeze). Which means 2.5.0. Note that the versioning used has changed a > bit: major number is incremented per year, minor per month and bugfix number for > -maint releases. > >> .domainMigratePerform3Params = libxlDomainMigratePerform3Params, /* 1.2.6 */ >> .domainMigrateFinish3Params = libxlDomainMigrateFinish3Params, /* 1.2.6 */ >> .domainMigrateConfirm3Params = libxlDomainMigrateConfirm3Params, /* 1.2.6 */ >> diff --git a/src/libxl/libxl_migration.c b/src/libxl/libxl_migration.c >> index 534abb8..88c9bb8 100644 >> --- a/src/libxl/libxl_migration.c >> +++ b/src/libxl/libxl_migration.c >> @@ -44,6 +44,7 @@ >> #include "libxl_migration.h" >> #include "locking/domain_lock.h" >> #include "virtypedparam.h" >> +#include "fdstream.h" >> >> #define VIR_FROM_THIS VIR_FROM_LIBXL >> >> @@ -484,6 +485,90 @@ libxlDomainMigrationPrepareDef(libxlDriverPrivatePtr driver, >> } >> >> int >> +libxlDomainMigrationPrepareTunnel3(virConnectPtr dconn, >> + virStreamPtr st, >> + virDomainDefPtr *def, >> + const char *cookiein, >> + int cookieinlen, >> + unsigned int flags) >> +{ >> + libxlMigrationCookiePtr mig = NULL; >> + libxlDriverPrivatePtr driver = dconn->privateData; >> + virDomainObjPtr vm = NULL; >> + libxlMigrationDstArgs *args = NULL; >> + virThread thread; >> + int dataFD[2] = { -1, -1 }; >> + int ret = 0; > The general codestyle I usually see is to have this initialized to -1, and then > set ret = 0 on success (before the goto done) mentioned further below. In other > words, assume error and set it to 0 if all good, as you have more error paths > than the sole success one. > >> + >> + if (libxlMigrationEatCookie(cookiein, cookieinlen, &mig) < 0) >> + goto error; >> + >> + if (mig->xenMigStreamVer > LIBXL_SAVE_VERSION) { >> + virReportError(VIR_ERR_OPERATION_UNSUPPORTED, >> + _("Xen migration stream version '%d' is not supported on this host"), >> + mig->xenMigStreamVer); >> + goto error; >> + } >> + >> + if (!(vm = virDomainObjListAdd(driver->domains, *def, >> + driver->xmlopt, >> + VIR_DOMAIN_OBJ_LIST_ADD_LIVE | >> + VIR_DOMAIN_OBJ_LIST_ADD_CHECK_LIVE, >> + NULL))) >> + goto error; >> + >> + /* >> + * The data flow of tunnel3 migration in the dest side: >> + * stream -> pipe -> recvfd of libxlDomainStartRestore >> + */ >> + if (pipe(dataFD) < 0) >> + goto error; >> + >> + /* Stream data will be written to pipeIn */ >> + if (virFDStreamOpen(st, dataFD[1]) < 0) >> + goto error; > We probably need to add this (same as qemu): > > dataFD[1] = -1; /* 'st' owns the FD now & will close it */ > >> + >> + if (libxlMigrationDstArgsInitialize() < 0) >> + goto error; >> + >> + if (!(args = virObjectNew(libxlMigrationDstArgsClass))) >> + goto error; >> + >> + args->conn = dconn; >> + args->vm = vm; >> + args->flags = flags; >> + args->migcookie = mig; >> + /* Receive from pipeOut */ >> + args->recvfd = dataFD[0]; >> + args->nsocks = 0; >> + if (virThreadCreate(&thread, false, libxlDoMigrateReceive, args) < 0) { >> + virReportError(VIR_ERR_OPERATION_FAILED, "%s", >> + _("Failed to create thread for receiving migration data")); >> + goto error; >> + } >> + >> + goto done; > Have ret = 0 before "goto done". > >> + >> + error: >> + VIR_FORCE_CLOSE(dataFD[1]); >> + VIR_FORCE_CLOSE(dataFD[0]); >> + virObjectUnref(args); >> + /* Remove virDomainObj from domain list */ >> + if (vm) { >> + virDomainObjListRemove(driver->domains, vm); >> + vm = NULL; >> + } >> + ret = -1; > And removing this one. Albeit the three comments about the style are just > nitpick and could be ignored if others are ok with it. > >> + >> + done: >> + /* Nobody will close dataFD[1]? */ > virFDStreamOpen(st, dataFD[1]) will set virStream private data which stores this > file descriptor. A later call to virStreamFinish would result in a call to > virFDStreamClose(...) which closes the file descriptor. So unless I > misunderstood, I think it's reasonable to remove this comment here, assuming you > set dataFD[1] = -1 as commented above. > >> + if (vm) >> + virObjectUnlock(vm); >> + >> + return ret; >> +} >> + >> +int >> libxlDomainMigrationPrepare(virConnectPtr dconn, >> virDomainDefPtr *def, >> const char *uri_in, >> @@ -710,9 +795,90 @@ libxlDomainMigrationPrepare(virConnectPtr dconn, >> return ret; >> } >> >> -/* This function is a simplification of virDomainMigrateVersion3Full >> - * excluding tunnel support and restricting it to migration v3 >> - * with params since it was the first to be introduced in libxl. >> +typedef struct _libxlTunnelMigrationThread libxlTunnelMigrationThread; >> +struct _libxlTunnelMigrationThread { >> + virThread thread; >> + virStreamPtr st; >> + int srcFD; >> +}; >> +#define TUNNEL_SEND_BUF_SIZE 65536 >> + >> +/* >> + * The data flow of tunnel3 migration in the src side: >> + * libxlDoMigrateSend() -> pipe >> + * libxlTunnel3MigrationFunc() polls pipe out and then write to dest stream. >> + */ >> +static void libxlTunnel3MigrationFunc(void *arg) >> +{ >> + libxlTunnelMigrationThread *data = (libxlTunnelMigrationThread *)arg; >> + char *buffer = NULL; >> + struct pollfd fds[1]; >> + int timeout = -1; >> + >> + if (VIR_ALLOC_N(buffer, TUNNEL_SEND_BUF_SIZE) < 0) { >> + virReportError(errno, "%s", _("poll failed in migration tunnel")); >> + return; >> + } >> + >> + fds[0].fd = data->srcFD; >> + for (;;) { >> + int ret; >> + >> + fds[0].events = POLLIN; >> + fds[0].revents = 0; >> + ret = poll(fds, ARRAY_CARDINALITY(fds), timeout); >> + if (ret < 0) { >> + if (errno == EAGAIN || errno == EINTR) >> + continue; >> + virReportError(errno, "%s", >> + _("poll failed in libxlTunnel3MigrationFunc")); >> + goto abrt; >> + } >> + >> + if (ret == 0) { >> + VIR_DEBUG("poll got timeout"); >> + break; >> + } >> + >> + if (fds[0].revents & (POLLIN | POLLERR | POLLHUP)) { >> + int nbytes; >> + >> + nbytes = read(data->srcFD, buffer, TUNNEL_SEND_BUF_SIZE); >> + if (nbytes > 0) { >> + /* Write to dest stream */ >> + if (virStreamSend(data->st, buffer, nbytes) < 0) { >> + virReportError(errno, "%s", >> + _("tunnelled migration failed to send to virStream")); >> + goto abrt; >> + } >> + } else if (nbytes < 0) { >> + virReportError(errno, "%s", >> + _("tunnelled migration failed to read from xen side")); >> + goto abrt; >> + } else { >> + /* EOF; transferred all data */ >> + break; >> + } >> + } >> + } >> + >> + if (virStreamFinish(data->st) < 0) >> + virReportError(errno, "%s", >> + _("tunnelled migration failed to finish stream")); >> + >> + cleanup: >> + VIR_FREE(buffer); >> + >> + return; >> + >> + abrt: > Do you mean 'abort' here? Ahh, but there aren't any 'abort' labels in the > whole source code, so I assume you used it to be in line with the rest. > >> + virStreamAbort(data->st); >> + goto cleanup; >> +} >> + >> +/* This function is a simplification of virDomainMigrateVersion3Full and >> + * restricting it to migration v3 with params since it was the first to be >> + * introduced in libxl. >> */ >> static int >> libxlDoMigrateP2P(libxlDriverPrivatePtr driver, >> @@ -737,6 +903,10 @@ libxlDoMigrateP2P(libxlDriverPrivatePtr driver, >> bool cancelled = true; >> virErrorPtr orig_err = NULL; >> int ret = -1; >> + /* For tunnel migration */ >> + virStreamPtr st = NULL; >> + libxlTunnelMigrationThread *libxlTunnelMigationThreadPtr = NULL; > This variable could probably be smaller, like "tnlthread". This would allow you > to avoid both the long name, plus the typo you have in variable name. > >> + int dataFD[2] = { -1, -1 }; >> >> dom_xml = libxlDomainMigrationBegin(sconn, vm, xmlin, >> &cookieout, &cookieoutlen); >> @@ -764,29 +934,62 @@ libxlDoMigrateP2P(libxlDriverPrivatePtr driver, >> >> VIR_DEBUG("Prepare3"); >> virObjectUnlock(vm); >> - ret = dconn->driver->domainMigratePrepare3Params >> - (dconn, params, nparams, cookieout, cookieoutlen, NULL, NULL, &uri_out, destflags); >> + if (flags & VIR_MIGRATE_TUNNELLED) { >> + if (!(st = virStreamNew(dconn, 0))) >> + goto cleanup; >> + ret = dconn->driver->domainMigratePrepareTunnel3Params >> + (dconn, st, params, nparams, cookieout, cookieoutlen, NULL, NULL, destflags); >> + } else { >> + ret = dconn->driver->domainMigratePrepare3Params >> + (dconn, params, nparams, cookieout, cookieoutlen, NULL, NULL, &uri_out, destflags); >> + } >> virObjectLock(vm); >> >> if (ret == -1) >> goto cleanup; >> >> - if (uri_out) { >> - if (virTypedParamsReplaceString(¶ms, &nparams, >> - VIR_MIGRATE_PARAM_URI, uri_out) < 0) { >> - orig_err = virSaveLastError(); >> + if (!(flags & VIR_MIGRATE_TUNNELLED)) { >> + if (uri_out) { >> + if (virTypedParamsReplaceString(¶ms, &nparams, >> + VIR_MIGRATE_PARAM_URI, uri_out) < 0) { >> + orig_err = virSaveLastError(); >> + goto finish; >> + } >> + } else { >> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", >> + _("domainMigratePrepare3 did not set uri")); >> goto finish; >> } >> - } else { >> - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", >> - _("domainMigratePrepare3 did not set uri")); >> - goto finish; >> } >> >> VIR_DEBUG("Perform3 uri=%s", NULLSTR(uri_out)); >> - ret = libxlDomainMigrationPerform(driver, vm, NULL, NULL, >> - uri_out, NULL, flags); >> + if (flags & VIR_MIGRATE_TUNNELLED) { >> + if (VIR_ALLOC(libxlTunnelMigationThreadPtr) < 0) >> + goto cleanup; >> + if (pipe(dataFD) < 0) { >> + virReportError(errno, "%s", _("Unable to make pipes")); >> + goto cleanup; >> + } >> + /* Read from pipe */ >> + libxlTunnelMigationThreadPtr->srcFD = dataFD[0]; >> + /* Write to dest stream */ >> + libxlTunnelMigationThreadPtr->st = st; >> + if (virThreadCreate(&libxlTunnelMigationThreadPtr->thread, true, >> + libxlTunnel3MigrationFunc, >> + libxlTunnelMigationThreadPtr) < 0) { >> + virReportError(errno, "%s", >> + _("Unable to create tunnel migration thread")); >> + goto cleanup; >> + } > I still wonder how this chunk above inside VIR_MIGRATE_TUNNELLED and ... > >> >> + virObjectUnlock(vm); >> + /* Send data to pipe */ >> + ret = libxlDoMigrateSend(driver, vm, flags, dataFD[1]); >> + virObjectLock(vm); >> + } else { >> + ret = libxlDomainMigrationPerform(driver, vm, NULL, NULL, >> + uri_out, NULL, flags); >> + } >> if (ret < 0) >> orig_err = virSaveLastError(); >> >> @@ -824,6 +1027,14 @@ libxlDoMigrateP2P(libxlDriverPrivatePtr driver, >> vm->def->name); >> >> cleanup: >> + if (libxlTunnelMigationThreadPtr) { >> + virThreadCancel(&libxlTunnelMigationThreadPtr->thread); >> + VIR_FREE(libxlTunnelMigationThreadPtr); >> + } >> + VIR_FORCE_CLOSE(dataFD[0]); >> + VIR_FORCE_CLOSE(dataFD[1]); > ... and this one could be turned into helpers like libxlMigrationStartTunnel and > libxlMigrationStopTunnel? The latter you probably add a dstFD such that you can > also cleanup both descriptors in StopTunnel. I sort of understand the way you > propose is to consolidate cleanup. But it would help the reader of the P2P > migration function (which is considerably big) to move the big portions of > tunnel-specific codepaths into helpers, such that the migration flow is clearer. > This is just a suggestion though, unless others feel strongly about it. > > One other issue is that virThreadCancel is asynchronous and then you close the > file descriptors after requesting the cancellation request, which sounds > erroneous prone to me IIUC. It's not guaranteed the thread has closed but that > after returning the request for canceling was successful. So probably you would > need a virThreadJoin after that, such that you can actually know the thread > really has cancelled? Then it's probably safe to close both descriptors > afterwards. Qemu driver also seems to write a single byte signalling the Tunnel > thread to abort/finish. > -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list