On Mon, Jan 13, 2014 at 14:28:12 +0800, mrhines@xxxxxxxxxxxxxxxxxx wrote:
> From: "Michael R. Hines" <mrhines@xxxxxxxxxx>
>
> RDMA Live migration requires registering memory with the hardware,

Hmm, I forgot to ask when I was reviewing the previous patch, but does
any of this RDMA migration functionality require special privileges, or
is any unprivileged process able to use RDMA?

> and thus QEMU offers a new 'capability' which supports the ability
> to pre-register / mlock() the guest memory in advance for higher
> RDMA performance before the migration begins.
>
> This patch exposes this capability with the following example usage:
>
> virsh migrate --live --rdma-pin-all --migrateuri rdma:hostname domain qemu+ssh://hostname/system

The URI should be rdma://hostname

> This capability is disabled by default, and thus omitting it will
> cause QEMU to register the memory with the hardware on an on-demand basis.
>
> Signed-off-by: Michael R. Hines <mrhines@xxxxxxxxxx>
> ---
>  include/libvirt/libvirt.h.in |  1 +
>  src/qemu/qemu_migration.c    | 64 ++++++++++++++++++++++++++++++++++++++++++++
>  src/qemu/qemu_migration.h    |  3 ++-
>  src/qemu/qemu_monitor.c      |  2 +-
>  src/qemu/qemu_monitor.h      |  1 +
>  tools/virsh-domain.c         |  7 +++++
>  6 files changed, 76 insertions(+), 2 deletions(-)
>
> diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in
> index 5ac2694..476521b 100644
> --- a/include/libvirt/libvirt.h.in
> +++ b/include/libvirt/libvirt.h.in
> @@ -1192,6 +1192,7 @@ typedef enum {
>      VIR_MIGRATE_OFFLINE           = (1 << 10), /* offline migrate */
>      VIR_MIGRATE_COMPRESSED        = (1 << 11), /* compress data during migration */
>      VIR_MIGRATE_ABORT_ON_ERROR    = (1 << 12), /* abort migration on I/O errors happened during migration */
> +    VIR_MIGRATE_RDMA_PIN_ALL      = (1 << 13), /* RDMA memory pinning */
>  } virDomainMigrateFlags;
>  
>  
> diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
> index 1e0f538..f4358ba 100644
> --- a/src/qemu/qemu_migration.c
> +++ b/src/qemu/qemu_migration.c
> @@ -1566,6 +1566,46 @@ cleanup:
>  }
>  
>  static int
> +qemuMigrationSetPinAll(virQEMUDriverPtr driver,
> +                          virDomainObjPtr vm,
> +                          enum qemuDomainAsyncJob job)

Bad indentation.

> +{
> +    qemuDomainObjPrivatePtr priv = vm->privateData;
> +    int ret;
> +
> +    if (qemuDomainObjEnterMonitorAsync(driver, vm, job) < 0)
> +        return -1;
> +
> +    ret = qemuMonitorGetMigrationCapability(
> +                priv->mon,
> +                QEMU_MONITOR_MIGRATION_CAPS_RDMA_PIN_ALL);

Is this capability always present when QEMU supports RDMA migration? If
so, it could be used when we check if QEMU supports RDMA migration.
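
Something like the following is what I have in mind (just a sketch; the
helper name is made up, and it assumes the capability really is always
present in RDMA-capable QEMU binaries):

    /* Hypothetical probe: ask the monitor whether QEMU knows about the
     * rdma-pin-all capability and use that as the indication that this
     * binary supports RDMA migration at all.  Returns 1 if supported,
     * 0 if not, -1 on error. */
    static int
    qemuMigrationProbeRDMA(virQEMUDriverPtr driver,
                           virDomainObjPtr vm,
                           enum qemuDomainAsyncJob job)
    {
        qemuDomainObjPrivatePtr priv = vm->privateData;
        int ret;

        if (qemuDomainObjEnterMonitorAsync(driver, vm, job) < 0)
            return -1;

        ret = qemuMonitorGetMigrationCapability(
                    priv->mon,
                    QEMU_MONITOR_MIGRATION_CAPS_RDMA_PIN_ALL);

        qemuDomainObjExitMonitor(driver, vm);
        return ret;
    }
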
> +
> +    if (ret < 0) {
> +        goto cleanup;
> +    } else if (ret == 0) {
> +        if (job == QEMU_ASYNC_JOB_MIGRATION_IN) {
> +            virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s",
> +                           _("rdma pinning migration is not supported by "
> +                             "target QEMU binary"));
> +        } else {
> +            virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s",
> +                           _("rdma pinning migration is not supported by "
> +                             "source QEMU binary"));
> +        }
> +        ret = -1;
> +        goto cleanup;
> +    }
> +
> +    ret = qemuMonitorSetMigrationCapability(
> +                priv->mon,
> +                QEMU_MONITOR_MIGRATION_CAPS_RDMA_PIN_ALL);
> +
> +cleanup:
> +    qemuDomainObjExitMonitor(driver, vm);
> +    return ret;
> +}
> +
> +static int
>  qemuMigrationWaitForSpice(virQEMUDriverPtr driver,
>                            virDomainObjPtr vm)
>  {
> @@ -2395,6 +2435,18 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver,
>                                         QEMU_ASYNC_JOB_MIGRATION_IN) < 0)
>          goto stop;
>  
> +    if (flags & VIR_MIGRATE_RDMA_PIN_ALL &&
> +        qemuMigrationSetPinAll(driver, vm,
> +                                   QEMU_ASYNC_JOB_MIGRATION_IN) < 0)

Indentation. And the flag should be rejected when protocol is not rdma.

> +        goto stop;
> +
> +    if (strstr(protocol, "rdma")) {

I think we don't want to match just a part of the protocol; use STREQ()
instead.

> +        unsigned long long memKB = vm->def->mem.hard_limit ?
> +                                   vm->def->mem.hard_limit :
> +                                   vm->def->mem.max_balloon + 1024 * 1024;
> +            virProcessSetMaxMemLock(vm->pid, memKB * 3);

Apart from the weird indentation of the virProcessSetMaxMemLock line,
there are several issues here:

- this code should be moved inside qemuMigrationSetPinAll and done only
  if the VIR_MIGRATE_RDMA_PIN_ALL flag was used.
- virProcessSetMaxMemLock wants the value in bytes, thus memKB should
  rather be multiplied by 1024.
- what memory is going to be mlock()ed with rdma-pin-all? Is it going
  to be all memory allocated by QEMU or just the domain's memory? If
  it's all QEMU memory, we already painfully found out it's impossible
  to automatically compute how much memory QEMU consumes in addition to
  the configured domain's memory, and I think a better approach would
  be to just forbid migration with rdma-pin-all unless hard_limit is
  specified.

> +    }
> +
>      if (mig->lockState) {
>          VIR_DEBUG("Received lockstate %s", mig->lockState);
>          VIR_FREE(priv->lockState);
> @@ -3209,6 +3261,11 @@ qemuMigrationRun(virQEMUDriverPtr driver,
>                                   QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
>          goto cleanup;
>  
> +    if (flags & VIR_MIGRATE_RDMA_PIN_ALL &&
> +        qemuMigrationSetPinAll(driver, vm,
> +                                   QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
> +        goto cleanup;
> +
>      if (qemuDomainObjEnterMonitorAsync(driver, vm,
>                                         QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
>          goto cleanup;

Same comments as for the equivalent hunk in qemuMigrationPrepareAny.

> @@ -3238,6 +3295,13 @@ qemuMigrationRun(virQEMUDriverPtr driver,
>  
>      switch (spec->destType) {
>      case MIGRATION_DEST_HOST:
> +        if (strstr(spec->dest.host.proto, "rdma")) {
> +            unsigned long long memKB = vm->def->mem.hard_limit ?
> +                                       vm->def->mem.hard_limit :
> +                                       vm->def->mem.max_balloon + 1024 * 1024;
> +            virProcessSetMaxMemLock(vm->pid, memKB * 3);
> +        }
> +
>          ret = qemuMonitorMigrateToHost(priv->mon, migrate_flags,
>                                         spec->dest.host.proto,
>                                         spec->dest.host.name,

Same comments as for the equivalent hunk in qemuMigrationPrepareAny.

Jirka
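
P.S. To make the bytes-vs-KiB point concrete, here is roughly what I
would expect the mlock limit code to look like once it moves inside
qemuMigrationSetPinAll (a sketch only; it assumes the migration flags
get passed down to that function, and whether any headroom on top of
hard_limit is needed is exactly the open question from my last bullet):

    /* Only raise the memory locking limit when pinning was requested. */
    if (flags & VIR_MIGRATE_RDMA_PIN_ALL) {
        unsigned long long memKB = vm->def->mem.hard_limit ?
                                   vm->def->mem.hard_limit :
                                   vm->def->mem.max_balloon + 1024 * 1024;

        /* virProcessSetMaxMemLock() takes bytes, not KiB */
        if (virProcessSetMaxMemLock(vm->pid, memKB * 1024) < 0)
            goto cleanup;
    }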