From: "Michael R. Hines" <mrhines@xxxxxxxxxx> RDMA Live migration requires registering memory with the hardware, and thus QEMU offers a new 'capability' which supports the ability to pre-register / mlock() the guest memory in advance for higher RDMA performance before the migration begins. This patch exposes this capability with the following example usage: virsh migrate --live --rdma-pin-all --migrateuri rdma:hostname domain qemu+ssh://hostname/system This capability is disabled by default, and thus ommiting it will cause QEMU to register the memory with the hardware in an on-demand basis. Signed-off-by: Michael R. Hines <mrhines@xxxxxxxxxx> --- include/libvirt/libvirt.h.in | 1 + src/qemu/qemu_migration.c | 64 ++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_migration.h | 3 ++- src/qemu/qemu_monitor.c | 2 +- src/qemu/qemu_monitor.h | 1 + tools/virsh-domain.c | 7 +++++ 6 files changed, 76 insertions(+), 2 deletions(-) diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in index 5ac2694..476521b 100644 --- a/include/libvirt/libvirt.h.in +++ b/include/libvirt/libvirt.h.in @@ -1192,6 +1192,7 @@ typedef enum { VIR_MIGRATE_OFFLINE = (1 << 10), /* offline migrate */ VIR_MIGRATE_COMPRESSED = (1 << 11), /* compress data during migration */ VIR_MIGRATE_ABORT_ON_ERROR = (1 << 12), /* abort migration on I/O errors happened during migration */ + VIR_MIGRATE_RDMA_PIN_ALL = (1 << 13), /* RDMA memory pinning */ } virDomainMigrateFlags; diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 1e0f538..f4358ba 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1566,6 +1566,46 @@ cleanup: } static int +qemuMigrationSetPinAll(virQEMUDriverPtr driver, + virDomainObjPtr vm, + enum qemuDomainAsyncJob job) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + int ret; + + if (qemuDomainObjEnterMonitorAsync(driver, vm, job) < 0) + return -1; + + ret = qemuMonitorGetMigrationCapability( + priv->mon, + QEMU_MONITOR_MIGRATION_CAPS_RDMA_PIN_ALL); + + if (ret < 0) { + goto cleanup; + } else if (ret == 0) { + if (job == QEMU_ASYNC_JOB_MIGRATION_IN) { + virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s", + _("rdma pinning migration is not supported by " + "target QEMU binary")); + } else { + virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s", + _("rdma pinning migration is not supported by " + "source QEMU binary")); + } + ret = -1; + goto cleanup; + } + + ret = qemuMonitorSetMigrationCapability( + priv->mon, + QEMU_MONITOR_MIGRATION_CAPS_RDMA_PIN_ALL); + +cleanup: + qemuDomainObjExitMonitor(driver, vm); + return ret; +} + +static int qemuMigrationWaitForSpice(virQEMUDriverPtr driver, virDomainObjPtr vm) { @@ -2395,6 +2435,18 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver, QEMU_ASYNC_JOB_MIGRATION_IN) < 0) goto stop; + if (flags & VIR_MIGRATE_RDMA_PIN_ALL && + qemuMigrationSetPinAll(driver, vm, + QEMU_ASYNC_JOB_MIGRATION_IN) < 0) + goto stop; + + if (strstr(protocol, "rdma")) { + unsigned long long memKB = vm->def->mem.hard_limit ? + vm->def->mem.hard_limit : + vm->def->mem.max_balloon + 1024 * 1024; + virProcessSetMaxMemLock(vm->pid, memKB * 3); + } + if (mig->lockState) { VIR_DEBUG("Received lockstate %s", mig->lockState); VIR_FREE(priv->lockState); @@ -3209,6 +3261,11 @@ qemuMigrationRun(virQEMUDriverPtr driver, QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) goto cleanup; + if (flags & VIR_MIGRATE_RDMA_PIN_ALL && + qemuMigrationSetPinAll(driver, vm, + QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) + goto cleanup; + if (qemuDomainObjEnterMonitorAsync(driver, vm, QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) goto cleanup; @@ -3238,6 +3295,13 @@ qemuMigrationRun(virQEMUDriverPtr driver, switch (spec->destType) { case MIGRATION_DEST_HOST: + if (strstr(spec->dest.host.proto, "rdma")) { + unsigned long long memKB = vm->def->mem.hard_limit ? + vm->def->mem.hard_limit : + vm->def->mem.max_balloon + 1024 * 1024; + virProcessSetMaxMemLock(vm->pid, memKB * 3); + } + ret = qemuMonitorMigrateToHost(priv->mon, migrate_flags, spec->dest.host.proto, spec->dest.host.name, diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h index cafa2a2..a76aaef 100644 --- a/src/qemu/qemu_migration.h +++ b/src/qemu/qemu_migration.h @@ -39,7 +39,8 @@ VIR_MIGRATE_UNSAFE | \ VIR_MIGRATE_OFFLINE | \ VIR_MIGRATE_COMPRESSED | \ - VIR_MIGRATE_ABORT_ON_ERROR) + VIR_MIGRATE_ABORT_ON_ERROR | \ + VIR_MIGRATE_RDMA_PIN_ALL) /* All supported migration parameters and their types. */ # define QEMU_MIGRATION_PARAMETERS \ diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c index 5a450e2..86bffaa 100644 --- a/src/qemu/qemu_monitor.c +++ b/src/qemu/qemu_monitor.c @@ -118,7 +118,7 @@ VIR_ENUM_IMPL(qemuMonitorMigrationStatus, VIR_ENUM_IMPL(qemuMonitorMigrationCaps, QEMU_MONITOR_MIGRATION_CAPS_LAST, - "xbzrle") + "xbzrle", "rdma-pin-all") VIR_ENUM_IMPL(qemuMonitorVMStatus, QEMU_MONITOR_VM_STATUS_LAST, diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h index 16b0b77..a8b1cc6 100644 --- a/src/qemu/qemu_monitor.h +++ b/src/qemu/qemu_monitor.h @@ -452,6 +452,7 @@ int qemuMonitorGetSpiceMigrationStatus(qemuMonitorPtr mon, typedef enum { QEMU_MONITOR_MIGRATION_CAPS_XBZRLE, + QEMU_MONITOR_MIGRATION_CAPS_RDMA_PIN_ALL, QEMU_MONITOR_MIGRATION_CAPS_LAST } qemuMonitorMigrationCaps; diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c index 1fe138c..31df7f6 100644 --- a/tools/virsh-domain.c +++ b/tools/virsh-domain.c @@ -8532,6 +8532,10 @@ static const vshCmdOptDef opts_migrate[] = { .type = VSH_OT_BOOL, .help = N_("compress repeated pages during live migration") }, + {.name = "rdma-pin-all", + .type = VSH_OT_BOOL, + .help = N_("support memory pinning during RDMA live migration") + }, {.name = "abort-on-error", .type = VSH_OT_BOOL, .help = N_("abort on soft errors during migration") @@ -8676,6 +8680,9 @@ doMigrate(void *opaque) if (vshCommandOptBool(cmd, "compressed")) flags |= VIR_MIGRATE_COMPRESSED; + if (vshCommandOptBool(cmd, "rdma-pin-all")) + flags |= VIR_MIGRATE_RDMA_PIN_ALL; + if (vshCommandOptBool(cmd, "offline")) { flags |= VIR_MIGRATE_OFFLINE; } -- 1.8.1.2 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list