From: Federico Simoncelli <fsimonce@xxxxxxxxxx> Originally most of libvirt's domain-specific calls were blocking during a migration. A new mechanism to allow specific calls (blkstat/blkinfo) to be executed under such conditions has been implemented. In the long term it'd be desirable to get a more general solution to mark further APIs as migration-safe, without needing special-case code. * src/qemu/qemu_migration.c: add some additional job signal flags for doing blkstat/blkinfo during a migration * src/qemu/qemu_domain.c: add a condition variable that can be used to efficiently wait for the migration code to clear the signal flag * src/qemu/qemu_driver.c: execute blkstat/blkinfo using the job signal flags during migration --- My changes in v4: Fix the mailmap (elided from this mail) for Federico's preferred address. Add cleanup for jobCond. Defer changes to jobSignals until after jobSignalsData is stable. Alter qemuMigrationProcessJobSignals to have a mode of operation that guarantees that a jobSignals bit is cleared, regardless of other errors. 
.mailmap | 1 + AUTHORS | 2 +- src/qemu/qemu_domain.c | 14 ++++++ src/qemu/qemu_domain.h | 9 ++++ src/qemu/qemu_driver.c | 107 +++++++++++++++++++++++++++++++------------- src/qemu/qemu_migration.c | 76 +++++++++++++++++++++++++------- 6 files changed, 159 insertions(+), 50 deletions(-) diff --git a/.mailmap b/.mailmap index f73e26b..2b98322 100644 --- a/.mailmap +++ b/.mailmap diff --git a/AUTHORS b/AUTHORS index 1bb1f0f..47860a9 100644 --- a/AUTHORS +++ b/AUTHORS diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index bcacb18..8f4915c 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -89,7 +89,19 @@ static void *qemuDomainObjPrivateAlloc(void) if (VIR_ALLOC(priv) < 0) return NULL; + if (virCondInit(&priv->jobCond) < 0) + goto initfail; + + if (virCondInit(&priv->signalCond) < 0) { + ignore_value(virCondDestroy(&priv->jobCond)); + goto initfail; + } + return priv; + +initfail: + VIR_FREE(priv); + return NULL; } static void qemuDomainObjPrivateFree(void *data) @@ -101,6 +113,8 @@ static void qemuDomainObjPrivateFree(void *data) qemuDomainPCIAddressSetFree(priv->pciaddrs); virDomainChrSourceDefFree(priv->monConfig); VIR_FREE(priv->vcpupids); + ignore_value(virCondDestroy(&priv->jobCond)); + ignore_value(virCondDestroy(&priv->signalCond)); /* This should never be non-NULL if we get here, but just in case... 
*/ if (priv->mon) { diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 6d24f53..af513e7 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -47,11 +47,19 @@ enum qemuDomainJobSignals { QEMU_JOB_SIGNAL_SUSPEND = 1 << 1, /* Request VM suspend to finish live migration offline */ QEMU_JOB_SIGNAL_MIGRATE_DOWNTIME = 1 << 2, /* Request migration downtime change */ QEMU_JOB_SIGNAL_MIGRATE_SPEED = 1 << 3, /* Request migration speed change */ + QEMU_JOB_SIGNAL_BLKSTAT = 1 << 4, /* Request blkstat during migration */ + QEMU_JOB_SIGNAL_BLKINFO = 1 << 5, /* Request blkinfo during migration */ }; struct qemuDomainJobSignalsData { unsigned long long migrateDowntime; /* Data for QEMU_JOB_SIGNAL_MIGRATE_DOWNTIME */ unsigned long migrateBandwidth; /* Data for QEMU_JOB_SIGNAL_MIGRATE_SPEED */ + char *statDevName; /* Device name used by blkstat calls */ + virDomainBlockStatsPtr blockStat; /* Block statistics for QEMU_JOB_SIGNAL_BLKSTAT */ + int statRetCode; /* Return code for the blkstat calls */ + char *infoDevName; /* Device name used by blkinfo calls */ + virDomainBlockInfoPtr blockInfo; /* Block information for QEMU_JOB_SIGNAL_BLKINFO */ + int infoRetCode; /* Return code for the blkinfo calls */ }; typedef struct _qemuDomainPCIAddressSet qemuDomainPCIAddressSet; @@ -61,6 +69,7 @@ typedef struct _qemuDomainObjPrivate qemuDomainObjPrivate; typedef qemuDomainObjPrivate *qemuDomainObjPrivatePtr; struct _qemuDomainObjPrivate { virCond jobCond; /* Use in conjunction with main virDomainObjPtr lock */ + virCond signalCond; /* Use to coordinate the safe queries during migration */ enum qemuDomainJob jobActive; /* Currently running job */ unsigned int jobSignals; /* Signals for running job */ struct qemuDomainJobSignalsData jobSignalsData; /* Signal specific data */ diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index ccaae66..2bd4d0b 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -5205,15 +5205,6 @@ qemudDomainBlockStats 
(virDomainPtr dom, goto cleanup; } - if (qemuDomainObjBeginJob(vm) < 0) - goto cleanup; - - if (!virDomainObjIsActive(vm)) { - qemuReportError(VIR_ERR_OPERATION_INVALID, - "%s", _("domain is not running")); - goto endjob; - } - for (i = 0 ; i < vm->def->ndisks ; i++) { if (STREQ(path, vm->def->disks[i]->dst)) { disk = vm->def->disks[i]; @@ -5224,29 +5215,57 @@ qemudDomainBlockStats (virDomainPtr dom, if (!disk) { qemuReportError(VIR_ERR_INVALID_ARG, _("invalid path: %s"), path); - goto endjob; + goto cleanup; } if (!disk->info.alias) { qemuReportError(VIR_ERR_INTERNAL_ERROR, _("missing disk device alias name for %s"), disk->dst); - goto endjob; + goto cleanup; } priv = vm->privateData; - qemuDomainObjEnterMonitor(vm); - ret = qemuMonitorGetBlockStatsInfo(priv->mon, - disk->info.alias, - &stats->rd_req, - &stats->rd_bytes, - &stats->wr_req, - &stats->wr_bytes, - &stats->errs); - qemuDomainObjExitMonitor(vm); + if ((priv->jobActive == QEMU_JOB_MIGRATION_OUT) + || (priv->jobActive == QEMU_JOB_SAVE)) { + virDomainObjRef(vm); + while (priv->jobSignals & QEMU_JOB_SIGNAL_BLKSTAT) + ignore_value(virCondWait(&priv->signalCond, &vm->lock)); + + priv->jobSignalsData.statDevName = disk->info.alias; + priv->jobSignalsData.blockStat = stats; + priv->jobSignalsData.statRetCode = -1; + priv->jobSignals |= QEMU_JOB_SIGNAL_BLKSTAT; + + while (priv->jobSignals & QEMU_JOB_SIGNAL_BLKSTAT) + ignore_value(virCondWait(&priv->signalCond, &vm->lock)); + + ret = priv->jobSignalsData.statRetCode; + if (virDomainObjUnref(vm) == 0) + vm = NULL; + } else { + if (qemuDomainObjBeginJob(vm) < 0) + goto cleanup; + + if (!virDomainObjIsActive(vm)) { + qemuReportError(VIR_ERR_OPERATION_INVALID, + "%s", _("domain is not running")); + goto endjob; + } + + qemuDomainObjEnterMonitor(vm); + ret = qemuMonitorGetBlockStatsInfo(priv->mon, + disk->info.alias, + &stats->rd_req, + &stats->rd_bytes, + &stats->wr_req, + &stats->wr_bytes, + &stats->errs); + qemuDomainObjExitMonitor(vm); endjob: - if 
(qemuDomainObjEndJob(vm) == 0) - vm = NULL; + if (qemuDomainObjEndJob(vm) == 0) + vm = NULL; + } cleanup: if (vm) @@ -5650,20 +5669,44 @@ static int qemuDomainGetBlockInfo(virDomainPtr dom, format != VIR_STORAGE_FILE_RAW && S_ISBLK(sb.st_mode)) { qemuDomainObjPrivatePtr priv = vm->privateData; - if (qemuDomainObjBeginJob(vm) < 0) - goto cleanup; - if (!virDomainObjIsActive(vm)) - ret = 0; - else { + + if ((priv->jobActive == QEMU_JOB_MIGRATION_OUT) + || (priv->jobActive == QEMU_JOB_SAVE)) { + virDomainObjRef(vm); + while (priv->jobSignals & QEMU_JOB_SIGNAL_BLKINFO) + ignore_value(virCondWait(&priv->signalCond, &vm->lock)); + + priv->jobSignalsData.infoDevName = disk->info.alias; + priv->jobSignalsData.blockInfo = info; + priv->jobSignalsData.infoRetCode = -1; + priv->jobSignals |= QEMU_JOB_SIGNAL_BLKINFO; + + while (priv->jobSignals & QEMU_JOB_SIGNAL_BLKINFO) + ignore_value(virCondWait(&priv->signalCond, &vm->lock)); + + ret = priv->jobSignalsData.infoRetCode; + if (virDomainObjUnref(vm) == 0) + vm = NULL; + } else { + if (qemuDomainObjBeginJob(vm) < 0) + goto cleanup; + + if (!virDomainObjIsActive(vm)) { + qemuReportError(VIR_ERR_OPERATION_INVALID, + "%s", _("domain is not running")); + goto endjob; + } + qemuDomainObjEnterMonitor(vm); ret = qemuMonitorGetBlockExtent(priv->mon, disk->info.alias, &info->allocation); qemuDomainObjExitMonitor(vm); - } - if (qemuDomainObjEndJob(vm) == 0) - vm = NULL; +endjob: + if (qemuDomainObjEndJob(vm) == 0) + vm = NULL; + } } else { ret = 0; } @@ -6543,8 +6586,8 @@ qemuDomainMigrateSetMaxDowntime(virDomainPtr dom, } VIR_DEBUG("Requesting migration downtime change to %llums", downtime); - priv->jobSignals |= QEMU_JOB_SIGNAL_MIGRATE_DOWNTIME; priv->jobSignalsData.migrateDowntime = downtime; + priv->jobSignals |= QEMU_JOB_SIGNAL_MIGRATE_DOWNTIME; ret = 0; cleanup: @@ -6592,8 +6635,8 @@ qemuDomainMigrateSetMaxSpeed(virDomainPtr dom, } VIR_DEBUG("Requesting migration speed change to %luMbs", bandwidth); - priv->jobSignals |= 
QEMU_JOB_SIGNAL_MIGRATE_SPEED; priv->jobSignalsData.migrateBandwidth = bandwidth; + priv->jobSignals |= QEMU_JOB_SIGNAL_MIGRATE_SPEED; ret = 0; cleanup: diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index cb408ac..a091e2a 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -586,7 +586,8 @@ qemuMigrationSetOffline(struct qemud_driver *driver, static int qemuMigrationProcessJobSignals(struct qemud_driver *driver, virDomainObjPtr vm, - const char *job) + const char *job, + bool cleanup) { qemuDomainObjPrivatePtr priv = vm->privateData; int ret = -1; @@ -594,6 +595,11 @@ qemuMigrationProcessJobSignals(struct qemud_driver *driver, if (!virDomainObjIsActive(vm)) { qemuReportError(VIR_ERR_INTERNAL_ERROR, _("%s: %s"), job, _("guest unexpectedly quit")); + if (cleanup) { + priv->jobSignals = 0; + priv->jobSignalsData.statRetCode = -1; + priv->jobSignalsData.infoRetCode = -1; + } return -1; } @@ -633,6 +639,34 @@ qemuMigrationProcessJobSignals(struct qemud_driver *driver, qemuDomainObjExitMonitorWithDriver(driver, vm); if (ret < 0) VIR_WARN("Unable to set migration speed"); + } else if (priv->jobSignals & QEMU_JOB_SIGNAL_BLKSTAT) { + qemuDomainObjEnterMonitorWithDriver(driver, vm); + ret = qemuMonitorGetBlockStatsInfo(priv->mon, + priv->jobSignalsData.statDevName, + &priv->jobSignalsData.blockStat->rd_req, + &priv->jobSignalsData.blockStat->rd_bytes, + &priv->jobSignalsData.blockStat->wr_req, + &priv->jobSignalsData.blockStat->wr_bytes, + &priv->jobSignalsData.blockStat->errs); + qemuDomainObjExitMonitorWithDriver(driver, vm); + + priv->jobSignalsData.statRetCode = ret; + priv->jobSignals ^= QEMU_JOB_SIGNAL_BLKSTAT; + + if (ret < 0) + VIR_WARN("Unable to get block statistics"); + } else if (priv->jobSignals & QEMU_JOB_SIGNAL_BLKINFO) { + qemuDomainObjEnterMonitorWithDriver(driver, vm); + ret = qemuMonitorGetBlockExtent(priv->mon, + priv->jobSignalsData.infoDevName, + &priv->jobSignalsData.blockInfo->allocation); + 
qemuDomainObjExitMonitorWithDriver(driver, vm); + + priv->jobSignalsData.infoRetCode = ret; + priv->jobSignals ^= QEMU_JOB_SIGNAL_BLKINFO; + + if (ret < 0) + VIR_WARN("Unable to get block information"); } else { ret = 0; } @@ -726,30 +760,33 @@ int qemuMigrationWaitForCompletion(struct qemud_driver *driver, virDomainObjPtr vm) { qemuDomainObjPrivatePtr priv = vm->privateData; + const char *job; + + switch (priv->jobActive) { + case QEMU_JOB_MIGRATION_OUT: + job = _("migration job"); + break; + case QEMU_JOB_SAVE: + job = _("domain save job"); + break; + case QEMU_JOB_DUMP: + job = _("domain core dump job"); + break; + default: + job = _("job"); + } priv->jobInfo.type = VIR_DOMAIN_JOB_UNBOUNDED; while (priv->jobInfo.type == VIR_DOMAIN_JOB_UNBOUNDED) { /* Poll every 50ms for progress & to allow cancellation */ struct timespec ts = { .tv_sec = 0, .tv_nsec = 50 * 1000 * 1000ull }; - const char *job; - - switch (priv->jobActive) { - case QEMU_JOB_MIGRATION_OUT: - job = _("migration job"); - break; - case QEMU_JOB_SAVE: - job = _("domain save job"); - break; - case QEMU_JOB_DUMP: - job = _("domain core dump job"); - break; - default: - job = _("job"); + while (priv->jobSignals) { + if (qemuMigrationProcessJobSignals(driver, vm, job, false) < 0) + goto cleanup; } - if (qemuMigrationProcessJobSignals(driver, vm, job) < 0) - goto cleanup; + virCondSignal(&priv->signalCond); if (qemuMigrationUpdateJobStatus(driver, vm, job) < 0) goto cleanup; @@ -765,6 +802,11 @@ qemuMigrationWaitForCompletion(struct qemud_driver *driver, virDomainObjPtr vm) } cleanup: + while (priv->jobSignals) { + qemuMigrationProcessJobSignals(driver, vm, job, true); + } + virCondBroadcast(&priv->signalCond); + if (priv->jobInfo.type == VIR_DOMAIN_JOB_COMPLETED) return 0; else -- 1.7.4.4 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list