On 19.01.2015 22:58, John Ferlan wrote: > > > On 01/09/2015 07:22 AM, Michal Privoznik wrote: >> There's one bug that got more visible with my patches that >> automatically precreate storage on migration. The problem is, if >> there's not enough disk space on destination, we successfully finish >> the migration instead of failing. >> >> Firstly, we instruct qemu to copy the storage. And as it copies the >> data, one write() will return -ENOSPC, eventually, to which qemu >> reacts by emitting BLOCK_JOB_ERROR. The event is successfully ignored >> by us. Then, since the block job has finished, BLOCK_JOB_COMPLETED >> event is emitted too. >> >> Secondly, we are not checking for the block job completion as we >> should. Currently, we do the check by issuing 'query-block-jobs' and >> then looking in the command's output for the job we are interested in. >> If course, we don't fail if the job is not there, in which case the > > s/If/Of > >> number of total bytes to be transferred and bytes already transferred, >> well they equal both to zero. This is actually the line causing the > > s/well they equal both to zero/will both be equal to zero. > >> bug as it sees both numbers equal to each other and gives green light >> to the actual migration. >> >> The fix consist of two parts: >> >> In the first, code handling BLOCK_JOB_ERROR is added. It's a monitor >> event and we should have handler for that anyway. >> >> In the second part, the code doing drive mirroring is rewritten so it >> waits for the events instead of querying on the monitor repeatedly. >> >> Signed-off-by: Michal Privoznik <mprivozn@xxxxxxxxxx> >> --- >> src/conf/domain_conf.c | 3 ++- >> src/conf/domain_conf.h | 1 + >> src/qemu/qemu_migration.c | 25 +++++++------------------ >> src/qemu/qemu_monitor_json.c | 10 ++++++++++ >> src/qemu/qemu_process.c | 14 ++++++++++++++ >> 5 files changed, 34 insertions(+), 19 deletions(-) >> > > The concept seems reasonable - of course the code has change since you > first sent this, so a rebase will be necessary... > >> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c >> index d1a483a..751a9b5 100644 >> --- a/src/conf/domain_conf.c >> +++ b/src/conf/domain_conf.c >> @@ -787,7 +787,8 @@ VIR_ENUM_IMPL(virDomainDiskMirrorState, VIR_DOMAIN_DISK_MIRROR_STATE_LAST, >> "none", >> "yes", >> "abort", >> - "pivot") >> + "pivot", >> + "error") >> >> VIR_ENUM_IMPL(virDomainLoader, >> VIR_DOMAIN_LOADER_TYPE_LAST, >> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h >> index ac1f4f8..f18fa80 100644 >> --- a/src/conf/domain_conf.h >> +++ b/src/conf/domain_conf.h >> @@ -654,6 +654,7 @@ typedef enum { >> VIR_DOMAIN_DISK_MIRROR_STATE_READY, /* Job in second phase */ >> VIR_DOMAIN_DISK_MIRROR_STATE_ABORT, /* Job aborted, waiting for event */ >> VIR_DOMAIN_DISK_MIRROR_STATE_PIVOT, /* Job pivoted, waiting for event */ >> + VIR_DOMAIN_DISK_MIRROR_STATE_ERROR, /* Job failed */ >> >> VIR_DOMAIN_DISK_MIRROR_STATE_LAST >> } virDomainDiskMirrorState; >> diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c >> index 77e0b35..e2309ea 100644 >> --- a/src/qemu/qemu_migration.c >> +++ b/src/qemu/qemu_migration.c >> @@ -1743,7 +1743,6 @@ qemuMigrationDriveMirror(virQEMUDriverPtr driver, >> >> for (i = 0; i < vm->def->ndisks; i++) { >> virDomainDiskDefPtr disk = vm->def->disks[i]; >> - virDomainBlockJobInfo info; >> >> /* skip shared, RO and source-less disks */ >> if (disk->src->shared || disk->src->readonly || >> @@ -1768,6 +1767,7 @@ qemuMigrationDriveMirror(virQEMUDriverPtr driver, >> if (mon_ret < 0) >> goto error; >> >> + disk->mirrorJob = VIR_DOMAIN_BLOCK_JOB_TYPE_COPY; > > I assume this is how we "ensure" we get into the else of > qemuProcessHandleBlockJob. Exactly. > Is setting this outside the context of this > bugfix fixing a bug? That is should it have been set all along? Curious > mostly. Well, it wouldn't hurt anything it it was set. On the other hand, it wouldn't help anything either. > >> lastGood = i; >> >> /* wait for completion */ >> @@ -1775,38 +1775,27 @@ qemuMigrationDriveMirror(virQEMUDriverPtr driver, >> /* Poll every 500ms for progress & to allow cancellation */ >> struct timespec ts = { .tv_sec = 0, .tv_nsec = 500 * 1000 * 1000ull }; >> >> - memset(&info, 0, sizeof(info)); >> - >> - if (qemuDomainObjEnterMonitorAsync(driver, vm, >> - QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) >> - goto error; >> if (priv->job.asyncAbort) { >> /* explicitly do this *after* we entered the monitor, >> * as this is a critical section so we are guaranteed >> * priv->job.asyncAbort will not change */ > > So, since we're not entering the monitor any more - does the text need > to change? Oh, right. > Also, is it 'safe' to look at priv->job.current->type? Or > even priv->job.asyncAbort since we won't have a monitor lock? I think it is. If you look at qemuDomainAbortJob() impl, there's qemuDomainObjAbortAsyncJob() called with only @vm lock held, outside of Enter/ExitMonitor(). And the qemuDomainObjAbortAsyncJob() just just a wrapper over direct access to priv->job.asyncAbort. > > Considering the recent series on ExitMonitor error checks vis-a-vis the > vm dying during some monitor interaction could this end up being an > infinite loop? That is would the necessary mirrorState be set in the > case where the vm dies? Interesting. I don't think that can happen. If vm dies, disk->mirrorState should go to error state. > >> - qemuDomainObjExitMonitor(driver, vm); >> priv->job.current->type = VIR_DOMAIN_JOB_CANCELLED; >> virReportError(VIR_ERR_OPERATION_ABORTED, _("%s: %s"), >> qemuDomainAsyncJobTypeToString(priv->job.asyncJob), >> _("canceled by client")); > > s/canceled/cancelled > > (I see there's also another place in qemuMigrationUpdateJobStatus as > well as libvirt-domain and virsh* modules, sigh) > >> goto error; >> } >> - mon_ret = qemuMonitorBlockJobInfo(priv->mon, diskAlias, &info, >> - NULL); >> - qemuDomainObjExitMonitor(driver, vm); >> >> - if (mon_ret < 0) >> - goto error; >> - >> - if (info.cur == info.end) { >> + if (disk->mirrorState == VIR_DOMAIN_DISK_MIRROR_STATE_READY) { >> VIR_DEBUG("Drive mirroring of '%s' completed", diskAlias); >> break; >> + } else if (disk->mirrorState == VIR_DOMAIN_DISK_MIRROR_STATE_ERROR) { >> + virReportError(VIR_ERR_OPERATION_ABORTED, _("%s: %s"), >> + qemuDomainAsyncJobTypeToString(priv->job.asyncJob), >> + _("canceled by destination")); > > s/canceled/cancelled > >> + goto error; >> } >> >> - /* XXX Frankly speaking, we should listen to the events, >> - * instead of doing this. But this works for now and we >> - * are doing something similar in migration itself anyway */ >> - >> virObjectUnlock(vm); >> >> nanosleep(&ts, NULL); >> diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c >> index e567aa7..5d6bbca 100644 >> --- a/src/qemu/qemu_monitor_json.c >> +++ b/src/qemu/qemu_monitor_json.c >> @@ -76,6 +76,7 @@ static void qemuMonitorJSONHandlePMWakeup(qemuMonitorPtr mon, virJSONValuePtr da >> static void qemuMonitorJSONHandlePMSuspend(qemuMonitorPtr mon, virJSONValuePtr data); >> static void qemuMonitorJSONHandleBlockJobCompleted(qemuMonitorPtr mon, virJSONValuePtr data); >> static void qemuMonitorJSONHandleBlockJobCanceled(qemuMonitorPtr mon, virJSONValuePtr data); >> +static void qemuMonitorJSONHandleBlockJobError(qemuMonitorPtr mon, virJSONValuePtr data); >> static void qemuMonitorJSONHandleBlockJobReady(qemuMonitorPtr mon, virJSONValuePtr data); >> static void qemuMonitorJSONHandleBalloonChange(qemuMonitorPtr mon, virJSONValuePtr data); >> static void qemuMonitorJSONHandlePMSuspendDisk(qemuMonitorPtr mon, virJSONValuePtr data); >> @@ -94,6 +95,7 @@ static qemuEventHandler eventHandlers[] = { >> { "BLOCK_IO_ERROR", qemuMonitorJSONHandleIOError, }, >> { "BLOCK_JOB_CANCELLED", qemuMonitorJSONHandleBlockJobCanceled, }, >> { "BLOCK_JOB_COMPLETED", qemuMonitorJSONHandleBlockJobCompleted, }, >> + { "BLOCK_JOB_ERROR", qemuMonitorJSONHandleBlockJobError, }, >> { "BLOCK_JOB_READY", qemuMonitorJSONHandleBlockJobReady, }, >> { "DEVICE_DELETED", qemuMonitorJSONHandleDeviceDeleted, }, >> { "DEVICE_TRAY_MOVED", qemuMonitorJSONHandleTrayChange, }, >> @@ -842,6 +844,14 @@ qemuMonitorJSONHandleBlockJobCanceled(qemuMonitorPtr mon, >> } >> >> static void >> +qemuMonitorJSONHandleBlockJobError(qemuMonitorPtr mon, >> + virJSONValuePtr data) >> +{ >> + qemuMonitorJSONHandleBlockJobImpl(mon, data, >> + VIR_DOMAIN_BLOCK_JOB_FAILED); >> +} >> + >> +static void >> qemuMonitorJSONHandleBlockJobReady(qemuMonitorPtr mon, >> virJSONValuePtr data) >> { >> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c >> index c18204b..b29ecf1 100644 >> --- a/src/qemu/qemu_process.c >> +++ b/src/qemu/qemu_process.c >> @@ -1110,6 +1110,20 @@ qemuProcessHandleBlockJob(qemuMonitorPtr mon ATTRIBUTE_UNUSED, >> disk->mirrorJob = VIR_DOMAIN_BLOCK_JOB_TYPE_UNKNOWN; >> save = true; >> } >> + } else if (disk->mirrorJob == VIR_DOMAIN_BLOCK_JOB_TYPE_COPY) { >> + switch ((virConnectDomainEventBlockJobStatus) status) { >> + case VIR_DOMAIN_BLOCK_JOB_READY: >> + case VIR_DOMAIN_BLOCK_JOB_COMPLETED: > > "theoretically" if status == COMPLETED we shouldn't be here either since > that the first "if" checks that... > >> + disk->mirrorState = VIR_DOMAIN_DISK_MIRROR_STATE_READY; >> + break; >> + case VIR_DOMAIN_BLOCK_JOB_CANCELED: >> + case VIR_DOMAIN_BLOCK_JOB_FAILED: >> + disk->mirrorState = VIR_DOMAIN_DISK_MIRROR_STATE_ERROR; >> + break; >> + case VIR_DOMAIN_BLOCK_JOB_LAST: >> + VIR_DEBUG("should not get here"); >> + break; > > Not that we'd get here, but what effect is not setting mirrorState > > Is a switch overkill in this instance? I bet compiler will evaluate what's best and generate efficient code. Michal -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list