On Thu, Jan 05, 2023 at 12:47:04 +0100, Pavel Hrdina wrote: > If the daemon crashes or is restarted while the snapshot delete is in > progress we have to handle it gracefully to not leave any block jobs > active. > > For now we will simply abort the snapshot delete operation so user can > start it again. We need to refuse deleting external snapshots if there > is already another active job as we would have to figure out which jobs > we can abort. > > Signed-off-by: Pavel Hrdina <phrdina@xxxxxxxxxx> > Reviewed-by: Peter Krempa <pkrempa@xxxxxxxxxx> > --- > src/qemu/qemu_process.c | 32 ++++++++++++++++++++++++++++++++ > src/qemu/qemu_snapshot.c | 7 +++++++ > 2 files changed, 39 insertions(+) > > diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c > index b6adcf2f2a..09ab231aaa 100644 > --- a/src/qemu/qemu_process.c > +++ b/src/qemu/qemu_process.c > @@ -3677,6 +3677,37 @@ qemuProcessRecoverMigration(virQEMUDriver *driver, > } > > > +static void > +qemuProcessAbortSnapshotDelete(virDomainObj *vm) > +{ > + size_t i; > + qemuDomainObjPrivate *priv = vm->privateData; > + > + for (i = 0; i < vm->def->ndisks; i++) { > + virDomainDiskDef *disk = vm->def->disks[i]; > + g_autoptr(qemuBlockJobData) diskJob = qemuBlockJobDiskGetJob(disk); > + > + if (!diskJob) > + continue; > + > + if (diskJob->type != QEMU_BLOCKJOB_TYPE_COMMIT && > + diskJob->type != QEMU_BLOCKJOB_TYPE_ACTIVE_COMMIT) { > + continue; > + } > + > + qemuBlockJobSyncBegin(diskJob); > + > + qemuDomainObjEnterMonitor(vm); > + ignore_value(qemuMonitorBlockJobCancel(priv->mon, diskJob->name, false)); > + qemuDomainObjExitMonitor(vm); > + > + diskJob->state = QEMU_BLOCKJOB_STATE_ABORTING; > + > + qemuBlockJobSyncEnd(vm, diskJob, VIR_ASYNC_JOB_NONE); > + } > +} > + > + > static int > qemuProcessRecoverJob(virQEMUDriver *driver, > virDomainObj *vm, > @@ -3726,6 +3757,7 @@ qemuProcessRecoverJob(virQEMUDriver *driver, > vm->def->name); > } > } > + qemuProcessAbortSnapshotDelete(vm); Note that this code path is 
taken for any job using VIR_ASYNC_JOB_SNAPSHOT, and thus also for the snapshot creation job. For snapshot creation we only check that the disks which will be part of the snapshot don't have an active block job; any other disks are allowed to keep their block jobs running. Since you use the same code path here, this recovery can still kill off unsuspecting block jobs that are not related to snapshot deletion. The above code must not be called when the recovered job is snapshot creation.