On Thu, Dec 08, 2022 at 14:31:00 +0100, Pavel Hrdina wrote: > When deleting snapshot we are starting block-commit job over all disks > that are part of the snapshot. > > This operation may fail as it writes data changes to the backing qcow2 > image so we need to wait for all the disks to finish the operation and > wait for correct signal from QEMU. If deleting active snapshot we will > get `ready` signal and for inactive snapshots we need to disable > autofinalize in order to get `pending` signal. > > At this point if commit for any disk fails for some reason and we abort > the VM is still in consistent state and user can fix the reason why the > deletion failed. > > After that we do `pivot` or `finalize` if it's active snapshot or not to > finish the block job. It still may fail but there is nothing else we can > do about it. > > Signed-off-by: Pavel Hrdina <phrdina@xxxxxxxxxx> > --- > src/qemu/qemu_snapshot.c | 266 +++++++++++++++++++++++++++++++++++---- > 1 file changed, 245 insertions(+), 21 deletions(-) > > diff --git a/src/qemu/qemu_snapshot.c b/src/qemu/qemu_snapshot.c > index 882224b0a7..c493a3e94f 100644 > --- a/src/qemu/qemu_snapshot.c > +++ b/src/qemu/qemu_snapshot.c > @@ -2394,6 +2394,207 @@ qemuSnapshotChildrenReparent(void *payload, > } > > > +/* Deleting external snapshot is started by running qemu block-commit job. > + * We need to wait for all block-commit jobs to be 'ready' or 'pending' to > + * continue with external snapshot deletion. */ > +static int > +qemuSnapshotJobIsRunning(qemuBlockjobState state) This is more about snapshot-deletion blockjobs specifically... so perhaps: qemuSnapshotDeleteBlockjobIsRunning ? 
> +{ > + switch (state) { > + case QEMU_BLOCKJOB_STATE_COMPLETED: > + case QEMU_BLOCKJOB_STATE_FAILED: > + case QEMU_BLOCKJOB_STATE_CANCELLED: > + case QEMU_BLOCKJOB_STATE_READY: > + case QEMU_BLOCKJOB_STATE_CONCLUDED: > + case QEMU_BLOCKJOB_STATE_PENDING: > + return 0; > + > + case QEMU_BLOCKJOB_STATE_NEW: > + case QEMU_BLOCKJOB_STATE_RUNNING: > + case QEMU_BLOCKJOB_STATE_ABORTING: > + case QEMU_BLOCKJOB_STATE_PIVOTING: > + return 1; > + > + case QEMU_BLOCKJOB_STATE_LAST: > + break; > + } > + > + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > + _("invalid block job state")); > + return -1; Can this happen? > +} > + > + > +/* When finishing or aborting qemu blockjob we only need to know if the > + * job is still active or not. */ > +static int > +qemuSnapshotJobIsActive(qemuBlockjobState state) Same naming pattern as above. > +{ > + switch (state) { > + case QEMU_BLOCKJOB_STATE_COMPLETED: > + case QEMU_BLOCKJOB_STATE_FAILED: > + case QEMU_BLOCKJOB_STATE_CANCELLED: > + case QEMU_BLOCKJOB_STATE_CONCLUDED: > + return 0; > + > + case QEMU_BLOCKJOB_STATE_READY: > + case QEMU_BLOCKJOB_STATE_NEW: > + case QEMU_BLOCKJOB_STATE_RUNNING: > + case QEMU_BLOCKJOB_STATE_ABORTING: > + case QEMU_BLOCKJOB_STATE_PENDING: > + case QEMU_BLOCKJOB_STATE_PIVOTING: > + return 1; > + > + case QEMU_BLOCKJOB_STATE_LAST: > + break; > + } > + > + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > + _("invalid block job state")); > + return -1; -||- > +} > + > + > +/* Wait for qemu blockjob to finish 'block-commit' operation until it is > + * ready to be finished by calling 'block-pivot' or 'block-finalize'. 
*/ > +static int > +qemuSnapshotJobRunning(virDomainObj *vm, > + qemuBlockJobData *job) > +{ > + int rc; > + qemuBlockJobUpdate(vm, job, VIR_ASYNC_JOB_SNAPSHOT); > + > + while ((rc = qemuSnapshotJobIsRunning(job->state)) > 0) { > + if (qemuDomainObjWait(vm) < 0) > + return -1; > + qemuBlockJobUpdate(vm, job, VIR_ASYNC_JOB_SNAPSHOT); > + } > + > + if (rc < 0) > + return -1; > + > + return 0; > +} > + > + > +/* Wait for qemu blockjob to be done after 'block-pivot' or 'block-finalize' > + * was started. */ > +static int > +qemuSnapshotJobFinishing(virDomainObj *vm, > + qemuBlockJobData *job) > +{ > + int rc; > + qemuBlockJobUpdate(vm, job, VIR_ASYNC_JOB_SNAPSHOT); > + > + while ((rc = qemuSnapshotJobIsActive(job->state)) > 0) { > + if (qemuDomainObjWait(vm) < 0) > + return -1; > + qemuBlockJobUpdate(vm, job, VIR_ASYNC_JOB_SNAPSHOT); > + } > + > + if (rc < 0) > + return -1; > + > + return 0; > +} And both functions above have confusing naming too. > + > + > +static int > +qemuSnapshotDiscardExternal(virDomainObj *vm, > + GSList *externalData) > +{ > + GSList *cur = NULL; > + > + for (cur = externalData; cur; cur = g_slist_next(cur)) { > + qemuSnapshotDeleteExternalData *data = cur->data; > + virTristateBool autofinalize = VIR_TRISTATE_BOOL_NO; > + unsigned int commitFlags = VIR_DOMAIN_BLOCK_COMMIT_DELETE; > + > + if (data->domDisk->src == data->diskSrc) { > + commitFlags |= VIR_DOMAIN_BLOCK_COMMIT_ACTIVE; > + autofinalize = VIR_TRISTATE_BOOL_YES; > + } > + > + data->job = qemuBlockCommit(vm, > + data->domDisk, > + data->parentDiskSrc, > + data->diskSrc, > + data->prevDiskSrc, > + 0, > + VIR_ASYNC_JOB_SNAPSHOT, > + autofinalize, > + commitFlags); [1] > + > + if (!data->job) > + goto error; > + } > + > + for (cur = externalData; cur; cur = g_slist_next(cur)) { > + qemuSnapshotDeleteExternalData *data = cur->data; > + > + if (qemuSnapshotJobRunning(vm, data->job) < 0) > + goto error; > + > + if (data->job->state == QEMU_BLOCKJOB_STATE_FAILED) { > + 
virReportError(VIR_ERR_INTERNAL_ERROR, > + _("block commit failed while deleting disk '%s' snapshot: '%s'"), > + data->snapDisk->name, data->job->errmsg); > + goto error; > + } > + } > + > + for (cur = externalData; cur; cur = g_slist_next(cur)) { > + qemuSnapshotDeleteExternalData *data = cur->data; > + > + if (data->job->state == QEMU_BLOCKJOB_STATE_READY) { > + if (qemuBlockPivot(vm, data->job, VIR_ASYNC_JOB_SNAPSHOT, NULL) < 0) > + goto error; > + } else if (data->job->state == QEMU_BLOCKJOB_STATE_PENDING) { > + if (qemuBlockFinalize(vm, data->job, VIR_ASYNC_JOB_SNAPSHOT) < 0) > + goto error; > + } > + > + if (qemuSnapshotJobFinishing(vm, data->job) < 0) > + goto error; > + > + if (data->job->state == QEMU_BLOCKJOB_STATE_FAILED) { > + virReportError(VIR_ERR_INTERNAL_ERROR, > + _("finishing block job failed while deleting disk '%s' snapshot: '%s'"), > + data->snapDisk->name, data->job->errmsg); > + goto error; > + } > + > + qemuBlockJobSyncEnd(vm, data->job, VIR_ASYNC_JOB_SNAPSHOT); > + } So 'externalData' is passed here from the caller, and the caller simply calls the equivalent of 'g_free' on the individual struct. Now this leaks the 'job' field allocated in [1] since you are still holding the reference. > + > + return 0; > + > + error: > + for (cur = externalData; cur; cur = g_slist_next(cur)) { > + qemuDomainObjPrivate *priv = vm->privateData; > + qemuSnapshotDeleteExternalData *data = cur->data; > + > + if (!data->job) > + continue; > + > + qemuBlockJobUpdate(vm, data->job, VIR_ASYNC_JOB_SNAPSHOT); > + > + if (qemuSnapshotJobIsActive(data->job->state)) { > + if (qemuDomainObjEnterMonitorAsync(vm, VIR_ASYNC_JOB_SNAPSHOT) == 0) { > + ignore_value(qemuMonitorBlockJobCancel(priv->mon, data->job->name, false)); > + qemuDomainObjExitMonitor(vm); > + > + data->job->state = QEMU_BLOCKJOB_STATE_ABORTING; > + } > + } > + > + qemuBlockJobSyncEnd(vm, data->job, VIR_ASYNC_JOB_SNAPSHOT); > + } And in this code path too. 
> + > + return -1; > +} > + > + > static int > qemuSnapshotDiscardMetadata(virDomainObj *vm, > virDomainMomentObj *snap,