The libvirt libxl driver has no access to FDs associated with VM disks. The disks are opened by libxl.so and any related FDs are not exposed to applications. The prevents using virtlockd's auto-release feature to release locks when the FD is closed. Acquiring and releasing locks is explicitly handled by the libxl driver. The current logic is structured such that locks are acquired in libxlDomainStart and released in libxlDomainCleanup. This works well except for migration, where the locks must be released on the source host before the domain can be started on the destination host, but the domain cannot be cleaned up until the migration confirmation stage. When libxlDomainCleanup if finally called in the confirm stage, locks are again released resulting in confusing errors from virtlockd and libvirtd virtlockd[8095]: resource busy: Lockspace resource 'xxxxxx' is not locked libvirtd[8050]: resource busy: Lockspace resource 'xxxxxx' is not locked libvirtd[8050]: Unable to release lease on testvm The error is also encountered in some error cases, e.g. when libxlDomainStart fails before acquiring locks and libxlDomainCleanup is still used for cleanup. In lieu of a mechanism to check if a lock has been acquired, this patch takes an easy approach to fixing the unnecessary lock releases by adding an indicator to the libxlDomainPrivate object that can be set when the lock is acquired and cleared when the lock is released. libxlDomainCleanup can then skip releasing the lock in cases where it was previously released or never acquired in the first place. Signed-off-by: Jim Fehlig <jfehlig@xxxxxxxx> --- danpb and I discussed this issue on IRC and after pursuing several options I've decided to send this simple approach to the list for comment. I reworked the code in several ways but in the end cant escape the fact that there are occasions where domain cleanup needs to be performed but stopping the lock process is not needed. Another simple approach I pursued was creating a cleanup function that skipped virDomainLockProcessPause, but this approach was a bit cleaner IMO. Other suggestions welcomed. All is not lost in the effort as it resulted in several cleanup and improvement patches that I'll post after release. It includes decomposing libxlDomainStart and making it idempotent on error so libxlDomainCleanup only needs to be called after a successful call to libxlDomainStart. src/libxl/libxl_domain.c | 13 +++++++++---- src/libxl/libxl_domain.h | 1 + src/libxl/libxl_migration.c | 8 ++++++-- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c index 604b94a006..014f6aceca 100644 --- a/src/libxl/libxl_domain.c +++ b/src/libxl/libxl_domain.c @@ -861,10 +861,14 @@ libxlDomainCleanup(libxlDriverPrivatePtr driver, virHostdevReAttachDomainDevices(hostdev_mgr, LIBXL_DRIVER_INTERNAL_NAME, vm->def, hostdev_flags); - VIR_FREE(priv->lockState); - if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) - VIR_WARN("Unable to release lease on %s", vm->def->name); - VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + if (priv->lockProcessRunning) { + VIR_FREE(priv->lockState); + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + else + priv->lockProcessRunning = false; + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + } libxlLoggerCloseFile(cfg->logger, vm->def->id); vm->def->id = -1; @@ -1426,6 +1430,7 @@ libxlDomainStart(libxlDriverPrivatePtr driver, priv->lockState) < 0) goto destroy_dom; VIR_FREE(priv->lockState); + priv->lockProcessRunning = true; /* Always enable domain death events */ if (libxl_evenable_domain_death(cfg->ctx, vm->def->id, 0, &priv->deathW)) diff --git a/src/libxl/libxl_domain.h b/src/libxl/libxl_domain.h index 00682546e0..5fb76bd303 100644 --- a/src/libxl/libxl_domain.h +++ b/src/libxl/libxl_domain.h @@ -68,6 +68,7 @@ struct _libxlDomainObjPrivate { virThreadPtr migrationDstReceiveThr; unsigned short migrationPort; char *lockState; + bool lockProcessRunning; struct libxlDomainJobObj job; diff --git a/src/libxl/libxl_migration.c b/src/libxl/libxl_migration.c index 631f67930d..a5a9df98ad 100644 --- a/src/libxl/libxl_migration.c +++ b/src/libxl/libxl_migration.c @@ -1242,9 +1242,12 @@ libxlDomainMigrationSrcPerform(libxlDriverPrivatePtr driver, virObjectLock(vm); if (ret == 0) { - if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) == 0) { + priv->lockProcessRunning = false; + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + } else { VIR_WARN("Unable to release lease on %s", vm->def->name); - VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + } } else { /* * Confirm phase will not be executed if perform fails. End the @@ -1377,6 +1380,7 @@ libxlDomainMigrationSrcConfirm(libxlDriverPrivatePtr driver, "xen:///system", vm, priv->lockState); + priv->lockProcessRunning = true; if (libxl_domain_resume(cfg->ctx, vm->def->id, 1, 0) == 0) { ret = 0; } else { -- 2.29.2