It may take some time for sanlock to add a lockspace. If the user restarts the libvirtd service in the meantime, the fresh daemon can fail to add the same lockspace with EINPROGRESS. Hence, we should retry a few times before reporting an error. This issue can be easily reproduced: for i in {1..1000} ; do echo $i; service libvirtd restart; sleep 2; done 20 Stopping libvirtd daemon: [FAILED] Starting libvirtd daemon: [ OK ] 21 Stopping libvirtd daemon: [ OK ] Starting libvirtd daemon: [ OK ] 22 Stopping libvirtd daemon: [ OK ] Starting libvirtd daemon: [ OK ] error : virLockManagerSanlockSetupLockspace:334 : Unable to add lockspace /var/lib/libvirt/sanlock/__LIBVIRT__DISKS__: Operation now in progress --- src/locking/lock_driver_sanlock.c | 16 +++++++++++++++- 1 files changed, 15 insertions(+), 1 deletions(-) diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c index d24f3d6..6d02ac6 100644 --- a/src/locking/lock_driver_sanlock.c +++ b/src/locking/lock_driver_sanlock.c @@ -184,6 +184,11 @@ static int virLockManagerSanlockLoadConfig(const char *configFile) return 0; } +/* How much ms sleep before retrying to add a lockspace? */ +#define LOCKSPACE_SLEEP 100 +/* How many times try adding a lockspace? */ +#define LOCKSPACE_RETRIES 10 + static int virLockManagerSanlockSetupLockspace(void) { int fd = -1; @@ -192,6 +197,7 @@ static int virLockManagerSanlockSetupLockspace(void) struct sanlk_lockspace ls; char *path = NULL; char *dir = NULL; + int retries = LOCKSPACE_RETRIES; if (virAsprintf(&path, "%s/%s", driver->autoDiskLeasePath, @@ -320,9 +326,17 @@ static int virLockManagerSanlockSetupLockspace(void) ls.host_id = driver->hostID; /* Stage 2: Try to register the lockspace with the daemon. * If the lockspace is already registered, we should get EEXIST back - * in which case we can just carry on with life + * in which case we can just carry on with life, or EINPROGRESS if + * previous libvirtd instance started the work but didn't finish. 
+ * Unfortunately, sanlock lacks an API to determine state of lockspace, + * so we have to do this blindly. */ +retry: if ((rv = sanlock_add_lockspace(&ls, 0)) < 0) { + if (retries-- && -rv == EINPROGRESS) { + usleep(LOCKSPACE_SLEEP * 1000); + goto retry; + } if (-rv != EEXIST) { if (rv <= -200) virReportError(VIR_ERR_INTERNAL_ERROR, -- 1.7.8.6 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list