[PATCH v2 1/2] lockfile: allow file locking to be retried with a timeout

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Currently, there is only one attempt to lock a file. If it fails, the
whole operation fails.

But it might sometimes be advantageous to try acquiring a file lock a
few times before giving up. So add a new function,
hold_lock_file_for_update_timeout(), that allows a timeout to be
specified. Make hold_lock_file_for_update() a thin wrapper around the
new function.

If timeout_ms is positive, then retry for at least that many
milliseconds to acquire the lock. On each failed attempt, use select()
to wait for a backoff time that increases quadratically (capped at 1
second) and has a random component to prevent two processes from
getting synchronized. If timeout_ms is negative, retry indefinitely.

In a moment we will switch to using the new function when locking
packed-refs.

Signed-off-by: Michael Haggerty <mhagger@xxxxxxxxxxxx>
---

Notes (discussion):
    It would be easy to also add a hold_lock_file_for_append_timeout(),
    but I can't yet think of an application for it.

 lockfile.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 lockfile.h | 16 +++++++++++--
 2 files changed, 91 insertions(+), 4 deletions(-)

diff --git a/lockfile.c b/lockfile.c
index 9889277..30e65e9 100644
--- a/lockfile.c
+++ b/lockfile.c
@@ -157,6 +157,80 @@ static int lock_file(struct lock_file *lk, const char *path, int flags)
 	return lk->fd;
 }
 
+static int sleep_microseconds(long us)
+{
+	struct timeval tv;
+	tv.tv_sec = 0;
+	tv.tv_usec = us;
+	return select(0, NULL, NULL, NULL, &tv);
+}
+
+/*
+ * Constants defining the gaps between attempts to lock a file. The
+ * first backoff period is approximately INITIAL_BACKOFF_MS
+ * milliseconds. The longest backoff period is approximately
+ * (BACKOFF_MAX_MULTIPLIER * INITIAL_BACKOFF_MS) milliseconds.
+ */
+#define INITIAL_BACKOFF_MS 1L
+#define BACKOFF_MAX_MULTIPLIER 1000
+
+/*
+ * Try locking path, retrying with quadratic backoff for at least
+ * timeout_ms milliseconds. If timeout_ms is 0, try locking the file
+ * exactly once. If timeout_ms is -1, try indefinitely.
+ */
+static int lock_file_timeout(struct lock_file *lk, const char *path,
+			     int flags, long timeout_ms)
+{
+	int n = 1;
+	int multiplier = 1;
+	long remaining_us = 0;
+	static int random_initialized = 0;
+
+	if (timeout_ms == 0)
+		return lock_file(lk, path, flags);
+
+	if (!random_initialized) {
+		srandom((unsigned int)getpid());
+		random_initialized = 1;
+	}
+
+	if (timeout_ms > 0) {
+		/* avoid overflow */
+		if (timeout_ms <= LONG_MAX / 1000)
+			remaining_us = timeout_ms * 1000;
+		else
+			remaining_us = LONG_MAX;
+	}
+
+	while (1) {
+		long backoff_ms, wait_us;
+		int fd;
+
+		fd = lock_file(lk, path, flags);
+
+		if (fd >= 0)
+			return fd; /* success */
+		else if (errno != EEXIST)
+			return -1; /* failure other than lock held */
+		else if (timeout_ms > 0 && remaining_us <= 0)
+			return -1; /* failure due to timeout */
+
+		backoff_ms = multiplier * INITIAL_BACKOFF_MS;
+		/* back off for between 0.75*backoff_ms and 1.25*backoff_ms */
+		wait_us = (750 + random() % 500) * backoff_ms;
+		sleep_microseconds(wait_us);
+		remaining_us -= wait_us;
+
+		/* Recursion: (n+1)^2 = n^2 + 2n + 1 */
+		multiplier += 2*n + 1;
+		if (multiplier > BACKOFF_MAX_MULTIPLIER)
+			multiplier = BACKOFF_MAX_MULTIPLIER;
+		else
+			n++;
+	}
+}
+
 void unable_to_lock_message(const char *path, int err, struct strbuf *buf)
 {
 	if (err == EEXIST) {
@@ -179,9 +253,10 @@ NORETURN void unable_to_lock_die(const char *path, int err)
 }
 
 /* This should return a meaningful errno on failure */
-int hold_lock_file_for_update(struct lock_file *lk, const char *path, int flags)
+int hold_lock_file_for_update_timeout(struct lock_file *lk, const char *path,
+				      int flags, long timeout_ms)
 {
-	int fd = lock_file(lk, path, flags);
+	int fd = lock_file_timeout(lk, path, flags, timeout_ms);
 	if (fd < 0 && (flags & LOCK_DIE_ON_ERROR))
 		unable_to_lock_die(path, errno);
 	return fd;
diff --git a/lockfile.h b/lockfile.h
index cd2ec95..b4abc61 100644
--- a/lockfile.h
+++ b/lockfile.h
@@ -74,8 +74,20 @@ struct lock_file {
 extern void unable_to_lock_message(const char *path, int err,
 				   struct strbuf *buf);
 extern NORETURN void unable_to_lock_die(const char *path, int err);
-extern int hold_lock_file_for_update(struct lock_file *, const char *path, int);
-extern int hold_lock_file_for_append(struct lock_file *, const char *path, int);
+extern int hold_lock_file_for_update_timeout(
+		struct lock_file *lk, const char *path,
+		int flags, long timeout_ms);
+
+static inline int hold_lock_file_for_update(
+		struct lock_file *lk, const char *path,
+		int flags)
+{
+	return hold_lock_file_for_update_timeout(lk, path, flags, 0);
+}
+
+extern int hold_lock_file_for_append(struct lock_file *lk, const char *path,
+				     int flags);
+
 extern FILE *fdopen_lock_file(struct lock_file *, const char *mode);
 extern char *get_locked_file_path(struct lock_file *);
 extern int commit_lock_file_to(struct lock_file *, const char *path);
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]