[PATCH] fsmonitor: fix hangs by delayed fs event listening

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Koji Nakamaru <koji.nakamaru@xxxxxxxx>

The thread serving the client (ipc-thread) calls
with_lock__wait_for_cookie() in which a cookie file is
created. with_lock__wait_for_cookie() then waits for the event caused by
the cookie file from the thread for fs events (fsevent-thread).

However, in high load situations, the fsevent-thread may start actual fs
event listening (triggered by FSEventStreamStart() for Darwin, for
example) *after* the cookie file is created. In this case, the
fsevent-thread cannot detect the cookie file and
with_lock__wait_for_cookie() waits forever, so that the whole daemon
hangs [1].

Extend listen_error_code to express that actual fs event listening
starts. listen_error_code is accessed in a thread-safe manner by
utilizing a dedicated mutex.

[1]: https://lore.kernel.org/git/20241002062539.GA2863841@xxxxxxxxxxxxxxxxxxxxxxx/

Suggested-by: Jeff King <peff@xxxxxxxx>
Signed-off-by: Koji Nakamaru <koji.nakamaru@xxxxxxxx>
---
    fsmonitor: fix hangs by delayed fs event listening

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1804%2FKojiNakamaru%2Ffix%2Ffsmonitor-deadlock-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1804/KojiNakamaru/fix/fsmonitor-deadlock-v1
Pull-Request: https://github.com/gitgitgadget/git/pull/1804

 builtin/fsmonitor--daemon.c          | 21 +++++++++++++++++++++
 compat/fsmonitor/fsm-listen-darwin.c |  5 +++--
 compat/fsmonitor/fsm-listen-win32.c  |  5 ++---
 fsmonitor--daemon.h                  |  4 ++++
 4 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c
index dce8a3b2482..ccff2cb8bed 100644
--- a/builtin/fsmonitor--daemon.c
+++ b/builtin/fsmonitor--daemon.c
@@ -172,6 +172,9 @@ static enum fsmonitor_cookie_item_result with_lock__wait_for_cookie(
 	trace_printf_key(&trace_fsmonitor, "cookie-wait: '%s' '%s'",
 			 cookie->name, cookie_pathname.buf);
 
+	while (fsmonitor_get_listen_error_code(state) == 0)
+		sleep_millisec(50);
+
 	/*
 	 * Create the cookie file on disk and then wait for a notification
 	 * that the listener thread has seen it.
@@ -606,6 +609,23 @@ void fsmonitor_force_resync(struct fsmonitor_daemon_state *state)
 	pthread_mutex_unlock(&state->main_lock);
 }
 
+int fsmonitor_get_listen_error_code(struct fsmonitor_daemon_state *state)
+{
+	int error_code;
+
+	pthread_mutex_lock(&state->listen_lock);
+	error_code = state->listen_error_code;
+	pthread_mutex_unlock(&state->listen_lock);
+	return error_code;
+}
+
+void fsmonitor_set_listen_error_code(struct fsmonitor_daemon_state *state, int error_code)
+{
+	pthread_mutex_lock(&state->listen_lock);
+	state->listen_error_code = error_code;
+	pthread_mutex_unlock(&state->listen_lock);
+}
+
 /*
  * Format an opaque token string to send to the client.
  */
@@ -1285,6 +1305,7 @@ static int fsmonitor_run_daemon(void)
 	hashmap_init(&state.cookies, cookies_cmp, NULL, 0);
 	pthread_mutex_init(&state.main_lock, NULL);
 	pthread_cond_init(&state.cookies_cond, NULL);
+	pthread_mutex_init(&state.listen_lock, NULL);
 	state.listen_error_code = 0;
 	state.health_error_code = 0;
 	state.current_token_data = fsmonitor_new_token_data();
diff --git a/compat/fsmonitor/fsm-listen-darwin.c b/compat/fsmonitor/fsm-listen-darwin.c
index 2fc67442eb5..92373ce247f 100644
--- a/compat/fsmonitor/fsm-listen-darwin.c
+++ b/compat/fsmonitor/fsm-listen-darwin.c
@@ -515,6 +515,7 @@ void fsm_listen__loop(struct fsmonitor_daemon_state *state)
 		goto force_error_stop_without_loop;
 	}
 	data->stream_started = 1;
+	fsmonitor_set_listen_error_code(state, 1);
 
 	pthread_mutex_lock(&data->dq_lock);
 	pthread_cond_wait(&data->dq_finished, &data->dq_lock);
@@ -522,7 +523,7 @@ void fsm_listen__loop(struct fsmonitor_daemon_state *state)
 
 	switch (data->shutdown_style) {
 	case FORCE_ERROR_STOP:
-		state->listen_error_code = -1;
+		fsmonitor_set_listen_error_code(state, -1);
 		/* fall thru */
 	case FORCE_SHUTDOWN:
 		ipc_server_stop_async(state->ipc_server_data);
@@ -534,7 +535,7 @@ void fsm_listen__loop(struct fsmonitor_daemon_state *state)
 	return;
 
 force_error_stop_without_loop:
-	state->listen_error_code = -1;
+	fsmonitor_set_listen_error_code(state, -1);
 	ipc_server_stop_async(state->ipc_server_data);
 	return;
 }
diff --git a/compat/fsmonitor/fsm-listen-win32.c b/compat/fsmonitor/fsm-listen-win32.c
index 5a21dade7b8..609efd1d6a8 100644
--- a/compat/fsmonitor/fsm-listen-win32.c
+++ b/compat/fsmonitor/fsm-listen-win32.c
@@ -732,14 +732,13 @@ void fsm_listen__loop(struct fsmonitor_daemon_state *state)
 	DWORD dwWait;
 	int result;
 
-	state->listen_error_code = 0;
-
 	if (start_rdcw_watch(data->watch_worktree) == -1)
 		goto force_error_stop;
 
 	if (data->watch_gitdir &&
 	    start_rdcw_watch(data->watch_gitdir) == -1)
 		goto force_error_stop;
+	fsmonitor_set_listen_error_code(state, 1);
 
 	for (;;) {
 		dwWait = WaitForMultipleObjects(data->nr_listener_handles,
@@ -797,7 +796,7 @@ void fsm_listen__loop(struct fsmonitor_daemon_state *state)
 	}
 
 force_error_stop:
-	state->listen_error_code = -1;
+	fsmonitor_set_listen_error_code(state, -1);
 
 force_shutdown:
 	/*
diff --git a/fsmonitor--daemon.h b/fsmonitor--daemon.h
index 5cbbec8d940..77efc59b7bb 100644
--- a/fsmonitor--daemon.h
+++ b/fsmonitor--daemon.h
@@ -51,6 +51,7 @@ struct fsmonitor_daemon_state {
 	int cookie_seq;
 	struct hashmap cookies;
 
+	pthread_mutex_t listen_lock;
 	int listen_error_code;
 	int health_error_code;
 	struct fsm_listen_data *listen_data;
@@ -167,5 +168,8 @@ void fsmonitor_publish(struct fsmonitor_daemon_state *state,
  */
 void fsmonitor_force_resync(struct fsmonitor_daemon_state *state);
 
+int fsmonitor_get_listen_error_code(struct fsmonitor_daemon_state *state);
+void fsmonitor_set_listen_error_code(struct fsmonitor_daemon_state *state, int error_code);
+
 #endif /* HAVE_FSMONITOR_DAEMON_BACKEND */
 #endif /* FSMONITOR_DAEMON_H */

base-commit: e9356ba3ea2a6754281ff7697b3e5a1697b21e24
-- 
gitgitgadget




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux