[PATCH] Remove exit thread and replace it by exit pipe

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



When exit is called, the exit thread creates a schedwrk. This usually
works, but sadly there is a race between the main thread (coropoll) and
the exit thread in totemsrp_callback_token_create (the main thread is
reading values and iterating through the list while the exit thread is
adding to it).

The solution is to remove the exit thread: create a pipe, connect its
read end to coropoll (i.e. the main thread), and let
corosync_shutdown_request simply write a byte (= create a notification)
to the pipe. Coropoll then executes the shutdown sequence in the main
thread.

Signed-off-by: Jan Friesse <jfriesse@xxxxxxxxxx>
---
 exec/main.c |   63 ++++++++++++++++++++++++----------------------------------
 1 files changed, 26 insertions(+), 37 deletions(-)

diff --git a/exec/main.c b/exec/main.c
index 6c8ea35..d23e244 100644
--- a/exec/main.c
+++ b/exec/main.c
@@ -137,9 +137,7 @@ static hdb_handle_t object_memb_handle;
 
 static corosync_timer_handle_t corosync_stats_timer_handle;
 
-static pthread_t corosync_exit_thread;
-
-static sem_t corosync_exit_sem;
+static int corosync_exit_pipe[2] = {0, 0};
 
 static const char *corosync_lock_file = LOCALSTATEDIR"/run/corosync.pid";
 
@@ -181,33 +179,37 @@ static void unlink_all_completed (void)
 
 void corosync_shutdown_request (void)
 {
-	static int called = 0;
-	if (called) {
-		return;
-	}
-	if (called == 0) {
-		called = 1;
+	char buf = 0;
+
+	if (corosync_exit_pipe[1] == 0) {
+		corosync_exit_error (AIS_DONE_EXIT);
 	}
 
-	sem_post (&corosync_exit_sem);
+	write(corosync_exit_pipe[1], &buf, sizeof(buf));
 }
 
-static void *corosync_exit_thread_handler (void *arg)
+static int corosync_exit_dispatch_fn (
+    hdb_handle_t handle,
+    int fd,
+    int revents,
+    void *data)
 {
 	totempg_stats_t * stats;
+	char buf;
 
-	sem_wait (&corosync_exit_sem);
+	read(corosync_exit_pipe[0], &buf, sizeof(buf));
 
 	stats = api->totem_get_stats();
 	if (stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ||
 	    stats->mrp->srp->operational_entered == 0) {
+
 		unlink_all_completed ();
 		/* NOTREACHED */
 	}
 
 	corosync_service_unlink_all (api, unlink_all_completed);
 
-	return arg;
+	return (-1);
 }
 
 static void sigusr2_handler (int num)
@@ -1620,15 +1622,6 @@ int main (int argc, char **argv, char **envp)
 	log_printf (LOGSYS_LEVEL_NOTICE, "Corosync Cluster Engine ('%s'): started and ready to provide service.\n", VERSION);
 	log_printf (LOGSYS_LEVEL_INFO, "Corosync built-in features:" PACKAGE_FEATURES "\n");
 
-	/*
-	 * Create exit semaphore
-	 */
-	res = sem_init (&corosync_exit_sem, 0, 0);
-	if (res != 0) {
-		log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't create exit thread.\n");
-		corosync_exit_error (AIS_DONE_FATAL_ERR);
-	}
-
 	(void)signal (SIGINT, sigintr_handler);
 	(void)signal (SIGUSR2, sigusr2_handler);
 	(void)signal (SIGSEGV, sigsegv_handler);
@@ -1790,10 +1783,6 @@ int main (int argc, char **argv, char **envp)
 	}
 	logsys_fork_completed();
 
-	if ((flock_err = corosync_flock (corosync_lock_file, getpid ())) != AIS_DONE_EXIT) {
-		corosync_exit_error (flock_err);
-	}
-
 	corosync_timer_init (
 		serialize_lock,
 		serialize_unlock,
@@ -1803,22 +1792,22 @@ int main (int argc, char **argv, char **envp)
 	poll_low_fds_event_set(corosync_poll_handle, main_low_fds_event);
 
 	/*
-	 * Sleep for a while to let other nodes in the cluster
-	 * understand that this node has been away (if it was
-	 * an corosync restart).
-	 */
-
-// TODO what is this hack for?	usleep(totem_config.token_timeout * 2000);
-
-	/*
-	 * Start "exit" thread
+	 * Create exit pipe
 	 */
-	res = pthread_create (&corosync_exit_thread, NULL, corosync_exit_thread_handler, NULL);
+	res = pipe(corosync_exit_pipe);
+	if (res == 0) {
+		res = poll_dispatch_add(corosync_poll_handle, corosync_exit_pipe[0],
+			POLLIN, NULL, corosync_exit_dispatch_fn);
+	}
 	if (res != 0) {
-		log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't create exit thread.\n");
+		log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't create exit pipe.\n");
 		corosync_exit_error (AIS_DONE_FATAL_ERR);
 	}
 
+	if ((flock_err = corosync_flock (corosync_lock_file, getpid ())) != AIS_DONE_EXIT) {
+		corosync_exit_error (flock_err);
+	}
+
 	/*
 	 * if totempg_initialize doesn't have root priveleges, it cannot
 	 * bind to a specific interface.  This only matters if
-- 
1.7.1

_______________________________________________
discuss mailing list
discuss@xxxxxxxxxxxx
http://lists.corosync.org/mailman/listinfo/discuss


[Index of Archives]     [Linux Clusters]     [Corosync Project]     [Linux USB Devel]     [Linux Audio Users]     [Photo]     [Yosemite News]    [Yosemite Photos]    [Linux Kernel]     [Linux SCSI]     [X.Org]

  Powered by Linux