[PATCH] DRAFT: Detect big scheduling pauses

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Patch adds a poll timer that is scheduled to run 3 times per token timeout.
If the timer was not called for more than 0.8 * token timeout, it means the
corosync process was not scheduled and either token_timeout should be
increased or the load should be reduced (useful for VMs, where the host is
overcommitted so the VM is not scheduled as expected).

Signed-off-by: Jan Friesse <jfriesse@xxxxxxxxxx>
---
 exec/main.c |   50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 50 insertions(+), 0 deletions(-)

diff --git a/exec/main.c b/exec/main.c
index d23e244..42ffb7b 100644
--- a/exec/main.c
+++ b/exec/main.c
@@ -1295,6 +1295,49 @@ static struct coroipcs_init_state_v2 ipc_init_state_v2 = {
 	.stats_decrement_value		= corosync_stats_decrement_value,
 };
 
+struct scheduler_pause_timeout_data {
+	struct totem_config *totem_config;	/* read for the current token_timeout on every callback */
+	poll_timer_handle handle;	/* passed by address to poll_timer_add when the timer is re-armed */
+	unsigned long long tv_prev;	/* timerlist_nano_current_get () value at the previous callback; 0 = not called yet */
+	unsigned long long max_tv_diff;	/* gap between callbacks (ns) above which a warning is logged */
+};
+/* Poll timer callback: warns if the gap since its previous run exceeds the threshold, then re-arms itself. */
+static void timer_function_scheduler_timeout (void *data)
+{
+	struct scheduler_pause_timeout_data *timeout_data = (struct scheduler_pause_timeout_data *)data;
+	unsigned long long tv_current;
+	unsigned long long tv_diff;
+
+	tv_current = timerlist_nano_current_get ();
+
+	if (timeout_data->tv_prev == 0) {
+		/*
+		 * Initial call -> just pretend everything is ok
+		 */
+		timeout_data->tv_prev = tv_current;
+		timeout_data->max_tv_diff = 0;	/* tv_diff computed below is 0 too, so no warning is logged */
+	}
+
+	tv_diff = tv_current - timeout_data->tv_prev;
+	timeout_data->tv_prev = tv_current;
+
+	if (tv_diff > timeout_data->max_tv_diff) {
+		log_printf (LOGSYS_LEVEL_WARNING, "Corosync main process was not scheduled for %0.4f ms "
+		    "(threshold is %0.4f ms). Consider token timeout increase.",
+		    (float)tv_diff / TIMERLIST_NS_IN_MSEC, (float)timeout_data->max_tv_diff / TIMERLIST_NS_IN_MSEC);
+	}
+
+	/*
+	 * Set next threshold (80% of token_timeout, converted to ns), because token_timeout can change
+	 */
+	timeout_data->max_tv_diff = timeout_data->totem_config->token_timeout * TIMERLIST_NS_IN_MSEC * 0.8;
+	poll_timer_add (corosync_poll_handle,
+		timeout_data->totem_config->token_timeout / 3,	/* a third of token_timeout: 3 runs per token timeout */
+		timeout_data,
+		timer_function_scheduler_timeout,
+		&timeout_data->handle);
+}
+
 static void corosync_setscheduler (void)
 {
 #if defined(HAVE_PTHREAD_SETSCHEDPARAM) && defined(HAVE_SCHED_GET_PRIORITY_MAX) && defined(HAVE_SCHED_SETSCHEDULER)
@@ -1556,6 +1599,7 @@ error_close:
 	return (err);
 }
 
+
 int main (int argc, char **argv, char **envp)
 {
 	const char *error_string;
@@ -1576,6 +1620,7 @@ int main (int argc, char **argv, char **envp)
 	char corosync_lib_dir[PATH_MAX];
 	hdb_handle_t object_runtime_handle;
 	enum e_ais_done flock_err;
+	struct scheduler_pause_timeout_data scheduler_pause_timeout_data;
 
  	/* default configuration
 	 */
@@ -1788,9 +1833,14 @@ int main (int argc, char **argv, char **envp)
 		serialize_unlock,
 		sched_priority);
 
+
 	corosync_poll_handle = poll_create ();
 	poll_low_fds_event_set(corosync_poll_handle, main_low_fds_event);
 
+	memset(&scheduler_pause_timeout_data, 0, sizeof(scheduler_pause_timeout_data));
+	scheduler_pause_timeout_data.totem_config = &totem_config;
+	timer_function_scheduler_timeout (&scheduler_pause_timeout_data);
+
 	/*
 	 * Create exit pipe
 	 */
-- 
1.7.1

_______________________________________________
discuss mailing list
discuss@xxxxxxxxxxxx
http://lists.corosync.org/mailman/listinfo/discuss




[Index of Archives]     [Linux Clusters]     [Corosync Project]     [Linux USB Devel]     [Linux Audio Users]     [Photo]     [Yosemite News]    [Yosemite Photos]    [Linux Kernel]     [Linux SCSI]     [X.Org]

  Powered by Linux