[PATCH] chunkd self-check cleanups

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



commit 38e47f046d4d3f9657e9644417ad8b154b46a6e7
Author: Jeff Garzik <jeff@xxxxxxxxxx>
Date:   Fri Mar 5 08:03:48 2010 -0500

    Self-check cleanups.
    
    - remove self-check period.  self-check period is externally controlled.
    
    - self-check initiation limited to specified list of administrative users
    
    - clear up confusion about check "start" versus "poke." "start" is
      preferred.
    
    - thread is not spawned until first check
    
    - add some whitespace for readability
    
    - Unrelated: test/server-test.cfg: s/spaces/tab/ in <CLD> clause
    
    Signed-off-by: Jeff Garzik <jgarzik@xxxxxxxxxx>

diff --git a/doc/setup.txt b/doc/setup.txt
index f8d9f00..a5e7c66 100644
--- a/doc/setup.txt
+++ b/doc/setup.txt
@@ -104,15 +104,12 @@ _cld._udp.phx2.ex.com has SRV record 10 50 8081 maika.phx2.ex.com.
 		<Rack>F3R18</Rack>
 	</Geo>
 
-*) configure the self-check period, if desired. Note that the actual amount
-   of time that the daemon sleeps between self-check sweeps is randomized
-   in order to prevent the convoy effect, so the value below is approximate.
+*) configure the list of users authorized to initiate background self-check
 
-	<!-- 30 * 60 * 60 == 108000 seconds -->
-	<SelfCheckPeriod>108000</SelfCheckPeriod>
-
-   The default is zero, which disables the periodic self-check. It may be
-   appropriate if the application performs its own end-to-end checking.
+	<Check>
+		<User>admin_user</User>
+		<User>another_user</User>
+	</Check>
 
 *) start daemon (it will put itself into the background) with the
    configuration file just created:
diff --git a/include/chunkc.h b/include/chunkc.h
index 5ebc936..683992e 100644
--- a/include/chunkc.h
+++ b/include/chunkc.h
@@ -89,7 +89,7 @@ extern bool stc_put_inline(struct st_client *stc, const void *key,
 extern bool stc_del(struct st_client *stc, const void *key, size_t key_len);
 extern bool stc_ping(struct st_client *stc);
 
-extern bool stc_check_poke(struct st_client *stc);
+extern bool stc_check_start(struct st_client *stc);
 extern bool stc_check_status(struct st_client *stc,
 			     struct chunk_check_status *out);
 
diff --git a/lib/chunkdc.c b/lib/chunkdc.c
index 93783bf..e9e0e0f 100644
--- a/lib/chunkdc.c
+++ b/lib/chunkdc.c
@@ -978,13 +978,13 @@ bool stc_ping(struct st_client *stc)
 	return true;
 }
 
-bool stc_check_poke(struct st_client *stc)
+bool stc_check_start(struct st_client *stc)
 {
 	struct chunksrv_resp resp;
 	struct chunksrv_req *req = (struct chunksrv_req *) stc->req_buf;
 
 	if (stc->verbose)
-		fprintf(stderr, "libstc: CHECK_POKE\n");
+		fprintf(stderr, "libstc: CHECK_START\n");
 
 	/* initialize request */
 	req_init(stc, req);
@@ -1004,7 +1004,7 @@ bool stc_check_poke(struct st_client *stc)
 	/* check response code */
 	if (resp.resp_code != che_Success) {
 		if (stc->verbose)
-			fprintf(stderr, "CHECK_POKE resp code: %d\n",
+			fprintf(stderr, "CHECK_START resp code: %d\n",
 				resp.resp_code);
 		return false;
 	}
diff --git a/server/chunkd.h b/server/chunkd.h
index 41e0a4b..42bcb21 100644
--- a/server/chunkd.h
+++ b/server/chunkd.h
@@ -223,8 +223,9 @@ struct server {
 	char			*group;
 	uint32_t		nid;
 	struct geo		loc;
-	time_t			chk_period;
+
 	int			chk_pipe[2];
+	GList			*chk_users;
 
 	TCHDB			*tbl_master;
 	struct objcache		actives;
@@ -342,8 +343,7 @@ extern void resp_init_req(struct chunksrv_resp *resp,
 extern void read_config(void);
 
 /* selfcheck.c */
-extern void chk_init(void);
-extern int chk_spawn(time_t period, TCHDB *hdb);
+extern int chk_spawn(TCHDB *hdb);
 
 static inline bool use_sendfile(struct client *cli)
 {
diff --git a/server/config.c b/server/config.c
index fee2c0c..69b5e9b 100644
--- a/server/config.c
+++ b/server/config.c
@@ -41,6 +41,7 @@ struct config_context {
 	bool		badnid;
 	bool		in_ssl;
 	bool		in_listen;
+	bool		in_chk;
 	bool		have_ssl;
 	char		*vol_path;
 
@@ -133,6 +134,13 @@ static void cfg_elm_start (GMarkupParseContext *context,
 			applog(LOG_ERR, "Nested CLD in configuration");
 		}
 	}
+	else if (!strcmp(element_name, "Check")) {
+		if (!cc->in_chk) {
+			cc->in_chk = true;
+		} else {
+			applog(LOG_ERR, "Nested Check in configuration");
+		}
+	}
 }
 
 static void cfg_elm_end_listen(struct config_context *cc)
@@ -293,7 +301,16 @@ static void cfg_elm_end (GMarkupParseContext *context,
 	else if (!strcmp(element_name, "SSL"))
 		cc->in_ssl = false;
 
-	else if (cc->in_ssl && cc->text && !strcmp(element_name, "PrivateKey")) {
+	else if (cc->in_chk && cc->text && !strcmp(element_name, "User")) {
+		chunkd_srv.chk_users =
+			g_list_append(chunkd_srv.chk_users, cc->text);
+		cc->text = NULL;
+	}
+
+	else if (!strcmp(element_name, "Check"))
+		cc->in_chk = false;
+
+	else if (cc->in_ssl && cc->text && !strcmp(element_name, "PrivateKey")){
 		if (SSL_CTX_use_PrivateKey_file(ssl_ctx, cc->text,
 						SSL_FILETYPE_PEM) <= 0)
 			applog(LOG_ERR, "Failed to read SSL private key '%s'",
@@ -442,24 +459,6 @@ static void cfg_elm_end (GMarkupParseContext *context,
 		cc->text = NULL;
 	}
 
-	else if (!strcmp(element_name, "SelfCheckPeriod")) {
-		if (!cc->text) {
-			applog(LOG_WARNING, "SelfCheckPeriod element empty");
-			return;
-		}
-		n = strtol(cc->text, NULL, 10);
-		if (n < 0 || n >= LONG_MAX) {
-			applog(LOG_ERR, "SelfCheckPeriod '%s' is invalid",
-			       cc->text);
-			free(cc->text);
-			cc->text = NULL;
-			return;
-		}
-		chunkd_srv.chk_period = n;
-		free(cc->text);
-		cc->text = NULL;
-	}
-
 	else {
 		applog(LOG_WARNING, "Unknown element \"%s\"", element_name);
 	}
diff --git a/server/selfcheck.c b/server/selfcheck.c
index f8cbaef..5207e7f 100644
--- a/server/selfcheck.c
+++ b/server/selfcheck.c
@@ -13,7 +13,6 @@
 #include "chunkd.h"
 
 struct chk_arg {
-	time_t period;
 	TCHDB *hdb;
 	// GThread *gthread;
 };
@@ -183,62 +182,46 @@ static void chk_thread_command(struct chk_tls *tls)
 
 static gpointer chk_thread_func(gpointer data)
 {
-	struct chk_tls _tls;
-	struct chk_tls *tls;
+	struct chk_tls _tls = { .arg = data };
+	struct chk_tls *tls = &_tls;
 	struct pollfd pfd[1];
-	int tmo;
-	time_t dt;
 	int i;
 	int rc;
 
-	tls = &_tls;
-	memset(tls, 0, sizeof(struct chk_tls));
-	tls->arg = data;
-
 	for (;;) {
 		g_mutex_lock(chunkd_srv.bigmutex);
 		chunkd_srv.chk_state = CHK_ST_IDLE;
 		g_mutex_unlock(chunkd_srv.bigmutex);
 
-		/*
-		 * Without the randomization, all systems in a low loaded
-		 * datacenter are virtually guaranteed to start checks in the
-		 * same time, blow fuses and/or disrupt applications.
-		 */
-		dt = tls->arg->period;
-		if (dt == 0) {
-			tmo = -1;
-		} else {
-			if (dt >= 3)
-				dt += rand() % (dt/3);
-			if (debugging)
-				applog(LOG_DEBUG, "chk: sleeping %lu s", dt);
-			tmo = dt * 1000;
-		}
-
 		memset(pfd, 0, sizeof(pfd));
 		pfd[0].fd = chunkd_srv.chk_pipe[0];
 		pfd[0].events = POLLIN;
 
-		rc = poll(pfd, 1, tmo);
+		rc = poll(pfd, ARRAY_SIZE(pfd), -1);
 		if (rc < 0) {
 			applog(LOG_WARNING, "chk: poll error: %s",
 			       strerror(errno));
 			break;	/* don't flood, just die */
 		}
 
-		if (rc) {
-			for (i = 0; i < ARRAY_SIZE(pfd); i++) {
-				if (pfd[i].revents) {
-					if (i == 0) {
-						chk_thread_command(tls);
-					}
-				}
+		if (rc == 0)
+			continue;	/* should never happen */
+
+		for (i = 0; i < ARRAY_SIZE(pfd); i++) {
+			if (!pfd[i].revents)
+				continue;
+
+			switch (i) {
+			case 0:
+				chk_thread_command(tls);
+				break;
+			default:
+				/* do nothing */
+				break;
 			}
-		} else {
-			chk_thread_scan(tls);
 		}
 	}
+
 	return NULL;
 }
 
@@ -249,7 +232,7 @@ static gpointer chk_thread_func(gpointer data)
  */
 static struct chk_arg *thread;
 
-int chk_spawn(time_t period, TCHDB *hdb)
+int chk_spawn(TCHDB *hdb)
 {
 	GThread *gthread;
 	struct chk_arg *arg;
@@ -260,7 +243,6 @@ int chk_spawn(time_t period, TCHDB *hdb)
 		applog(LOG_ERR, "No core");
 		return -1;
 	}
-	arg->period = period;
 	arg->hdb = hdb;
 
 	gthread = g_thread_create(chk_thread_func, arg, FALSE, &error);
@@ -276,18 +258,3 @@ int chk_spawn(time_t period, TCHDB *hdb)
 	return 0;
 }
 
-void chk_init()
-{
-	int rc;
-
-	if (!chunkd_srv.chk_period) {
-		if (debugging)
-			applog(LOG_DEBUG, "chk: disabled by configuration");
-		return;
-	}
-
-	chunkd_srv.chk_state = CHK_ST_INIT;
-	rc = chk_spawn(chunkd_srv.chk_period, chunkd_srv.tbl_master);
-	if (rc)
-		exit(1);		/* message already logged */
-}
diff --git a/server/server.c b/server/server.c
index 1dd9d8f..0d83311 100644
--- a/server/server.c
+++ b/server/server.c
@@ -933,24 +933,47 @@ err_out:
 	return cli_err(cli, err, false);
 }
 
-static bool chk_poke(struct client *cli)
+static bool chk_user_authorized(struct client *cli)
+{
+	GList *tmp = chunkd_srv.chk_users;
+
+	while (tmp) {
+		char *s;
+
+		s = tmp->data;
+		if (!strcmp(cli->user, s))
+			return true;
+
+		tmp = tmp->next;
+	}
+
+	return false;
+}
+
+static bool chk_start(struct client *cli)
 {
 	unsigned char cmd;
 	int rc;
 
+	if (!chk_user_authorized(cli))
+		return cli_err(cli, che_AccessDenied, true);
+
 	g_mutex_lock(chunkd_srv.bigmutex);
+
 	switch (chunkd_srv.chk_state) {
 	case CHK_ST_OFF:
 		chunkd_srv.chk_state = CHK_ST_INIT;
 		g_mutex_unlock(chunkd_srv.bigmutex);
-		rc = chk_spawn(chunkd_srv.chk_period, chunkd_srv.tbl_master);
+		rc = chk_spawn(chunkd_srv.tbl_master);
 		if (rc)
 			return cli_err(cli, che_InternalError, true);
 		break;
+
 	case CHK_ST_INIT:
 	case CHK_ST_RUNNING:
 		g_mutex_unlock(chunkd_srv.bigmutex);
 		return cli_err(cli, che_Busy, true);
+
 	default:
 		chunkd_srv.chk_state = CHK_ST_RUNNING;
 		g_mutex_unlock(chunkd_srv.bigmutex);
@@ -966,8 +989,11 @@ static bool chk_status(struct client *cli)
 	struct chunk_check_status outbuf;
 
 	memset(&outbuf, 0, sizeof(struct chunk_check_status));
+
 	g_mutex_lock(chunkd_srv.bigmutex);
+
 	outbuf.lastdone = cpu_to_le64(chunkd_srv.chk_done);
+
 	switch (chunkd_srv.chk_state) {
 	case CHK_ST_IDLE:
 		outbuf.state = chk_Idle;
@@ -979,6 +1005,7 @@ static bool chk_status(struct client *cli)
 	default:
 		outbuf.state = chk_Off;
 	}
+
 	g_mutex_unlock(chunkd_srv.bigmutex);
 
 	return cli_resp_bin(cli, &outbuf, sizeof(struct chunk_check_status));
@@ -1110,7 +1137,7 @@ static bool cli_evt_exec_req(struct client *cli, unsigned int events)
 		rcb = volume_open(cli);
 		break;
 	case CHO_CHECK_START:
-		rcb = chk_poke(cli);
+		rcb = chk_start(cli);
 		break;
 	case CHO_CHECK_STATUS:
 		rcb = chk_status(cli);
@@ -1804,8 +1831,6 @@ int main (int argc, char *argv[])
 		goto err_out_cld;
 	}
 
-	chk_init();
-
 	/* set up server networking */
 	for (tmpl = chunkd_srv.listeners; tmpl; tmpl = tmpl->next) {
 		rc = net_open(tmpl->data);
diff --git a/test/selfcheck-unit.c b/test/selfcheck-unit.c
index fbd9a09..ead5568 100644
--- a/test/selfcheck-unit.c
+++ b/test/selfcheck-unit.c
@@ -37,7 +37,6 @@ struct config_context {
 	char *text;
 
 	char *path;
-	long period;
 };
 
 static void cfg_elm_start(GMarkupParseContext *context,
@@ -56,20 +55,12 @@ static void cfg_elm_end(GMarkupParseContext *context,
 			GError		**error)
 {
 	struct config_context *cc = user_data;
-	long n;
 
 	if (!strcmp(element_name, "Path")) {
 		OK(cc->text);
 		free(cc->path);
 		cc->path = cc->text;
 		cc->text = NULL;
-	} else if (!strcmp(element_name, "SelfCheckPeriod")) {
-		OK(cc->text);
-		n = strtol(cc->text, NULL, 10);
-		OK(n >= 0 && n < LONG_MAX);
-		cc->period = n;
-		free(cc->text);
-		cc->text = NULL;
 	} else {
 		free(cc->text);
 		cc->text = NULL;
@@ -251,7 +242,6 @@ int main(int argc, char *argv[])
 	memset(&ctx, 0, sizeof(struct config_context));
 	read_config(&ctx);
 	OK(ctx.path);		/* must have a path */
-	OK(!ctx.period);	/* should have a SelfCheckPeriod set to off */
 
 	/*
 	 * Step 1: create the object
@@ -275,11 +265,9 @@ int main(int argc, char *argv[])
 	OK(rcb);
 
 	/*
-	 * Step 3: damage the back-end file and wait
-	 * We sleep for longer than the self-check period because:
-	 * 1) the server sleeps for up to period*1.5
-	 * 2) the server may be quite busy walking numerous objects
-	 * 3) the build system may be overloaded
+	 * Step 3: damage the back-end file and wait, because:
+	 * 1) the server may be quite busy walking numerous objects
+	 * 2) the build system may be overloaded
 	 */
 	rcb = be_file_damage(fn);
 	OK(rcb);
@@ -291,7 +279,7 @@ int main(int argc, char *argv[])
 	OK(stc);
 	rcb = stc_check_status(stc, &status1);
 	OK(rcb);
-	rcb = stc_check_poke(stc);
+	rcb = stc_check_start(stc);
 	OK(rcb);
 	cnt = 0;
 	for (;;) {
diff --git a/test/server-test.cfg b/test/server-test.cfg
index 80833d2..d98fca4 100644
--- a/test/server-test.cfg
+++ b/test/server-test.cfg
@@ -23,7 +23,12 @@
 <NID>1</NID>
 
 <CLD>
-  <PortFile>cld.port</PortFile>
-  <Host>localhost</Host>
+	<PortFile>cld.port</PortFile>
+	<Host>localhost</Host>
 </CLD>
 
+<Check>
+	<User>testuser</User>
+	<User>testuser2</User>
+</Check>
+
diff --git a/tools/chcli.c b/tools/chcli.c
index 24b3e4b..6fde35f 100644
--- a/tools/chcli.c
+++ b/tools/chcli.c
@@ -185,7 +185,7 @@ static void show_cmds(void)
 "DEL key       Delete key\n"
 "PING          Ping server\n"
 "CHECKSTATUS   Fetch status of server self-check\n"
-"CHECKPOKE     Force server self-check\n"
+"CHECKSTART    Begin server self-check\n"
 "\n"
 "Keys provided on the command line (as opposed to via -k) are stored\n"
 "with a C-style nul terminating character appended, adding 1 byte to\n"
@@ -639,7 +639,7 @@ static int cmd_get(void)
 	return 0;
 }
 
-static int cmd_check_poke(void)
+static int cmd_check_start(void)
 {
 	struct st_client *stc;
 
@@ -647,8 +647,8 @@ static int cmd_check_poke(void)
 	if (!stc)
 		return 1;
 
-	if (!stc_check_poke(stc)) {
-		fprintf(stderr, "CHECK POKE failed\n");
+	if (!stc_check_start(stc)) {
+		fprintf(stderr, "CHECK START failed\n");
 		stc_free(stc);
 		return 1;
 	}
@@ -758,7 +758,7 @@ int main (int argc, char *argv[])
 	case CHC_CHECKSTATUS:
 		return cmd_check_status();
 	case CHC_CHECKSTART:
-		return cmd_check_poke();
+		return cmd_check_start();
 	}
 
 	return 0;
--
To unsubscribe from this list: send the line "unsubscribe hail-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Fedora Clound]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux