[Patch 4/4] chunkd: add self-checking

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch adds a background process that periodically verifies the
integrity of all known objects. Objects that are found found faulty
are made invisible to applications. This way, only known good objects
remain visible and applications may implement integrity checking that
is run asynchronously with the self-checking. The intent is to let
the performance of checking to scale with the number of objects and
amount of data stored in the cell.

Signed-off-by: Pete Zaitcev <zaitcev@xxxxxxxxxx>

---
 server/Makefile.am    |    2 
 server/be-fs.c        |  126 +++++++++++++++-
 server/chunkd.h       |    8 +
 server/config.c       |   28 +++
 server/selfcheck.c    |  208 ++++++++++++++++++++++++++
 server/server.c       |    2 
 test/.gitignore       |    1 
 test/Makefile.am      |    4 
 test/selfcheck-unit.c |  313 ++++++++++++++++++++++++++++++++++++++++
 test/server-test.cfg  |    2 
 test/test.h           |    2 
 11 files changed, 693 insertions(+), 3 deletions(-)

commit ded36ccc14d7356ceaa523c43e6d91efc06c51cd
Author: Master <zaitcev@xxxxxxxxxxxxxxxxxx>
Date:   Sun Dec 27 16:36:17 2009 -0700

    Selfcheck (with a test).

diff --git a/server/Makefile.am b/server/Makefile.am
index 824c0b3..f2a66a1 100644
--- a/server/Makefile.am
+++ b/server/Makefile.am
@@ -6,7 +6,7 @@ sbin_PROGRAMS	= chunkd
 
 chunkd_SOURCES	= chunkd.h		\
 		  ../lib/chunksrv.c	\
-		  be-fs.c object.c server.c config.c cldu.c util.c \
+		  be-fs.c object.c server.c selfcheck.c config.c cldu.c util.c \
 		  objcache.c
 chunkd_LDADD	= \
 		  @CLDC_LIBS@ @GLIB_LIBS@ @CRYPTO_LIBS@ \
diff --git a/server/be-fs.c b/server/be-fs.c
index 2c63541..652650d 100644
--- a/server/be-fs.c
+++ b/server/be-fs.c
@@ -39,8 +39,8 @@
 #include <tchdb.h>
 #include "chunkd.h"
 
-#define MDB_TABLE_ID	"__chunkd_table_id"
 #define MDB_TPATH_FMT	"%s/%X"
+#define BAD_TPATH_FMT	"%s/bad"
 
 struct fs_obj {
 	struct backend_obj	bo;
@@ -262,6 +262,47 @@ err_out:
 	return NULL;
 }
 
+static char *fs_obj_badname(unsigned long tag)
+{
+	char *s;
+	struct stat st;
+	int rc;
+
+	rc = asprintf(&s, BAD_TPATH_FMT, chunkd_srv.vol_path);
+	if (rc < 0)
+		return NULL;
+
+	/* create subdir on the fly, if not already exists */
+	if (stat(s, &st) < 0) {
+		if (errno != ENOENT) {
+			syslogerr(s);
+			free(s);
+			return NULL;
+		}
+		if (mkdir(s, 0777) < 0) {
+			if (errno != EEXIST) {
+				syslogerr(s);
+				free(s);
+				return NULL;
+			}
+		}
+	} else {
+		if (!S_ISDIR(st.st_mode)) {
+			applog(LOG_WARNING,
+			       "%s: not a dir, fs_obj_badname go boom", s);
+			free(s);
+			return NULL;
+		}
+	}
+	free(s);
+
+	rc = asprintf(&s, BAD_TPATH_FMT "/%lu", chunkd_srv.vol_path, tag);
+	if (rc < 0)
+		return NULL;
+
+	return s;
+}
+
 static bool key_valid(const void *key, size_t key_len)
 {
 	if (!key || key_len < 1 || key_len > CHD_KEY_SZ)
@@ -691,6 +732,27 @@ err_out:
 	return false;
 }
 
+int fs_obj_disable(const char *fn)
+{
+	struct stat st;
+	char *bad;
+	int rc;
+
+	if (stat(fn, &st) < 0)
+		return -errno;
+
+	bad = fs_obj_badname(st.st_ino);
+
+	if (rename(fn, bad) < 0) {
+		rc = errno;
+		free(bad);
+		return -rc;
+	}
+
+	free(bad);
+	return 0;
+}
+
 int fs_list_objs_open(struct fs_obj_lister *t,
 		      const char *root_path, uint32_t table_id)
 {
@@ -951,3 +1013,65 @@ int fs_obj_hdr_read(const char *fn, char **owner, char **csum,
 	return -1;
 }
 
+int fs_obj_do_sum(const char *fn, unsigned int klen, char **csump)
+{
+	enum { BUFLEN = 128 * 1024 };
+	void *buf;
+	int fd;
+	ssize_t rrc;
+	int rc;
+	SHA_CTX hash;
+	char *csum;
+	unsigned char md[SHA_DIGEST_LENGTH];
+
+	rc = ENOMEM;
+	buf = malloc(BUFLEN);
+	if (!buf)
+		goto err_alloc;
+
+	fd = open(fn, O_RDONLY);
+	if (fd == -1) {
+		rc = errno;
+		goto err_open;
+	}
+	if (lseek(fd, sizeof(struct be_fs_obj_hdr) + klen, SEEK_SET) == (off_t)-1) {
+		rc = errno;
+		goto err_seek;
+	}
+
+	SHA1_Init(&hash);
+	for (;;) {
+		rrc = read(fd, buf, BUFLEN);
+		if (rrc < 0) {
+			rc = errno;
+			goto err_read;
+		}
+		if (rrc != 0)
+			SHA1_Update(&hash, buf, rrc);
+		if (rrc < BUFLEN)
+			break;
+	}
+	SHA1_Final(md, &hash);
+
+	csum = malloc(SHA_DIGEST_LENGTH*2 + 1);
+	if (!csum) {
+		rc = ENOMEM;
+		goto err_retdup;
+	}
+	hexstr(md, SHA_DIGEST_LENGTH, csum);
+
+	close(fd);
+	free(buf);
+	*csump = csum;
+	return 0;
+
+ err_retdup:
+ err_read:
+	close(fd);
+ err_seek:
+ err_open:
+	free(buf);
+ err_alloc:
+	return -rc;
+}
+
diff --git a/server/chunkd.h b/server/chunkd.h
index 18dd31a..fd3e28f 100644
--- a/server/chunkd.h
+++ b/server/chunkd.h
@@ -210,6 +210,7 @@ struct server {
 	char			*cell;
 	uint32_t		nid;
 	struct geo		loc;
+	time_t			chk_period;
 
 	TCHDB			*tbl_master;
 	struct objcache		actives;
@@ -217,6 +218,8 @@ struct server {
 	struct server_stats	stats;		/* global statistics */
 };
 
+#define MDB_TABLE_ID	"__chunkd_table_id"
+
 extern struct hail_log cldu_hail_log;
 
 /* be-fs.c */
@@ -245,6 +248,7 @@ extern bool fs_obj_write_commit(struct backend_obj *bo, const char *user,
 extern bool fs_obj_delete(uint32_t table_id, const char *user,
 		          const void *kbuf, size_t klen,
 			  enum chunk_errcode *err_code);
+extern int fs_obj_disable(const char *fn);
 extern ssize_t fs_obj_sendfile(struct backend_obj *bo, int out_fd, size_t len);
 extern int fs_list_objs_open(struct fs_obj_lister *t,
 			     const char *root_path, uint32_t table_id);
@@ -257,6 +261,7 @@ extern GList *fs_list_objs(uint32_t table_id, const char *user);
 extern bool fs_table_open(const char *user, const void *kbuf, size_t klen,
 		   bool tbl_creat, bool excl_creat, uint32_t *table_id,
 		   enum chunk_errcode *err_code);
+extern int fs_obj_do_sum(const char *fn, unsigned int klen, char **csump);
 
 /* object.c */
 extern bool object_del(struct client *cli);
@@ -320,6 +325,9 @@ extern void resp_init_req(struct chunksrv_resp *resp,
 /* config.c */
 extern void read_config(void);
 
+/* selfcheck.c */
+extern void chk_init(void);
+
 static inline bool use_sendfile(struct client *cli)
 {
 #if defined(HAVE_SENDFILE) && defined(HAVE_SYS_SENDFILE_H)
diff --git a/server/config.c b/server/config.c
index 674e237..55ef327 100644
--- a/server/config.c
+++ b/server/config.c
@@ -442,6 +442,24 @@ static void cfg_elm_end (GMarkupParseContext *context,
 		cc->text = NULL;
 	}
 
+	else if (!strcmp(element_name, "SelfCheckPeriod")) {
+		if (!cc->text) {
+			applog(LOG_WARNING, "SelfCheckPeriod element empty");
+			return;
+		}
+		n = strtol(cc->text, NULL, 10);
+		if (n < 0 || n >= LONG_MAX) {
+			applog(LOG_ERR, "SelfCheckPeriod '%s' is invalid",
+			       cc->text);
+			free(cc->text);
+			cc->text = NULL;
+			return;
+		}
+		chunkd_srv.chk_period = n;
+		free(cc->text);
+		cc->text = NULL;
+	}
+
 	else {
 		applog(LOG_WARNING, "Unknown element \"%s\"", element_name);
 	}
@@ -527,6 +545,16 @@ void read_config(void)
 #endif
 	}
 
+	/*
+	 * The check period determines how quickly the self-check notices
+	 * bad keys when there are few keys or they are small (otherwise the
+	 * time to rescan determines the observation latency). To save
+	 * power, the default is several hours. Notice that it's randomized.
+	 * Zero means no self-check.
+	 */
+	if (!chunkd_srv.chk_period)
+		chunkd_srv.chk_period = 3 * 60 * 60;
+
 	free(ctx.geo_area);
 	free(ctx.geo_zone);
 	free(ctx.geo_rack);
diff --git a/server/selfcheck.c b/server/selfcheck.c
new file mode 100644
index 0000000..218516f
--- /dev/null
+++ b/server/selfcheck.c
@@ -0,0 +1,208 @@
+#define _GNU_SOURCE
+#include "chunkd-config.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stdint.h>
+#include <syslog.h>
+#include <tcutil.h>
+#include <tchdb.h>
+#include "chunkd.h"
+
+struct chk_arg {
+	time_t period;
+	TCHDB *hdb;
+	// GThread *gthread;
+};
+
+struct chk_tls {
+	struct chk_arg *arg;
+
+	int stat_ok;
+	int stat_conflict;
+};
+
+static void chk_list_objs(struct chk_tls *tls, uint32_t table_id)
+{
+	struct fs_obj_lister lister;
+	char *fn;
+	char *csum, *csum_act;
+	char *owner;
+	unsigned long long size;
+	time_t mtime;
+	void *key_in;
+	size_t klen_in;
+	struct objcache_entry *cep;
+	int rc;
+
+	memset(&lister, 0, sizeof(struct fs_obj_lister));
+	rc = fs_list_objs_open(&lister, chunkd_srv.vol_path, table_id);
+	if (rc) {
+		applog(LOG_WARNING, "Cannot open table %u: %s", table_id,
+		       strerror(-rc));
+		return;
+	}
+
+	while (fs_list_objs_next(&lister, &fn) > 0) {
+
+		rc = fs_obj_hdr_read(fn, &owner, &csum, &key_in, &klen_in,
+				     &size, &mtime);
+		if (rc < 0) {
+			free(fn);
+			break;
+		}
+
+		cep = objcache_get(&chunkd_srv.actives, key_in, klen_in);
+		if (!cep) {
+			/* This is pretty much impossible unless OOM */
+			applog(LOG_ERR, "chk: objcache_get failed");
+			free(owner);
+			free(csum);
+			free(key_in);
+			free(fn);
+			break;
+		}
+
+		rc = fs_obj_do_sum(fn, klen_in, &csum_act);
+		if (rc) {
+			applog(LOG_INFO, "Cannot compute checksum for %s", fn);
+		} else {
+			if (!objcache_test_dirty(&chunkd_srv.actives, cep)) {
+				if (strcmp(csum, csum_act)) {
+					applog(LOG_INFO,
+					       "Checksum mismatch for %s: "
+					       "expected %s actual %s",
+					       fn, csum, csum_act);
+					fs_obj_disable(fn);
+					/*
+					 * FIXME Suicide the whole server if
+					 * fs_obj_disable fails a few times,
+					 * maybe? But what about races?
+					 */
+				} else {
+					tls->stat_ok++;
+				}
+			} else {
+				tls->stat_conflict++;
+			}
+			free(csum_act);
+		}
+
+		objcache_put(&chunkd_srv.actives, cep);
+		free(owner);
+		free(csum);
+		free(key_in);
+
+		free(fn);
+	}
+}
+
+static void chk_dbscan(struct chk_tls *tls)
+{
+	TCHDB *hdb = tls->arg->hdb;
+	void *kbuf;
+	int klen;
+	uint32_t *val_p;
+	int vlen;
+
+	tchdbiterinit(hdb);
+	while ((kbuf = tchdbiternext(hdb, &klen)) != NULL) {
+		if (!strcmp(kbuf, MDB_TABLE_ID)) {
+			free(kbuf);
+			continue;
+		}
+		val_p = tchdbget(hdb, kbuf, klen, &vlen);
+		if (!val_p) {
+			free(kbuf);
+			continue;
+		}
+		if (vlen != sizeof(int32_t)) {
+			applog(LOG_INFO, "table %s bad size %d", kbuf, vlen);
+			free(val_p);
+			free(kbuf);
+			continue;
+		}
+
+		chk_list_objs(tls, GUINT32_FROM_LE(*val_p));
+
+		free(val_p);
+		free(kbuf);
+	}
+}
+
+static gpointer chk_thread_func(gpointer data)
+{
+	struct chk_tls _tls;
+	struct chk_tls *tls;
+	time_t dt;
+	struct timespec sleepreq;
+
+	tls = &_tls;
+	memset(tls, 0, sizeof(struct chk_tls));
+	tls->arg = data;
+
+	for (;;) {
+		/*
+		 * Without the randomization, all systems in a low loaded
+		 * datacenter are virtually guaranteed to start checks in the
+		 * same time, blow fuses and/or disrupt applications.
+		 */
+		dt = tls->arg->period;
+		if (dt >= 3)
+			dt += rand() % (dt/3);
+		if (debugging)
+			applog(LOG_DEBUG, "chk: sleeping %lu s", dt);
+		sleepreq.tv_sec = dt;
+		sleepreq.tv_nsec = 0;
+		nanosleep(&sleepreq, NULL);
+
+		tls->stat_ok = 0;
+		tls->stat_conflict = 0;
+
+		chk_dbscan(tls);
+ 
+		if (debugging)
+			applog(LOG_INFO, "chk: done ok %d busy %d",
+			       tls->stat_ok,
+			       tls->stat_conflict);
+	}
+	return NULL;
+}
+
+/*
+ * Mind that we cannot have two threads scanning the master db,
+ * as long as Tokyo Cabinet embeds one and only one iterator into
+ * an instance of open database with tchdbiterinit().
+ */
+static struct chk_arg *thread;
+
+void chk_init()
+{
+	GError *error;
+	struct chk_arg *arg;
+	GThread *gthread;
+
+	if (!chunkd_srv.chk_period) {
+		if (debugging)
+			applog(LOG_DEBUG, "chk: disabled by configuration");
+		return;
+	}
+
+	arg = malloc(sizeof(struct chk_arg));
+	if (!arg) {
+		applog(LOG_ERR, "No core");
+		exit(1);
+	}
+	arg->period = chunkd_srv.chk_period;
+	arg->hdb = chunkd_srv.tbl_master;
+
+	gthread = g_thread_create(chk_thread_func, arg, FALSE, &error);
+	if (!gthread) {
+		applog(LOG_ERR, "Failed to start replication thread: %s",
+		       error->message);
+		exit(1);
+	}
+
+	// arg->gthread = gthread;
+	thread = arg;
+}
diff --git a/server/server.c b/server/server.c
index e955dc8..fddc556 100644
--- a/server/server.c
+++ b/server/server.c
@@ -1678,6 +1678,8 @@ int main (int argc, char *argv[])
 		goto err_out_cld;
 	}
 
+	chk_init();
+
 	/* set up server networking */
 	for (tmpl = chunkd_srv.listeners; tmpl; tmpl = tmpl->next) {
 		rc = net_open(tmpl->data);
diff --git a/test/.gitignore b/test/.gitignore
index a929882..dd7d312 100644
--- a/test/.gitignore
+++ b/test/.gitignore
@@ -5,6 +5,7 @@ large-object
 lotsa-objects
 nop
 objcache-unit
+selfcheck-unit
 
 .libs
 libtest.a
diff --git a/test/Makefile.am b/test/Makefile.am
index 84b4837..e1f7ac4 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -26,11 +26,12 @@ TESTS =				\
 	auth			\
 	large-object		\
 	lotsa-objects		\
+	selfcheck-unit		\
 	stop-daemon		\
 	clean-db
 
 check_PROGRAMS		= auth basic-object it-works large-object \
-			  lotsa-objects nop objcache-unit
+			  lotsa-objects nop objcache-unit selfcheck-unit
 
 TESTLDADD		= ../lib/libchunkdc.la	\
 			  libtest.a		\
@@ -42,6 +43,7 @@ it_works_LDADD		= $(TESTLDADD)
 large_object_LDADD	= $(TESTLDADD)
 lotsa_objects_LDADD	= $(TESTLDADD)
 nop_LDADD		= $(TESTLDADD)
+selfcheck_unit_LDADD	= $(TESTLDADD)
 
 objcache_unit_LDADD	= @GLIB_LIBS@
 
diff --git a/test/selfcheck-unit.c b/test/selfcheck-unit.c
new file mode 100644
index 0000000..7b6ca0d
--- /dev/null
+++ b/test/selfcheck-unit.c
@@ -0,0 +1,313 @@
+
+/*
+ * Copyright 2009 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#define _GNU_SOURCE
+#include "chunkd-config.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <locale.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <glib.h>
+#include <chunkc.h>
+#include "test.h"
+
+enum { BUFLEN = 8192 };
+
+struct config_context {
+	char *text;
+
+	char *path;
+	long period;
+};
+
+static void cfg_elm_start(GMarkupParseContext *context,
+			  const gchar	 *element_name,
+			  const gchar    **attribute_names,
+			  const gchar    **attribute_values,
+			  gpointer       user_data,
+			  GError         **error)
+{
+	;
+}
+
+static void cfg_elm_end(GMarkupParseContext *context,
+			const gchar	*element_name,
+			gpointer	user_data,
+			GError		**error)
+{
+	struct config_context *cc = user_data;
+	long n;
+
+	if (!strcmp(element_name, "Path")) {
+		OK(cc->text);
+		free(cc->path);
+		cc->path = cc->text;
+		cc->text = NULL;
+	} else if (!strcmp(element_name, "SelfCheckPeriod")) {
+		OK(cc->text);
+		n = strtol(cc->text, NULL, 10);
+		OK(n >= 0 && n < LONG_MAX);
+		cc->period = n;
+		free(cc->text);
+		cc->text = NULL;
+	} else {
+		free(cc->text);
+		cc->text = NULL;
+	}
+}
+
+static bool str_n_isspace(const char *s, size_t n)
+{
+	char c;
+	size_t i;
+
+	for (i = 0; i < n; i++) {
+		c = *s++;
+		if (!isspace(c))
+			return false;
+	}
+	return true;
+}
+
+static void cfg_elm_text(GMarkupParseContext *context,
+			 const gchar	*text,
+			 gsize		text_len,
+			 gpointer	user_data,
+			 GError		**error)
+{
+	struct config_context *cc = user_data;
+
+	free(cc->text);
+	if (str_n_isspace(text, text_len))
+		cc->text = NULL;
+	else
+		cc->text = g_strndup(text, text_len);
+}
+
+static const GMarkupParser cfg_parse_ops = {
+	.start_element		= cfg_elm_start,
+	.end_element		= cfg_elm_end,
+	.text			= cfg_elm_text,
+};
+
+static void read_config(struct config_context *cc)
+{
+	GMarkupParseContext* parser;
+	char *top, *cfg;
+	char *text;
+	gsize len;
+	int rc;
+
+	top = getenv("top_srcdir");
+	OK(top);
+
+	rc = asprintf(&cfg, "%s/test/" TEST_CHUNKD_CFG, top);
+	OK(rc > 0);
+
+	rc = g_file_get_contents(cfg, &text, &len, NULL);
+	OK(rc);
+
+	parser = g_markup_parse_context_new(&cfg_parse_ops, 0, cc, NULL);
+	OK(parser);
+
+	rc = g_markup_parse_context_parse(parser, text, len, NULL);
+	OK(rc);
+
+	g_markup_parse_context_free(parser);
+	free(text);
+	free(cfg);
+}
+
+static void hexstr(const unsigned char *buf, size_t buf_len, char *outstr)
+{
+	static const char hex[] = "0123456789abcdef";
+	int i;
+
+	for (i = 0; i < buf_len; i++) {
+		outstr[i * 2]       = hex[(buf[i] & 0xF0) >> 4];
+		outstr[(i * 2) + 1] = hex[(buf[i] & 0x0F)     ];
+	}
+
+	outstr[buf_len * 2] = 0;
+}
+
+#define MDB_TPATH_FMT	"%s/%X"
+
+static char *fs_obj_pathname(const char *path, uint32_t table_id,
+			     const void *key, size_t key_len)
+{
+	char *s = NULL;
+	char prefix[5];
+	unsigned char md[SHA256_DIGEST_LENGTH];
+	char mdstr[(SHA256_DIGEST_LENGTH * 2) + 1];
+	int rc;
+
+	if (!table_id || !key || !key_len)
+		return NULL;
+
+	SHA256(key, key_len, md);
+	hexstr(md, SHA256_DIGEST_LENGTH, mdstr);
+
+	memcpy(prefix, mdstr, 4);
+	prefix[4] = 0;
+
+	rc = asprintf(&s, MDB_TPATH_FMT "/%s/%s", path, table_id,
+		      prefix, mdstr + 4);
+	OK(rc != -1);
+
+	return s;
+}
+
+static bool be_file_verify(const char *fn)
+{
+	int fd;
+
+	/* stat(2) is nice and all, but whatever. */
+	fd = open(fn, O_RDONLY);
+	if (fd < 0)
+		return false;
+	close(fd);
+	return true;
+}
+
+static bool be_file_damage(const char *fn)
+{
+	int fd;
+	char buf[1];
+	ssize_t rcs;
+	off_t rco;
+
+	fd = open(fn, O_WRONLY);
+	if (fd < 0)
+		return false;
+
+	/*
+	 * This puts the damage at data size minus the mysterious header size.
+	 */
+	rco = lseek(fd, BUFLEN, SEEK_SET);
+	if (rco == (off_t)-1) {
+		close(fd);
+		return false;
+	}
+
+	buf[0] = 0;
+	rcs = write(fd, buf, 1);
+	if (rcs <= 0) {
+		close(fd);
+		return false;
+	}
+
+	close(fd);
+	return true;
+}
+
+int main(int argc, char *argv[])
+{
+	static char key[] = "selfcheck-test-key";
+	int port;
+	char *buf;
+	struct config_context ctx;
+	struct st_client *stc;
+	char *fn;
+	size_t len;
+	void *mem;
+	bool rcb;
+
+	setlocale(LC_ALL, "C");
+
+	stc_init();
+	SSL_library_init();
+
+	buf = malloc(BUFLEN);
+	OK(buf);
+	memset(buf, 0x55, BUFLEN);
+
+	port = stc_readport(TEST_PORTFILE);
+	OK(port > 0);
+
+	/*
+	 * Step 0: read and parse the configuration.
+	 */
+	memset(&ctx, 0, sizeof(struct config_context));
+	read_config(&ctx);
+	OK(ctx.path);		/* must have a path */
+	OK(ctx.period);		/* must have an explicit SelfCheckPeriod */
+	OK(ctx.period <= 100);	/* A minute is already too long for tests */
+
+	/*
+	 * Step 1: create the object
+	 */
+	stc = stc_new(TEST_HOST, port, TEST_USER, TEST_USER_KEY, false);
+	OK(stc);
+	rcb = stc_table_openz(stc, TEST_TABLE, 0);
+	OK(rcb);
+	rcb = stc_put_inline(stc, key, sizeof(key), buf, BUFLEN, 0);
+	OK(rcb);
+	stc_free(stc);
+
+	/*
+	 * Step 2: verify the back-end file is created
+	 * N.B. We guess the tabled ID to be 1, sice all tests use the same
+	 *      table and they are numbered sequentially on a fresh DB.
+	 */
+	fn = fs_obj_pathname(ctx.path, 1, key, sizeof(key));
+	OK(fn);
+	rcb = be_file_verify(fn);
+	OK(rcb);
+
+	/*
+	 * Step 3: damage the back-end file and wait
+	 * We sleep for longer than the self-check period because:
+	 * 1) the server sleeps for up to period*1.5
+	 * 2) the server may be quite busy walking numerous objects
+	 * 3) the build system may be overloaded
+	 */
+	rcb = be_file_damage(fn);
+	OK(rcb);
+	sleep(ctx.period * 3);
+
+	/*
+	 * Step 4: verify that the damaged object is removed
+	 * This is, strictly speaking, not necessary. The true test is
+	 * trying to access the keyed object through the chunkserver's API.
+	 * But since we have the function already, might as well use it.
+	 */
+	rcb = be_file_verify(fn);
+	OK(!rcb);
+
+	free(fn);
+	fn = NULL;
+
+	/*
+	 * Step 5: verify that we didn't crash the chunkserver and that
+	 * the object we created and damaged is not considered present anymore.
+	 */
+	stc = stc_new(TEST_HOST, port, TEST_USER, TEST_USER_KEY, false);
+	OK(stc);
+	rcb = stc_table_openz(stc, TEST_TABLE, 0);
+	OK(rcb);
+	mem = stc_get_inline(stc, key, sizeof(key), &len);
+	OK(!mem);
+	stc_free(stc);
+
+	return 0;
+}
diff --git a/test/server-test.cfg b/test/server-test.cfg
index 80833d2..a487556 100644
--- a/test/server-test.cfg
+++ b/test/server-test.cfg
@@ -27,3 +27,5 @@
   <Host>localhost</Host>
 </CLD>
 
+<SelfCheckPeriod>35</SelfCheckPeriod>
+
diff --git a/test/test.h b/test/test.h
index c3c48b7..75aa250 100644
--- a/test/test.h
+++ b/test/test.h
@@ -38,6 +38,8 @@
 #define TEST_PORTFILE		"chunkd.port"
 #define TEST_PORTFILE_SSL	"chunkd-ssl.port"
 
+#define TEST_CHUNKD_CFG		"server-test.cfg"
+
 #define OK(expr)				\
 	do {					\
 		if (!(expr)) {			\
--
To unsubscribe from this list: send the line "unsubscribe hail-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Fedora Clound]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux