[Patch] chunkd: add self-checking

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch adds the self-check to Chunk. With it, the daemon can rescan
all of its keys and drop those that fail to match their own checksums
or throw an I/O error. Objects that are found faulty are made
invisible to applications (back-end files are renamed, so that bad
blocks are not reused). This is intended to work in concert with
applications that store redundant copies of their data objects.

This patch includes the part that tracks the active I/O, since the
self-check is the only user of it, so it made little sense to separate
the two. We have to track the I/O so that self-check does not mistakenly
assume a partially stored object to be faulty and kill it.

Running the self-check can adversely affect performance. As a crude
way to limit the problem, we limit the load to one check thread only.
Still, as anyone who has had mlocate or Beagle running on their desktop
knows, the biggest issue is not the additional I/O as such, but blowing
away the page cache and dentries in the kernel. Also, our scheduling is
between rudimentary and non-existent. We only provide a looping check
with a randomized delay and an external control to start and verify the
running of the checking.

Therefore, to avoid surprises with sudden loss of objects and with
performance anomalies, periodic self-check defaults to off.

Still, self-check is an integral part of the daemon, so we include
unit tests for both the I/O tracking and self-check itself.

Signed-off-by: Pete Zaitcev <zaitcev@xxxxxxxxxx>

---
 doc/setup.txt           |   10 +
 include/Makefile.am     |    2 
 include/chunk-private.h |    4 
 include/chunk_msg.h     |   21 ++
 include/chunkc.h        |    4 
 include/objcache.h      |   75 ++++++++
 lib/chunkdc.c           |   82 +++++++++
 server/Makefile.am      |    3 
 server/be-fs.c          |  130 ++++++++++++++
 server/chunkd.h         |   28 +++
 server/cldu.c           |    1 
 server/config.c         |   18 ++
 server/objcache.c       |  138 +++++++++++++++
 server/object.c         |    9 +
 server/selfcheck.c      |  293 +++++++++++++++++++++++++++++++++
 server/server.c         |  135 +++++++++++++++
 test/.gitignore         |    2 
 test/Makefile.am        |    7 
 test/objcache-unit.c    |   64 +++++++
 test/selfcheck-unit.c   |  334 ++++++++++++++++++++++++++++++++++++++
 test/test.h             |    2 
 tools/chcli.c           |  116 +++++++++++--
 22 files changed, 1454 insertions(+), 24 deletions(-)

commit 6991325fa6c3dd6518f386cd08bbecbdb558295c
Author: Master <zaitcev@xxxxxxxxxxxxxxxxxxxxx>
Date:   Tue Mar 2 11:04:56 2010 -0700

    Self-check with a simplified check status.

diff --git a/doc/setup.txt b/doc/setup.txt
index 1c419af..f8d9f00 100644
--- a/doc/setup.txt
+++ b/doc/setup.txt
@@ -104,6 +104,16 @@ _cld._udp.phx2.ex.com has SRV record 10 50 8081 maika.phx2.ex.com.
 		<Rack>F3R18</Rack>
 	</Geo>
 
+*) configure the self-check period, if desired. Note that the actual amount
+   of time that the daemon sleeps between self-check sweeps is randomized
+   in order to prevent the convoy effect, so the value below is approximate.
+
+	<!-- 30 * 60 * 60 == 108000 seconds -->
+	<SelfCheckPeriod>108000</SelfCheckPeriod>
+
+   The default is zero, which disables the periodic self-check. It may be
+   appropriate if the application performs its own end-to-end checking.
+
 *) start daemon (it will put itself into the background) with the
    configuration file just created:
 
diff --git a/include/Makefile.am b/include/Makefile.am
index ddc2b8a..7abe0b5 100644
--- a/include/Makefile.am
+++ b/include/Makefile.am
@@ -1,5 +1,5 @@
 
 include_HEADERS = chunkc.h chunk_msg.h
 
-EXTRA_DIST	= elist.h chunk_msg.h chunksrv.h chunk-private.h
+EXTRA_DIST	= elist.h chunk_msg.h chunksrv.h chunk-private.h objcache.h
 
diff --git a/include/chunk-private.h b/include/chunk-private.h
index 158df16..6ab7e75 100644
--- a/include/chunk-private.h
+++ b/include/chunk-private.h
@@ -52,4 +52,8 @@ static inline uint64_t cpu_to_le64(uint64_t val)
 	return GUINT64_TO_LE(val);
 }
 
+#define MDB_TPATH_FMT	"%s/%X"
+#define BAD_TPATH_FMT	"%s/bad"
+#define PREFIX_LEN 3
+
 #endif /* __CHUNK_PRIVATE_H__ */
diff --git a/include/chunk_msg.h b/include/chunk_msg.h
index 478edb5..154201d 100644
--- a/include/chunk_msg.h
+++ b/include/chunk_msg.h
@@ -40,6 +40,8 @@ enum chunksrv_ops {
 	CHO_LIST		= 5,
 	CHO_LOGIN		= 6,
 	CHO_TABLE_OPEN		= 7,
+	CHO_CHECK_START		= 8,
+	CHO_CHECK_STATUS	= 9,
 };
 
 enum chunk_errcode {
@@ -52,6 +54,7 @@ enum chunk_errcode {
 	che_SignatureDoesNotMatch	= 6,
 	che_InvalidKey			= 7,
 	che_InvalidTable		= 8,
+	che_Busy			= 9,
 };
 
 enum chunk_flags {
@@ -86,4 +89,22 @@ struct chunksrv_resp_get {
 	uint64_t		mtime;
 };
 
+enum chunk_check_state {
+	chk_Off,		/* no self-check thread */
+	chk_Idle,		/* thread waits for its period or a poke */
+	chk_Active		/* thread is starting or scanning */
+};
+
+struct chunk_check_status {
+	uint8_t			state;		/* enum chunk_check_state */
+	uint8_t			pad[3];
+	uint32_t		count;		/* lifetime; TODO not filled in yet */
+	uint64_t		lastdone;	/* UTC; little-endian on the wire */
+};
+
+struct chunksrv_resp_chkstat {
+	struct chunksrv_resp		resp;		/* response header */
+	struct chunk_check_status	chkstat;	/* payload after it */
+};
+
 #endif /* __CHUNK_MSG_H__ */
diff --git a/include/chunkc.h b/include/chunkc.h
index 6eb1b06..5ebc936 100644
--- a/include/chunkc.h
+++ b/include/chunkc.h
@@ -89,6 +89,10 @@ extern bool stc_put_inline(struct st_client *stc, const void *key,
 extern bool stc_del(struct st_client *stc, const void *key, size_t key_len);
 extern bool stc_ping(struct st_client *stc);
 
+extern bool stc_check_poke(struct st_client *stc);
+extern bool stc_check_status(struct st_client *stc,
+			     struct chunk_check_status *out);
+
 extern struct st_keylist *stc_keys(struct st_client *stc);
 
 extern int stc_readport(const char *fname);
diff --git a/include/objcache.h b/include/objcache.h
new file mode 100644
index 0000000..6e36e86
--- /dev/null
+++ b/include/objcache.h
@@ -0,0 +1,75 @@
+
+/*
+ * Copyright 2009 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+#ifndef _CHUNKD_OBJCACHE_H_
+#define _CHUNKD_OBJCACHE_H_
+
+#include <glib.h>
+#include <stdbool.h>
+
+struct objcache {
+	GMutex *lock;		/* taken around table and entry updates */
+	GHashTable *table;	/* key hash -> struct objcache_entry */
+};
+
+struct objcache_entry {
+	unsigned int hash;	/* hash of the object key; table key too */
+	unsigned int flags;	/* OC_F_xxx bits */
+	int ref;		/* users; entry is freed when it hits zero */
+};
+
+#define OC_F_DIRTY   0x1
+
+/*
+ * Get an entry and set flags.
+ * A method for every flag is needed because our locks are internal to
+ * the cache, and we want this to be atomic.
+ */
+#define objcache_get(c, k, l)		__objcache_get(c, k, l, 0)
+#define objcache_get_dirty(c, k, l)	__objcache_get(c, k, l, OC_F_DIRTY)
+extern struct objcache_entry *__objcache_get(struct objcache *cache,
+					     const char *key, int klen,
+					     unsigned int flag);
+
+/*
+ * Test for dirty.
+ */
+extern bool objcache_test_dirty(struct objcache *cache,
+				struct objcache_entry *entry);
+
+/*
+ * Put an entry (decrement and free, or an equivalent).
+ */
+extern void objcache_put(struct objcache *cache, struct objcache_entry *entry);
+
+/*
+ * Count objects in the cache. Can be slow, and used only for debugging.
+ */
+extern int objcache_count(struct objcache *cache);
+
+/*
+ * Init a cache. Call once. May fail since it allocates a mutex.
+ */
+extern int objcache_init(struct objcache *cache);
+
+/*
+ * Terminate a cache.
+ */
+extern void objcache_fini(struct objcache *cache);
+
+#endif
diff --git a/lib/chunkdc.c b/lib/chunkdc.c
index 91c42dc..93783bf 100644
--- a/lib/chunkdc.c
+++ b/lib/chunkdc.c
@@ -978,6 +978,88 @@ bool stc_ping(struct st_client *stc)
 	return true;
 }
 
+bool stc_check_poke(struct st_client *stc)
+{
+	struct chunksrv_req *rq = (struct chunksrv_req *) stc->req_buf;
+	struct chunksrv_resp reply;
+	bool ok;
+
+	if (stc->verbose)
+		fprintf(stderr, "libstc: CHECK_POKE\n");
+
+	/*
+	 * Build the request in the client's own buffer: set the opcode,
+	 * then sign it with the client's key.
+	 */
+	req_init(stc, rq);
+	rq->op = CHO_CHECK_START;
+	chreq_sign(rq, stc->key, rq->sig);
+
+	/*
+	 * One round trip: send the request, then collect the fixed-size
+	 * response header. If either half fails, report failure without
+	 * looking at the response.
+	 */
+	if (!net_write(stc, rq, req_len(rq)) ||
+	    !net_read(stc, &reply, sizeof(reply)))
+		return false;
+
+	/* Anything other than success is reported as a plain failure. */
+	ok = (reply.resp_code == che_Success);
+	if (!ok && stc->verbose)
+		fprintf(stderr, "CHECK_POKE resp code: %d\n",
+			reply.resp_code);
+	return ok;
+}
+
+/*
+ * Fetch the self-check status of the server into *out. Returns false on
+ * a network error, a non-success response, or an unexpected payload size.
+ */
+bool stc_check_status(struct st_client *stc, struct chunk_check_status *out)
+{
+	struct chunksrv_resp resp;
+	struct chunksrv_req *req = (struct chunksrv_req *) stc->req_buf;
+	uint64_t content_len;
+
+	if (stc->verbose)
+		fprintf(stderr, "libstc: CHECK_STATUS\n");
+
+	/* build and sign the request in the client's buffer */
+	req_init(stc, req);
+	req->op = CHO_CHECK_STATUS;
+	chreq_sign(req, stc->key, req->sig);
+
+	if (!net_write(stc, req, req_len(req)))
+		return false;
+
+	/* read response header */
+	if (!net_read(stc, &resp, sizeof(resp)))
+		return false;
+
+	if (resp.resp_code != che_Success) {
+		if (stc->verbose)
+			fprintf(stderr, "CHECK STATUS resp code: %d\n",
+				resp.resp_code);
+		return false;
+	}
+
+	content_len = le64_to_cpu(resp.data_len);
+	if (content_len != sizeof(struct chunk_check_status)) {
+		if (stc->verbose)
+			fprintf(stderr, "CHECK STATUS bogus length: %lld\n",
+				(long long) content_len);
+		/* XXX And the unread data in the pipe, what about it? */
+		return false;
+	}
+
+	/* read response data; *out is stored exactly as received */
+	if (!net_read(stc, out, content_len))
+		return false;
+
+	return true;
+}
+
 /*
  * For extra safety, call stc_init after g_thread_init, if present.
  * Currently we just call srand(), but since we use GLib, we may need
diff --git a/server/Makefile.am b/server/Makefile.am
index 516b081..f2a66a1 100644
--- a/server/Makefile.am
+++ b/server/Makefile.am
@@ -6,7 +6,8 @@ sbin_PROGRAMS	= chunkd
 
 chunkd_SOURCES	= chunkd.h		\
 		  ../lib/chunksrv.c	\
-		  be-fs.c object.c server.c config.c cldu.c util.c
+		  be-fs.c object.c server.c selfcheck.c config.c cldu.c util.c \
+		  objcache.c
 chunkd_LDADD	= \
 		  @CLDC_LIBS@ @GLIB_LIBS@ @CRYPTO_LIBS@ \
 		  @SSL_LIBS@ @EVENT_LIBS@ @TOKYOCABINET_LIBS@
diff --git a/server/be-fs.c b/server/be-fs.c
index a2b23d3..016b56f 100644
--- a/server/be-fs.c
+++ b/server/be-fs.c
@@ -38,11 +38,9 @@
 #include <syslog.h>
 #include <tcutil.h>
 #include <tchdb.h>
+#include <chunk-private.h>
 #include "chunkd.h"
 
-#define MDB_TABLE_ID	"__chunkd_table_id"
-#define MDB_TPATH_FMT	"%s/%X"
-
 struct fs_obj {
 	struct backend_obj	bo;
 
@@ -205,8 +203,6 @@ static struct fs_obj *fs_obj_alloc(void)
 	return obj;
 }
 
-#define PREFIX_LEN 3
-
 static char *fs_obj_pathname(uint32_t table_id,const void *key, size_t key_len)
 {
 	char *s = NULL;
@@ -265,6 +261,47 @@ err_out:
 	return NULL;
 }
 
+/*
+ * Build the pathname under the volume's "bad" directory into which a
+ * faulty object tagged with @tag is moved, creating the directory on
+ * first use. Returns a malloc-ed string for the caller to free, or NULL.
+ */
+static char *fs_obj_badname(unsigned long tag)
+{
+	char *path;
+	struct stat sb;
+
+	if (asprintf(&path, BAD_TPATH_FMT, chunkd_srv.vol_path) < 0)
+		return NULL;
+
+	if (stat(path, &sb) == 0) {
+		/* something is there already: it had better be a directory */
+		if (!S_ISDIR(sb.st_mode)) {
+			applog(LOG_WARNING,
+			       "%s: not a dir, fs_obj_badname go boom", path);
+			goto err_out;
+		}
+	} else if (errno != ENOENT) {
+		syslogerr(path);
+		goto err_out;
+	} else if (mkdir(path, 0777) < 0 && errno != EEXIST) {
+		/* EEXIST only means that somebody else made it first */
+		syslogerr(path);
+		goto err_out;
+	}
+	free(path);
+
+	/* the directory is in place; now name the object's slot in it */
+	if (asprintf(&path, BAD_TPATH_FMT "/%lu",
+		     chunkd_srv.vol_path, tag) < 0)
+		return NULL;
+	return path;
+
+err_out:
+	free(path);
+	return NULL;
+}
+
 static bool key_valid(const void *key, size_t key_len)
 {
 	if (!key || key_len < 1 || key_len > CHD_KEY_SZ)
@@ -694,6 +731,27 @@ err_out:
 	return false;
 }
 
+/* Move object file @fn into the "bad" directory; returns 0 or -errno. */
+int fs_obj_disable(const char *fn)
+{
+	struct stat st;
+	char *bad;
+	int rc = 0;
+
+	if (stat(fn, &st) < 0)
+		return -errno;
+
+	/* the inode number of the file becomes its name in the bad dir */
+	bad = fs_obj_badname(st.st_ino);
+	if (!bad)
+		return -EIO;	/* fs_obj_badname() does not say why */
+
+	if (rename(fn, bad) < 0)
+		rc = -errno;
+	free(bad);
+	return rc;
+}
+
 int fs_list_objs_open(struct fs_obj_lister *t,
 		      const char *root_path, uint32_t table_id)
 {
@@ -954,3 +1012,65 @@ GList *fs_list_objs(uint32_t table_id, const char *user)
 	return res;
 }
 
+/*
+ * SHA-1 the data of object file @fn, found after the header and @klen bytes
+ * of key. Returns 0 and a malloc-ed hex string in *csump, or negative errno.
+ */
+int fs_obj_do_sum(const char *fn, unsigned int klen, char **csump)
+{
+	enum { BUFLEN = 128 * 1024 };
+	void *buf;
+	int fd;
+	ssize_t rrc;
+	int rc;
+	SHA_CTX hash;
+	char *csum;
+	unsigned char md[SHA_DIGEST_LENGTH];
+
+	buf = malloc(BUFLEN);
+	if (!buf)
+		return -ENOMEM;
+
+	fd = open(fn, O_RDONLY);
+	if (fd == -1) {
+		rc = errno;
+		goto err_open;
+	}
+	if (lseek(fd, sizeof(struct be_fs_obj_hdr) + klen, SEEK_SET) == (off_t)-1) {
+		rc = errno;
+		goto err_close;
+	}
+
+	SHA1_Init(&hash);
+	for (;;) {
+		rrc = read(fd, buf, BUFLEN);
+		if (rrc < 0) {
+			rc = errno;
+			goto err_close;
+		}
+		/* A short read is not EOF; only a zero-length one is. */
+		if (rrc == 0)
+			break;
+		SHA1_Update(&hash, buf, rrc);
+	}
+	SHA1_Final(md, &hash);
+
+	csum = malloc(SHA_DIGEST_LENGTH*2 + 1);
+	if (!csum) {
+		rc = ENOMEM;
+		goto err_close;
+	}
+	hexstr(md, SHA_DIGEST_LENGTH, csum);
+
+	close(fd);
+	free(buf);
+	*csump = csum;
+	return 0;
+
+ err_close:
+	close(fd);
+ err_open:
+	free(buf);
+	return -rc;
+}
+
diff --git a/server/chunkd.h b/server/chunkd.h
index 19b13a7..41e0a4b 100644
--- a/server/chunkd.h
+++ b/server/chunkd.h
@@ -28,6 +28,7 @@
 #include <chunk_msg.h>
 #include <hail_log.h>
 #include <tchdb.h>
+#include <objcache.h>
 
 #ifndef ARRAY_SIZE
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
@@ -118,6 +119,7 @@ struct client {
 	uint64_t		out_len;
 
 	struct backend_obj	*out_bo;
+	struct objcache_entry	*out_ce;
 
 	uint64_t		in_len;
 	struct backend_obj	*in_obj;
@@ -188,8 +190,21 @@ struct server_socket {
 	const struct listen_cfg	*cfg;
 };
 
+enum chk_cmd {
+	CHK_CMD_EXIT,		/* checker thread exits */
+	CHK_CMD_RESCAN		/* checker thread runs a scan now */
+};
+
+enum chk_state {
+	CHK_ST_OFF,		/* no checker thread has been started */
+	CHK_ST_INIT,		/* thread is being started */
+	CHK_ST_IDLE,		/* thread waits for a timeout or a command */
+	CHK_ST_RUNNING,		/* scan requested or in progress */
+};
+
 struct server {
 	unsigned long		flags;		/* SFL_xxx above */
+	GMutex			*bigmutex;
 
 	char			*config;	/* master config file */
 	char			*pid_file;	/* PID file */
@@ -208,12 +223,19 @@ struct server {
 	char			*group;
 	uint32_t		nid;
 	struct geo		loc;
+	time_t			chk_period;
+	int			chk_pipe[2];
 
 	TCHDB			*tbl_master;
+	struct objcache		actives;
 
 	struct server_stats	stats;		/* global statistics */
+	enum chk_state		chk_state;
+	time_t			chk_done;
 };
 
+#define MDB_TABLE_ID	"__chunkd_table_id"
+
 extern struct hail_log cldu_hail_log;
 
 /* be-fs.c */
@@ -242,6 +264,7 @@ extern bool fs_obj_write_commit(struct backend_obj *bo, const char *user,
 extern bool fs_obj_delete(uint32_t table_id, const char *user,
 		          const void *kbuf, size_t klen,
 			  enum chunk_errcode *err_code);
+extern int fs_obj_disable(const char *fn);
 extern ssize_t fs_obj_sendfile(struct backend_obj *bo, int out_fd, size_t len);
 extern int fs_list_objs_open(struct fs_obj_lister *t,
 			     const char *root_path, uint32_t table_id);
@@ -254,6 +277,7 @@ extern GList *fs_list_objs(uint32_t table_id, const char *user);
 extern bool fs_table_open(const char *user, const void *kbuf, size_t klen,
 		   bool tbl_creat, bool excl_creat, uint32_t *table_id,
 		   enum chunk_errcode *err_code);
+extern int fs_obj_do_sum(const char *fn, unsigned int klen, char **csump);
 
 /* object.c */
 extern bool object_del(struct client *cli);
@@ -317,6 +341,10 @@ extern void resp_init_req(struct chunksrv_resp *resp,
 /* config.c */
 extern void read_config(void);
 
+/* selfcheck.c */
+extern void chk_init(void);
+extern int chk_spawn(time_t period, TCHDB *hdb);
+
 static inline bool use_sendfile(struct client *cli)
 {
 #if defined(HAVE_SENDFILE) && defined(HAVE_SYS_SENDFILE_H)
diff --git a/server/cldu.c b/server/cldu.c
index 741845b..9c27138 100644
--- a/server/cldu.c
+++ b/server/cldu.c
@@ -248,7 +248,6 @@ static struct cldc_ops cld_ops = {
 	.timer_ctl =	cldu_p_timer_ctl,
 	.pkt_send =	cldu_p_pkt_send,
 	.event =	cldu_p_event,
-	.errlog =	applog,
 };
 
 /*
diff --git a/server/config.c b/server/config.c
index 098bbbe..fee2c0c 100644
--- a/server/config.c
+++ b/server/config.c
@@ -442,6 +442,24 @@ static void cfg_elm_end (GMarkupParseContext *context,
 		cc->text = NULL;
 	}
 
+	else if (!strcmp(element_name, "SelfCheckPeriod")) {
+		if (!cc->text) {
+			applog(LOG_WARNING, "SelfCheckPeriod element empty");
+			return;
+		}
+		n = strtol(cc->text, NULL, 10);
+		if (n < 0 || n >= LONG_MAX) {
+			applog(LOG_ERR, "SelfCheckPeriod '%s' is invalid",
+			       cc->text);
+			free(cc->text);
+			cc->text = NULL;
+			return;
+		}
+		chunkd_srv.chk_period = n;
+		free(cc->text);
+		cc->text = NULL;
+	}
+
 	else {
 		applog(LOG_WARNING, "Unknown element \"%s\"", element_name);
 	}
diff --git a/server/objcache.c b/server/objcache.c
new file mode 100644
index 0000000..e969f89
--- /dev/null
+++ b/server/objcache.c
@@ -0,0 +1,138 @@
+
+/*
+ * Copyright 2009 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <objcache.h>
+#include <stdlib.h>
+
+/*
+ * A home-grown mixing function. Something like Paul Hsieh's SuperFastHash
+ * would be the better choice, but licenses are too confusing.
+ */
+static unsigned int objcache_hash(const char *key, int klen)
+{
+	const unsigned char *p = (const unsigned char *) key;
+	unsigned int h = 0x55555555;
+	int left;
+
+	/* fold in one byte per round; klen <= 0 leaves the seed untouched */
+	for (left = klen; left > 0; left--) {
+		h ^= h << 16;
+		h ^= *p++;
+		h = (h << 8) | (h >> 24);	/* rotate left by 8 bits */
+	}
+
+	return h;
+}
+
+/* Add a new single-reference entry; its own hash field doubles as the key. */
+static struct objcache_entry *objcache_insert(struct objcache *cache,
+					      unsigned int hash)
+{
+	struct objcache_entry *ent = malloc(sizeof(*ent));
+
+	if (ent) {
+		ent->hash = hash;
+		ent->flags = 0;
+		ent->ref = 1;
+		g_hash_table_insert(cache->table, &ent->hash, ent);
+	}
+	return ent;
+}
+
+/*
+ * Find or create the entry for a key, take a reference and set the flag
+ * bits, all under the cache lock. Returns NULL if allocation fails.
+ *
+ * Keys with the same computed hash are treated as the same key. This is
+ * acceptable here: at worst, unrelated activity in chunkd may spook
+ * self-check. Collisions inside GLib's own buckets are not our concern.
+ */
+struct objcache_entry *__objcache_get(struct objcache *cache,
+				      const char *key, int klen,
+				      unsigned int flag)
+{
+	struct objcache_entry *cep;
+	unsigned int hash;
+
+	hash = objcache_hash(key, klen);
+	g_mutex_lock(cache->lock);
+	cep = g_hash_table_lookup(cache->table, &hash);
+	if (cep)
+		cep->ref++;
+	else
+		cep = objcache_insert(cache, hash);
+	if (cep)	/* objcache_insert returns NULL when out of memory */
+		cep->flags |= flag;
+	g_mutex_unlock(cache->lock);
+	return cep;
+}
+
+bool objcache_test_dirty(struct objcache *cache, struct objcache_entry *cep)
+{
+	unsigned int dirty;
+
+	g_mutex_lock(cache->lock);	/* flags change under this lock */
+	dirty = cep->flags & OC_F_DIRTY;
+	g_mutex_unlock(cache->lock);
+	return dirty != 0;
+}
+
+/*
+ * Drop a reference taken with __objcache_get(). The last put frees the entry.
+ */
+void objcache_put(struct objcache *cache, struct objcache_entry *cep)
+{
+	g_mutex_lock(cache->lock);
+	/*
+	 * A put without a matching get must not happen. If it does, leave
+	 * the entry alone: at worst that is a leak for Valgrind to catch.
+	 */
+	if (cep->ref != 0 && --cep->ref == 0) {
+		/*
+		 * Last reference gone. The entry has to be in the table,
+		 * and we are so sure of it that we resort to abort(),
+		 * unwelcome as it is in daemons.
+		 */
+		if (!g_hash_table_remove(cache->table, &cep->hash))
+			abort();
+		free(cep);
+	}
+	g_mutex_unlock(cache->lock);
+}
+
+int objcache_count(struct objcache *cache)
+{
+	return g_hash_table_size(cache->table);	/* read without the lock */
+}
+
+int objcache_init(struct objcache *cache)
+{
+	cache->lock = g_mutex_new();
+	if (!cache->lock)
+		return -1;
+	/* We do not use g_str_hash because our keys may have nul bytes. */
+	cache->table = g_hash_table_new(g_int_hash, g_int_equal);
+	return 0;
+}
+
+void objcache_fini(struct objcache *cache)
+{
+	g_mutex_free(cache->lock);
+	g_hash_table_destroy(cache->table);	/* does not free live entries */
+}
diff --git a/server/object.c b/server/object.c
index ad1d98a..40ef4c4 100644
--- a/server/object.c
+++ b/server/object.c
@@ -73,6 +73,10 @@ void cli_out_end(struct client *cli)
 		fs_obj_free(cli->out_bo);
 		cli->out_bo = NULL;
 	}
+	if (cli->out_ce) {
+		objcache_put(&chunkd_srv.actives, cli->out_ce);
+		cli->out_ce = NULL;
+	}
 
 	free(cli->out_user);
 	cli->out_user = NULL;
@@ -217,6 +221,11 @@ bool object_put(struct client *cli)
 	if (!user)
 		return cli_err(cli, che_AccessDenied, true);
 
+	cli->out_ce = objcache_get_dirty(&chunkd_srv.actives,
+					 cli->key, cli->key_len);
+	if (!cli->out_ce)
+		return cli_err(cli, che_InternalError, true);
+
 	cli->out_bo = fs_obj_new(cli->table_id, cli->key, cli->key_len, &err);
 	if (!cli->out_bo)
 		return cli_err(cli, err, true);
diff --git a/server/selfcheck.c b/server/selfcheck.c
new file mode 100644
index 0000000..f8cbaef
--- /dev/null
+++ b/server/selfcheck.c
@@ -0,0 +1,293 @@
+#define _GNU_SOURCE
+#include "chunkd-config.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stdint.h>
+#include <syslog.h>
+#include <unistd.h>
+#include <poll.h>
+#include <errno.h>
+#include <tcutil.h>
+#include <tchdb.h>
+#include "chunkd.h"
+
+struct chk_arg {
+	time_t period;		/* seconds between sweeps; 0: on request only */
+	TCHDB *hdb;		/* database that is iterated for table IDs */
+	// GThread *gthread;
+};
+
+struct chk_tls {
+	struct chk_arg *arg;
+
+	int stat_ok;		/* objects whose checksum matched, this sweep */
+	int stat_conflict;	/* objects skipped as dirty, this sweep */
+};
+
+/* Checksum every object of one table; disable those that do not match. */
+static void chk_list_objs(struct chk_tls *tls, uint32_t table_id)
+{
+	struct fs_obj_lister lister;
+	char *fn;
+	char *csum, *csum_act;
+	char *owner;
+	unsigned long long size;
+	time_t mtime;
+	void *key_in;
+	size_t klen_in;
+	struct objcache_entry *cep;
+	int rc;
+
+	memset(&lister, 0, sizeof(struct fs_obj_lister));
+	rc = fs_list_objs_open(&lister, chunkd_srv.vol_path, table_id);
+	if (rc) {
+		applog(LOG_WARNING, "Cannot open table %u: %s", table_id,
+		       strerror(-rc));
+		return;
+	}
+
+	while (fs_list_objs_next(&lister, &fn) > 0) {
+		rc = fs_obj_hdr_read(fn, &owner, &csum, &key_in, &klen_in,
+				     &size, &mtime);
+		if (rc < 0) {
+			free(fn);
+			break;
+		}
+
+		cep = objcache_get(&chunkd_srv.actives, key_in, klen_in);
+		if (!cep) {
+			/* This is pretty much impossible unless OOM */
+			applog(LOG_ERR, "chk: objcache_get failed");
+			free(owner);
+			free(csum);
+			free(key_in);
+			free(fn);
+			break;
+		}
+
+		rc = fs_obj_do_sum(fn, klen_in, &csum_act);
+		if (rc) {
+			applog(LOG_INFO, "Cannot compute checksum for %s", fn);
+		} else {
+			if (objcache_test_dirty(&chunkd_srv.actives, cep)) {
+				tls->stat_conflict++;	/* a PUT is in flight */
+			} else if (!strcmp(csum, csum_act)) {
+				tls->stat_ok++;
+			} else {
+				applog(LOG_INFO, "Checksum mismatch for %s: "
+				       "expected %s actual %s",
+				       fn, csum, csum_act);
+				/*
+				 * FIXME Suicide the whole server if
+				 * fs_obj_disable fails a few times,
+				 * maybe? But what about races?
+				 */
+				rc = fs_obj_disable(fn);
+				if (rc)
+					applog(LOG_WARNING,
+					       "Cannot disable %s: %s",
+					       fn, strerror(-rc));
+			}
+			free(csum_act);
+		}
+
+		objcache_put(&chunkd_srv.actives, cep);
+		free(owner);
+		free(csum);
+		free(key_in);
+		free(fn);
+	}
+}
+
+/*
+ * Walk the master database and run the check on every table found in it.
+ */
+static void chk_dbscan(struct chk_tls *tls)
+{
+	TCHDB *db = tls->arg->hdb;
+	void *name;
+	uint32_t *idp;
+	int nlen;
+	int idlen;
+
+	tchdbiterinit(db);
+	for (;;) {
+		name = tchdbiternext(db, &nlen);
+		if (name == NULL)
+			break;
+
+		/* the MDB_TABLE_ID record is skipped: it is not looked up */
+		idp = NULL;
+		if (strcmp(name, MDB_TABLE_ID) != 0)
+			idp = tchdbget(db, name, nlen, &idlen);
+		if (idp != NULL) {
+			if (idlen == sizeof(int32_t))
+				chk_list_objs(tls, GUINT32_FROM_LE(*idp));
+			else
+				applog(LOG_INFO, "table %s bad size %d",
+				       name, idlen);
+			free(idp);
+		}
+		free(name);
+	}
+}
+
+/* One full sweep; publishes RUNNING on entry and the finish time on exit. */
+static void chk_thread_scan(struct chk_tls *tls)
+{
+	tls->stat_ok = 0;
+	tls->stat_conflict = 0;
+
+	g_mutex_lock(chunkd_srv.bigmutex);
+	chunkd_srv.chk_state = CHK_ST_RUNNING;
+	g_mutex_unlock(chunkd_srv.bigmutex);
+	chk_dbscan(tls);
+
+	g_mutex_lock(chunkd_srv.bigmutex);
+	chunkd_srv.chk_done = time(NULL);
+	g_mutex_unlock(chunkd_srv.bigmutex);
+	if (debugging)
+		applog(LOG_DEBUG, "chk: done ok %d busy %d",
+		       tls->stat_ok, tls->stat_conflict);
+}
+
+/*
+ * Fetch one command byte from the control pipe and act on it.
+ */
+static void chk_thread_command(struct chk_tls *tls)
+{
+	unsigned char op;
+	ssize_t n;
+
+	n = read(chunkd_srv.chk_pipe[0], &op, 1);
+	if (n < 0) {
+		applog(LOG_ERR, "pipe read error: %s", strerror(errno));
+		return;
+	}
+	if (n == 0) {
+		/* end of file on the pipe: nobody is left to command us */
+		if (debugging)
+			applog(LOG_DEBUG, "pipe short read, exiting\n");
+		g_thread_exit(NULL);
+		return;
+	}
+
+	if (op == CHK_CMD_RESCAN)
+		chk_thread_scan(tls);
+	else if (op == CHK_CMD_EXIT)
+		g_thread_exit(NULL);
+	else
+		applog(LOG_ERR, "bad scan command 0x%x\n", op);
+}
+
+/*
+ * Self-check thread: wait out a randomized period or a poke, scan, repeat.
+ */
+static gpointer chk_thread_func(gpointer data)
+{
+	struct chk_tls _tls;
+	struct chk_tls *tls;
+	struct pollfd pfd[1];
+	int tmo;
+	time_t dt;
+	int rc;
+
+	tls = &_tls;
+	memset(tls, 0, sizeof(struct chk_tls));
+	tls->arg = data;
+
+	for (;;) {
+		g_mutex_lock(chunkd_srv.bigmutex);
+		chunkd_srv.chk_state = CHK_ST_IDLE;
+		g_mutex_unlock(chunkd_srv.bigmutex);
+
+		/*
+		 * Without the randomization, all systems in a low loaded
+		 * datacenter are virtually guaranteed to start checks in the
+		 * same time, blow fuses and/or disrupt applications.
+		 */
+		dt = tls->arg->period;
+		if (dt == 0) {
+			tmo = -1;
+		} else {
+			if (dt >= 3)
+				dt += rand() % (dt/3);
+			if (dt > G_MAXINT / 1000)	/* poll() takes int ms */
+				dt = G_MAXINT / 1000;
+			if (debugging)
+				applog(LOG_DEBUG, "chk: sleeping %lu s",
+				       (unsigned long) dt);
+			tmo = dt * 1000;
+		}
+
+		memset(pfd, 0, sizeof(pfd));
+		pfd[0].fd = chunkd_srv.chk_pipe[0];
+		pfd[0].events = POLLIN;
+
+		rc = poll(pfd, 1, tmo);
+		if (rc < 0) {
+			if (errno == EINTR)
+				continue;	/* a signal: just wait again */
+			applog(LOG_WARNING, "chk: poll error: %s",
+			       strerror(errno));
+			break;	/* don't flood, just die */
+		}
+
+		if (rc == 0)
+			chk_thread_scan(tls);
+		else if (pfd[0].revents)
+			chk_thread_command(tls);
+	}
+	return NULL;
+}
+
+/*
+ * Mind that we cannot have two threads scanning the master db,
+ * as long as Tokyo Cabinet embeds one and only one iterator into
+ * an instance of open database with tchdbiterinit().
+ */
+static struct chk_arg *thread;
+
+/*
+ * Start the self-check thread; returns 0, or -1 with the reason logged.
+ */
+int chk_spawn(time_t period, TCHDB *hdb)
+{
+	struct chk_arg *arg;
+	GError *error = NULL;	/* GLib insists on NULL before the call */
+
+	arg = malloc(sizeof(struct chk_arg));
+	if (!arg) {
+		applog(LOG_ERR, "No core");
+		return -1;
+	}
+	arg->period = period;	/* zero: scan on request only */
+	arg->hdb = hdb;
+
+	if (!g_thread_create(chk_thread_func, arg, FALSE, &error)) {
+		applog(LOG_ERR, "Failed to start self-check thread: %s",
+		       error ? error->message : "unknown error");
+		g_clear_error(&error);
+		free(arg);
+		return -1;
+	}
+	thread = arg;
+	return 0;
+}
+
+/*
+ * Start periodic self-check at startup. A zero period leaves it disabled.
+ */
+void chk_init(void)
+{
+	if (!chunkd_srv.chk_period) {
+		if (debugging)
+			applog(LOG_DEBUG, "chk: disabled by configuration");
+		return;
+	}
+
+	chunkd_srv.chk_state = CHK_ST_INIT;
+	if (chk_spawn(chunkd_srv.chk_period, chunkd_srv.tbl_master))
+		exit(1);		/* message already logged */
+}
diff --git a/server/server.c b/server/server.c
index ae0b966..c81558f 100644
--- a/server/server.c
+++ b/server/server.c
@@ -142,6 +142,10 @@ static struct {
 	[che_InvalidTable] =
 	{ "che_InvalidTable", 400,
 	  "Invalid table requested, or table not open" },
+
+	[che_Busy] =
+	{ "che_Busy", 500,
+	  "Temporarily unable to process the command" },
 };
 
 void applog(int prio, const char *fmt, ...)
@@ -753,6 +757,56 @@ static bool cli_resp_xml(struct client *cli, GList *content)
 	return rcb;
 }
 
+/*
+ * Queue a binary reply: a response header followed by a private copy of
+ * the @content_len bytes at @data, so the caller's buffer need not outlive
+ * the call. On any failure the client is marked for disposal instead.
+ *
+ * Returns the result of cli_write_start() if that call moved the client
+ * out of the evt_recycle state, and true in every other case.
+ */
+static bool cli_resp_bin(struct client *cli, void *data, size_t content_len)
+{
+	struct chunksrv_resp *hdr;
+	void *payload = NULL;
+	bool started;
+
+	hdr = malloc(sizeof(*hdr));
+	if (!hdr)
+		goto err_dispose;
+
+	payload = malloc(content_len);
+	if (!payload)
+		goto err_free_hdr;
+	memcpy(payload, data, content_len);
+
+	resp_init_req(hdr, &cli->creq);
+	/* the length travels little-endian */
+	hdr->data_len = cpu_to_le64(content_len);
+
+	cli->state = evt_recycle;
+
+	/*
+	 * A buffer that the queue accepted is presumably released through
+	 * cli_cb_free, so on failure free only what was not queued yet.
+	 */
+	if (cli_writeq(cli, hdr, sizeof(*hdr), cli_cb_free, hdr))
+		goto err_free_hdr;
+	if (cli_writeq(cli, payload, content_len, cli_cb_free, payload))
+		goto err_free_payload;
+
+	started = cli_write_start(cli);
+	return (cli->state == evt_recycle) ? true : started;
+
+err_free_hdr:
+	free(hdr);
+err_free_payload:
+	free(payload);
+err_dispose:
+	cli->state = evt_dispose;
+	return true;
+}
+
 static bool volume_list(struct client *cli)
 {
 	char *s;
@@ -880,6 +934,57 @@ err_out:
 	return cli_err(cli, err, false);
 }
 
+static bool chk_poke(struct client *cli)
+{
+	unsigned char cmd = CHK_CMD_RESCAN;
+
+	g_mutex_lock(chunkd_srv.bigmutex);
+	switch (chunkd_srv.chk_state) {
+	case CHK_ST_OFF:	/* no thread yet: claim the slot, start one */
+		chunkd_srv.chk_state = CHK_ST_INIT;
+		g_mutex_unlock(chunkd_srv.bigmutex);
+		if (!chk_spawn(chunkd_srv.chk_period, chunkd_srv.tbl_master))
+			break;
+		/* No thread: back to OFF, or every later poke gets che_Busy. */
+		g_mutex_lock(chunkd_srv.bigmutex);
+		chunkd_srv.chk_state = CHK_ST_OFF;
+		g_mutex_unlock(chunkd_srv.bigmutex);
+		return cli_err(cli, che_InternalError, true);
+	case CHK_ST_INIT:
+	case CHK_ST_RUNNING:
+		g_mutex_unlock(chunkd_srv.bigmutex);
+		return cli_err(cli, che_Busy, true);
+	default:		/* idle: mark the scan as running right away */
+		chunkd_srv.chk_state = CHK_ST_RUNNING;
+		g_mutex_unlock(chunkd_srv.bigmutex);
+	}
+	return cli_err(cli, write(chunkd_srv.chk_pipe[1], &cmd, 1) == 1 ?
+			    che_Success : che_InternalError, true);
+}
+
+/* CHO_CHECK_STATUS: report the checker's state and last completion time. */
+static bool chk_status(struct client *cli)
+{
+	struct chunk_check_status st;
+	enum chk_state cur;
+
+	memset(&st, 0, sizeof(st));
+	g_mutex_lock(chunkd_srv.bigmutex);
+	st.lastdone = cpu_to_le64(chunkd_srv.chk_done);
+	cur = chunkd_srv.chk_state;
+	g_mutex_unlock(chunkd_srv.bigmutex);
+
+	/* fold the internal states into the three that go over the wire */
+	if (cur == CHK_ST_IDLE)
+		st.state = chk_Idle;
+	else if (cur == CHK_ST_INIT || cur == CHK_ST_RUNNING)
+		st.state = chk_Active;
+	else
+		st.state = chk_Off;
+
+	return cli_resp_bin(cli, &st, sizeof(st));
+}
+
 static bool valid_req_hdr(const struct chunksrv_req *req)
 {
 	size_t len;
@@ -905,6 +1010,8 @@ static const char *op2str(enum chunksrv_ops op)
 	case CHO_LIST:		return "CHO_LIST";
 	case CHO_LOGIN:		return "CHO_LOGIN";
 	case CHO_TABLE_OPEN:	return "CHO_TABLE_OPEN";
+	case CHO_CHECK_START:	return "CHO_CHECK_START";
+	case CHO_CHECK_STATUS:	return "CHO_CHECK_STATUS";
 
 	default:
 		return "BUG/UNKNOWN!";
@@ -974,7 +1081,7 @@ static bool cli_evt_exec_req(struct client *cli, unsigned int events)
 	}
 
 	/*
-	 * operations on objects
+	 * operations
 	 */
 	switch (req->op) {
 	case CHO_LOGIN:
@@ -1003,6 +1110,12 @@ static bool cli_evt_exec_req(struct client *cli, unsigned int events)
 	case CHO_TABLE_OPEN:
 		rcb = volume_open(cli);
 		break;
+	case CHO_CHECK_START:
+		rcb = chk_poke(cli);
+		break;
+	case CHO_CHECK_STATUS:
+		rcb = chk_status(cli);
+		break;
 	default:
 		rcb = cli_err(cli, che_InvalidURI, true);
 		break;
@@ -1580,6 +1693,7 @@ int main (int argc, char *argv[])
 	error_t aprc;
 	int rc = 1;
 	GList *tmpl;
+	unsigned char cmd;
 
 	/* isspace() and strcasecmp() consistency requires this */
 	setlocale(LC_ALL, "C");
@@ -1609,6 +1723,7 @@ int main (int argc, char *argv[])
 	cldu_hail_log.verbose = debugging;
 
 	g_thread_init(NULL);
+	chunkd_srv.bigmutex = g_mutex_new();
 	SSL_library_init();
 
 	/* init SSL */
@@ -1666,9 +1781,19 @@ int main (int argc, char *argv[])
 		goto err_out_session;
 	}
 
+	if (objcache_init(&chunkd_srv.actives) != 0) {
+		rc = 1;
+		goto err_out_objcache;
+	}
+
 	INIT_LIST_HEAD(&chunkd_srv.wr_trash);
 	chunkd_srv.trash_sz = 0;
 
+	if (pipe(chunkd_srv.chk_pipe) < 0) {
+		rc = 1;
+		goto err_out_pipe;
+	}
+
 	if (fs_open()) {
 		rc = 1;
 		goto err_out_fs;
@@ -1680,6 +1805,8 @@ int main (int argc, char *argv[])
 		goto err_out_cld;
 	}
 
+	chk_init();
+
 	/* set up server networking */
 	for (tmpl = chunkd_srv.listeners; tmpl; tmpl = tmpl->next) {
 		rc = net_open(tmpl->data);
@@ -1699,6 +1826,12 @@ err_out_listen:
 err_out_cld:
 	fs_close();
 err_out_fs:
+	cmd = CHK_CMD_EXIT;
+	write(chunkd_srv.chk_pipe[1], &cmd, 1);
+	close(chunkd_srv.chk_pipe[1]);
+err_out_pipe:
+	objcache_fini(&chunkd_srv.actives);
+err_out_objcache:
 	if (strict_free)
 		g_hash_table_destroy(chunkd_srv.fd_info);
 err_out_session:
diff --git a/test/.gitignore b/test/.gitignore
index ccb6bdb..dd7d312 100644
--- a/test/.gitignore
+++ b/test/.gitignore
@@ -4,6 +4,8 @@ it-works
 large-object
 lotsa-objects
 nop
+objcache-unit
+selfcheck-unit
 
 .libs
 libtest.a
diff --git a/test/Makefile.am b/test/Makefile.am
index d9c10b5..e1f7ac4 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -15,6 +15,7 @@ EXTRA_DIST =			\
 	ssl-key.pem ssl-cert.pem
 
 TESTS =				\
+	objcache-unit		\
 	prep-db			\
 	start-daemon		\
 	pid-exists		\
@@ -25,11 +26,12 @@ TESTS =				\
 	auth			\
 	large-object		\
 	lotsa-objects		\
+	selfcheck-unit		\
 	stop-daemon		\
 	clean-db
 
 check_PROGRAMS		= auth basic-object it-works large-object \
-			  lotsa-objects nop
+			  lotsa-objects nop objcache-unit selfcheck-unit
 
 TESTLDADD		= ../lib/libchunkdc.la	\
 			  libtest.a		\
@@ -41,6 +43,9 @@ it_works_LDADD		= $(TESTLDADD)
 large_object_LDADD	= $(TESTLDADD)
 lotsa_objects_LDADD	= $(TESTLDADD)
 nop_LDADD		= $(TESTLDADD)
+selfcheck_unit_LDADD	= $(TESTLDADD)
+
+objcache_unit_LDADD	= @GLIB_LIBS@
 
 noinst_LIBRARIES	= libtest.a
 
diff --git a/test/objcache-unit.c b/test/objcache-unit.c
new file mode 100644
index 0000000..874b57d
--- /dev/null
+++ b/test/objcache-unit.c
@@ -0,0 +1,64 @@
+
+/*
+ * Copyright 2009 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include "../server/objcache.c"
+#include "test.h"
+
+int main(int argc, char *argv[])
+{
+	/* three distinct keys that share a prefix; one has an embedded NUL */
+	static char key_a[] = { 'a' };
+	static char key_aa[] = { 'a', 'a' };
+	static char key_a0a[] = { 'a', '\0', 'a' };
+	struct objcache oc;
+	struct objcache_entry *ent[3], *again;
+	int i, rc;
+
+	g_thread_init(NULL);
+	rc = objcache_init(&oc);
+	OK(rc == 0);
+
+	/* look each key up once; every lookup must yield an entry */
+	ent[0] = objcache_get(&oc, key_a, sizeof(key_a));
+	OK(ent[0] != NULL);
+	ent[1] = objcache_get(&oc, key_aa, sizeof(key_aa));
+	OK(ent[1] != NULL);
+	ent[2] = objcache_get(&oc, key_a0a, sizeof(key_a0a));
+	OK(ent[2] != NULL);
+
+	rc = objcache_count(&oc);
+	OK(rc == 3);
+
+	OK(ent[0]->ref == 1);	/* no collisions, else improve hash */
+
+	for (i = 0; i < 3; i++)
+		objcache_put(&oc, ent[i]);
+
+	/* after the puts, the same key must come back as a fresh entry */
+	again = objcache_get(&oc, key_aa, sizeof(key_aa));
+	OK(again != NULL);
+	OK(again->ref == 1);	/* new */
+	objcache_put(&oc, again);
+
+	rc = objcache_count(&oc);
+	OK(rc == 0);
+
+	objcache_fini(&oc);
+	return 0;
+}
diff --git a/test/selfcheck-unit.c b/test/selfcheck-unit.c
new file mode 100644
index 0000000..fbd9a09
--- /dev/null
+++ b/test/selfcheck-unit.c
@@ -0,0 +1,334 @@
+
+/*
+ * Copyright 2009 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#define _GNU_SOURCE
+#include "chunkd-config.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <locale.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <glib.h>
+#include <chunk-private.h>
+#include <chunkc.h>
+#include "test.h"
+
+enum { BUFLEN = 8192 };	/* size in bytes of the test object's data buffer */
+
+struct config_context {
+	char *text;	/* most recent non-blank text run, or NULL */
+
+	char *path;	/* contents of the <Path> element */
+	long period;	/* <SelfCheckPeriod> value, parsed as decimal */
+};
+
+static void cfg_elm_start(GMarkupParseContext *context,
+			  const gchar	 *element_name,
+			  const gchar    **attribute_names,
+			  const gchar    **attribute_values,
+			  gpointer       user_data,
+			  GError         **error)
+{
+	/* Element-open events need no handling in this test. */
+}
+
+static void cfg_elm_end(GMarkupParseContext *context,
+			const gchar	*element_name,
+			gpointer	user_data,
+			GError		**error)
+{
+	struct config_context *cfg = user_data;
+
+	/* Consume the text collected for the element that just closed;
+	 * text belonging to any other element is simply dropped. */
+	if (strcmp(element_name, "Path") == 0) {
+		OK(cfg->text);
+		free(cfg->path);
+		cfg->path = cfg->text;
+		cfg->text = NULL;	/* the string now belongs to path */
+	} else if (strcmp(element_name, "SelfCheckPeriod") == 0) {
+		long period;
+
+		OK(cfg->text);
+		period = strtol(cfg->text, NULL, 10);
+		OK(period >= 0 && period < LONG_MAX);
+		cfg->period = period;
+	}
+	free(cfg->text);	/* no-op after the Path branch */
+	cfg->text = NULL;
+}
+
+/* Return true when the first n bytes of s are all whitespace (or n is 0). */
+static bool str_n_isspace(const char *s, size_t n)
+{
+	size_t i;
+
+	for (i = 0; i < n; i++) {
+		/* <ctype.h> needs an unsigned char value; plain char may be < 0 */
+		if (!isspace((unsigned char)s[i]))
+			return false;
+	}
+	return true;
+}
+
+static void cfg_elm_text(GMarkupParseContext *context,
+			 const gchar	*text,
+			 gsize		text_len,
+			 gpointer	user_data,
+			 GError		**error)
+{
+	struct config_context *cc = user_data;
+
+	/* Remember the latest non-blank text run (NULL when it is blank).
+	 * strndup(), not g_strndup(): the copy is released with free(). */
+	free(cc->text);
+	cc->text = NULL;
+	if (!str_n_isspace(text, text_len))
+		cc->text = strndup(text, text_len);
+}
+
+static const GMarkupParser cfg_parse_ops = {
+	.start_element		= cfg_elm_start,	/* does nothing */
+	.end_element		= cfg_elm_end,	/* stores Path, SelfCheckPeriod */
+	.text			= cfg_elm_text,	/* keeps the element's text */
+};
+
+/* Parse $top_srcdir/test/TEST_CHUNKD_CFG into *cc; each step is OK()-checked. */
+static void read_config(struct config_context *cc)
+{
+	GMarkupParseContext* parser;
+	char *top, *cfg, *text;
+	gsize len;
+	int rc;
+
+	top = getenv("top_srcdir");
+	OK(top);
+
+	rc = asprintf(&cfg, "%s/test/" TEST_CHUNKD_CFG, top);
+	OK(rc > 0);
+
+	rc = g_file_get_contents(cfg, &text, &len, NULL);
+	OK(rc);
+
+	parser = g_markup_parse_context_new(&cfg_parse_ops, 0, cc, NULL);
+	OK(parser);
+
+	rc = g_markup_parse_context_parse(parser, text, len, NULL);
+	OK(rc);
+
+	g_markup_parse_context_free(parser);
+	g_free(text);	/* allocated by GLib, so release it with g_free() */
+	free(cfg);	/* allocated by asprintf() */
+}
+
+/* Write buf as 2*buf_len lowercase hex digits plus a NUL into outstr. */
+static void hexstr(const unsigned char *buf, size_t buf_len, char *outstr)
+{
+	static const char hex[] = "0123456789abcdef";
+	size_t i;	/* same type as buf_len: no signed/unsigned compare */
+
+	for (i = 0; i < buf_len; i++) {
+		outstr[i * 2]       = hex[buf[i] >> 4];
+		outstr[(i * 2) + 1] = hex[buf[i] & 0x0F];
+	}
+	outstr[buf_len * 2] = 0;
+}
+
+/* Format MDB_TPATH_FMT(path, table_id) followed by "/<first PREFIX_LEN hex
+ * digits of SHA256(key)>/<remaining digits>" into a new string. Returns
+ * NULL when table_id is zero, key is NULL or key_len is zero. */
+static char *fs_obj_pathname(const char *path, uint32_t table_id,
+			     const void *key, size_t key_len)
+{
+	unsigned char digest[SHA256_DIGEST_LENGTH];
+	char digest_hex[(SHA256_DIGEST_LENGTH * 2) + 1];
+	char *pathname = NULL;
+	int rc;
+
+	if (table_id == 0 || key == NULL || key_len == 0)
+		return NULL;
+
+	SHA256(key, key_len, digest);
+	hexstr(digest, SHA256_DIGEST_LENGTH, digest_hex);
+
+	/* "%.*s" emits just the prefix digits, so no scratch copy is needed */
+	rc = asprintf(&pathname, MDB_TPATH_FMT "/%.*s/%s", path, table_id,
+		      (int)PREFIX_LEN, digest_hex, digest_hex + PREFIX_LEN);
+	OK(rc != -1);
+
+	return pathname;
+}
+
+/* Report whether fn can be opened read-only right now. */
+static bool be_file_verify(const char *fn)
+{
+	/* stat(2) is nice and all, but whatever. */
+	int fd = open(fn, O_RDONLY);
+	bool opened = (fd >= 0);
+
+	if (opened)
+		close(fd);
+	return opened;
+}
+
+/*
+ * Corrupt the back-end file fn by overwriting one byte at offset BUFLEN
+ * with zero. Returns true only if open, seek and write all succeeded.
+ */
+static bool be_file_damage(const char *fn)
+{
+	static const char zero[1] = { 0 };
+	bool damaged = false;
+	int fd;
+
+	fd = open(fn, O_WRONLY);
+	if (fd < 0)
+		return false;
+
+	/*
+	 * This puts the damage at data size minus the mysterious header size.
+	 */
+	if (lseek(fd, BUFLEN, SEEK_SET) == (off_t)-1)
+		goto out;
+
+	/* clobber a single byte */
+	if (write(fd, zero, 1) <= 0)
+		goto out;
+
+	damaged = true;
+
+out:
+	close(fd);
+	return damaged;
+}
+
+int main(int argc, char *argv[])
+{
+	static char key[] = "selfcheck-test-key";
+	int port;
+	char *buf;
+	struct config_context ctx;
+	struct st_client *stc;
+	char *fn;
+	size_t len;
+	void *mem;
+	struct chunk_check_status status1, status2;
+	int cnt;
+	bool rcb;
+	/* Store an object, corrupt its file, and expect self-check to drop it. */
+	setlocale(LC_ALL, "C");
+
+	stc_init();
+	SSL_library_init();
+
+	buf = malloc(BUFLEN);
+	OK(buf);
+	memset(buf, 0x55, BUFLEN);
+
+	port = stc_readport(TEST_PORTFILE);
+	OK(port > 0);
+
+	/*
+	 * Step 0: read and parse the configuration.
+	 */
+	memset(&ctx, 0, sizeof(struct config_context));
+	read_config(&ctx);
+	OK(ctx.path);		/* must have a path */
+	OK(!ctx.period);	/* should have a SelfCheckPeriod set to off */
+
+	/*
+	 * Step 1: create the object
+	 */
+	stc = stc_new(TEST_HOST, port, TEST_USER, TEST_USER_KEY, false);
+	OK(stc);
+	rcb = stc_table_openz(stc, TEST_TABLE, 0);
+	OK(rcb);
+	rcb = stc_put_inline(stc, key, sizeof(key), buf, BUFLEN, 0);
+	OK(rcb);
+	stc_free(stc);
+
+	/*
+	 * Step 2: verify the back-end file is created
+	 * N.B. We guess the table ID to be 1, since all tests use the same
+	 *      table and they are numbered sequentially on a fresh DB.
+	 */
+	fn = fs_obj_pathname(ctx.path, 1, key, sizeof(key));
+	OK(fn);
+	rcb = be_file_verify(fn);
+	OK(rcb);
+
+	/*
+	 * Step 3: damage the back-end file
+	 * No waiting happens here: the periodic self-check is expected to
+	 * be off (see the ctx.period check in step 0), so nothing would
+	 * notice the damage on its own. Step 4 forces a check instead and
+	 * polls until it has completed.
+	 */
+	rcb = be_file_damage(fn);
+	OK(rcb);
+
+	/*
+	 * Step 4: force self-check and make sure it runs
+	 */
+	stc = stc_new(TEST_HOST, port, TEST_USER, TEST_USER_KEY, false);
+	OK(stc);
+	rcb = stc_check_status(stc, &status1);
+	OK(rcb);
+	rcb = stc_check_poke(stc);
+	OK(rcb);
+	cnt = 0;
+	for (;;) {
+		sleep(2);
+		rcb = stc_check_status(stc, &status2);
+		OK(rcb);
+		if (status2.lastdone != status1.lastdone &&
+		    status2.state != chk_Active)
+			break;
+		++cnt;
+		OK(cnt < 15);
+	}
+	stc_free(stc);
+
+	/*
+	 * Step 5: verify that the damaged object is removed
+	 * This is, strictly speaking, not necessary. The true test is
+	 * trying to access the keyed object through the chunkserver's API.
+	 * But since we have the function already, might as well use it.
+	 */
+	rcb = be_file_verify(fn);
+	OK(!rcb);
+
+	free(fn);
+	fn = NULL;
+
+	/*
+	 * Step 6: verify that we didn't crash the chunkserver and that
+	 * the object we created and damaged is not considered present anymore.
+	 */
+	stc = stc_new(TEST_HOST, port, TEST_USER, TEST_USER_KEY, false);
+	OK(stc);
+	rcb = stc_table_openz(stc, TEST_TABLE, 0);
+	OK(rcb);
+	mem = stc_get_inline(stc, key, sizeof(key), &len);
+	OK(!mem);
+	stc_free(stc);
+
+	return 0;
+}
diff --git a/test/test.h b/test/test.h
index c3c48b7..75aa250 100644
--- a/test/test.h
+++ b/test/test.h
@@ -38,6 +38,8 @@
 #define TEST_PORTFILE		"chunkd.port"
 #define TEST_PORTFILE_SSL	"chunkd-ssl.port"
 
+#define TEST_CHUNKD_CFG		"server-test.cfg"
+
 #define OK(expr)				\
 	do {					\
 		if (!(expr)) {			\
diff --git a/tools/chcli.c b/tools/chcli.c
index 2c9ac23..1c3576d 100644
--- a/tools/chcli.c
+++ b/tools/chcli.c
@@ -89,6 +89,8 @@ enum chcli_cmd {
 	CHC_PUT,
 	CHC_DEL,
 	CHC_PING,
+	CHC_CHECKSTATUS,
+	CHC_CHECKSTART,
 };
 
 struct chcli_host {
@@ -178,10 +180,12 @@ static void show_cmds(void)
 	fprintf(stderr,
 "Supported chcli commands:\n"
 "\n"
-"GET key		Retrieve key, send to output (def: stdout)\n"
-"PUT key val	Store key\n"
-"DEL key	Delete key\n"
-"PING		Ping server\n"
+"GET key       Retrieve key, send to output (def: stdout)\n"
+"PUT key val   Store key\n"
+"DEL key       Delete key\n"
+"PING          Ping server\n"
+"CHECKSTATUS   Fetch status of server self-check\n"
+"CHECKSTART    Force server self-check\n"
 "\n"
 "Keys provided on the command line (as opposed to via -k) are stored\n"
 "with a C-style nul terminating character appended, adding 1 byte to\n"
@@ -316,6 +320,10 @@ static error_t parse_opt (int key, char *arg, struct argp_state *state)
 			cmd_mode = CHC_DEL;
 		else if (!strcasecmp(arg, "ping"))
 			cmd_mode = CHC_PING;
+		else if (!strcasecmp(arg, "checkstatus"))
+			cmd_mode = CHC_CHECKSTATUS;
+		else if (!strcasecmp(arg, "checkstart"))
+			cmd_mode = CHC_CHECKSTART;
 		else
 			argp_usage(state);	/* invalid cmd */
 		break;
@@ -346,13 +354,14 @@ static struct st_client *chcli_stc_new(void)
 
 	stc->verbose = chcli_verbose;
 
-	if (!stc_table_open(stc, table_name, table_name_len,
-			    table_create ? CHF_TBL_CREAT : 0)) {
-		fprintf(stderr, "%s:%u: failed to open table\n",
-			host->name,
-			host->port);
-		stc_free(stc);
-		return NULL;
+	if (table_name_len) {
+		if (!stc_table_open(stc, table_name, table_name_len,
+				    table_create ? CHF_TBL_CREAT : 0)) {
+			fprintf(stderr, "%s:%u: failed to open table\n",
+				host->name, host->port);
+			stc_free(stc);
+			return NULL;
+		}
 	}
 
 	return stc;
@@ -368,6 +377,7 @@ static int cmd_ping(void)
 
 	if (!stc_ping(stc)) {
 		fprintf(stderr, "PING failed\n");
+		stc_free(stc);
 		return 1;
 	}
 
@@ -406,6 +416,7 @@ static int cmd_del(void)
 
 	if (!stc_del(stc, key_data, key_data_len)) {
 		fprintf(stderr, "DEL failed\n");
+		stc_free(stc);
 		return 1;
 	}
 
@@ -503,6 +514,7 @@ static int cmd_put(void)
 
 	if (!rcb) {
 		fprintf(stderr, "PUT failed\n");
+		stc_free(stc);
 		return 1;
 	}
 
@@ -578,6 +590,7 @@ static int cmd_get(void)
 
 	if (!stc_get_start(stc, key_data, key_data_len, &rfd, &get_len)) {
 		fprintf(stderr, "GET initiation failed\n");
+		stc_free(stc);
 		return 1;
 	}
 
@@ -589,6 +602,7 @@ static int cmd_get(void)
 			fprintf(stderr, "GET output file %s open failed: %s\n",
 				output_fn,
 				strerror(errno));
+			stc_free(stc);
 			return 1;
 		}
 	}
@@ -601,6 +615,7 @@ static int cmd_get(void)
 
 		if (!recv_buf(stc, rfd, get_buf, need_len)) {
 			fprintf(stderr, "GET buffer failed\n");
+			stc_free(stc);
 			return 1;
 		}
 
@@ -609,6 +624,7 @@ static int cmd_get(void)
 			fprintf(stderr, "GET write to output failed: %s\n",
 				strerror(errno));
 			unlink(output_fn);
+			stc_free(stc);
 			return 1;
 		}
 
@@ -623,6 +639,67 @@ static int cmd_get(void)
 	return 0;
 }
 
+/* CHECKSTART command: send a self-check poke to the server. */
+static int cmd_check_poke(void)
+{
+	struct st_client *stc = chcli_stc_new();
+	int rc = 0;
+
+	if (!stc)
+		return 1;
+
+	if (!stc_check_poke(stc)) {
+		fprintf(stderr, "CHECK POKE failed\n");
+		rc = 1;
+	}
+
+	stc_free(stc);
+	return rc;
+}
+
+/* CHECKSTATUS command: fetch and print the server's self-check status. */
+static int cmd_check_status(void)
+{
+	struct st_client *stc;
+	struct chunk_check_status status;
+	const char *state;
+	char chartime[26] = "", *s;	/* ctime_r() needs at least 26 bytes */
+	time_t last_done;
+
+	stc = chcli_stc_new();
+	if (!stc)
+		return 1;
+
+	if (!stc_check_status(stc, &status)) {
+		fprintf(stderr, "CHECK STATUS fetch failed\n");
+		stc_free(stc);
+		return 1;
+	}
+
+	switch (status.state) {
+	case chk_Off:
+		state = "Off";
+		break;
+	case chk_Idle:
+		state = "Idle";
+		break;
+	case chk_Active:
+		state = "Active";
+		break;
+	default:
+		state = "UNKNOWN";
+	}
+	printf("state: %s\n", state);
+
+	last_done = GUINT64_FROM_LE(status.lastdone);
+	/* chartime stays "" when the time cannot be converted */
+	if (ctime_r(&last_done, chartime) && (s = strchr(chartime, '\n')))
+		*s = 0;
+	printf("last: %lld (%s)\n", (long long) last_done, chartime);
+
+	stc_free(stc);
+	return 0;
+}
+
 int main (int argc, char *argv[])
 {
 	error_t aprc;
@@ -639,9 +716,16 @@ int main (int argc, char *argv[])
 		fprintf(stderr, "no host specified\n");
 		return 1;
 	}
-	if (!table_name || !table_name_len) {
-		fprintf(stderr, "no table name specified\n");
-		return 1;
+	switch (cmd_mode) {
+	case CHC_PING:
+	case CHC_CHECKSTATUS:
+	case CHC_CHECKSTART:
+		break;
+	default:
+		if (!table_name || !table_name_len) {
+			fprintf(stderr, "no table name specified\n");
+			return 1;
+		}
 	}
 	if (strlen(username) == 0) {
 		fprintf(stderr, "no username specified\n");
@@ -671,6 +755,10 @@ int main (int argc, char *argv[])
 		return cmd_del();
 	case CHC_PING:
 		return cmd_ping();
+	case CHC_CHECKSTATUS:
+		return cmd_check_status();
+	case CHC_CHECKSTART:
+		return cmd_check_poke();
 	}
 
 	return 0;
--
To unsubscribe from this list: send the line "unsubscribe hail-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Fedora Clound]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux