[PATCH 3/3] chunkd: on-disk format stores per-64k checksums

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



commit e6fcc02bea062af291148771a59ee2028ae98834
Author: Jeff Garzik <jeff@xxxxxxxxxx>
Date:   Thu Jul 15 13:57:17 2010 -0400

    chunkd: Add checksum table to on-disk format, one sum per 64k of data
    
    Signed-off-by: Jeff Garzik <jgarzik@xxxxxxxxxx>

 chunkd/be-fs.c |  145 +++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 127 insertions(+), 18 deletions(-)

diff --git a/chunkd/be-fs.c b/chunkd/be-fs.c
index 671c8fd..1bd85ea 100644
--- a/chunkd/be-fs.c
+++ b/chunkd/be-fs.c
@@ -40,6 +40,11 @@
 
 #define BE_FS_OBJ_MAGIC		"CHU1"
 
+enum {
+	CHUNK_BLK_ORDER		= 16,			/* 64k blocks */
+	CHUNK_BLK_SZ		= 1 << CHUNK_BLK_ORDER,
+};
+
 struct fs_obj {
 	struct backend_obj	bo;
 
@@ -49,14 +54,23 @@ struct fs_obj {
 
 	int			in_fd;
 	char			*in_fn;
+
+	size_t			checked_bytes;
+	SHA_CTX			checksum;
+	unsigned int		csum_idx;
+	void			*csum_tbl;
+	size_t			csum_tbl_sz;
+
+	unsigned int		n_blk;
 };
 
 struct be_fs_obj_hdr {
 	char			magic[4];
 	uint32_t		key_len;
 	uint64_t		value_len;
+	uint32_t		n_blk;
 
-	char			reserved[16];
+	char			reserved[12];
 
 	unsigned char		hash[CHD_CSUM_SZ];
 	char			owner[128];
@@ -204,6 +218,8 @@ static struct fs_obj *fs_obj_alloc(void)
 	obj->out_fd = -1;
 	obj->in_fd = -1;
 
+	SHA1_Init(&obj->checksum);
+
 	return obj;
 }
 
@@ -314,6 +330,17 @@ static bool key_valid(const void *key, size_t key_len)
 	return true;
 }
 
+static unsigned int fs_blk_count(uint64_t data_len)
+{
+	uint64_t n_blk;
+
+	n_blk = data_len >> CHUNK_BLK_ORDER;
+	if (data_len & (CHUNK_BLK_SZ - 1))
+		n_blk++;
+
+	return (unsigned int) n_blk;
+}
+
 struct backend_obj *fs_obj_new(uint32_t table_id,
 			       const void *key, size_t key_len,
 			       uint64_t data_len,
@@ -321,6 +348,7 @@ struct backend_obj *fs_obj_new(uint32_t table_id,
 {
 	struct fs_obj *obj;
 	char *fn = NULL;
+	size_t csum_bytes;
 	enum chunk_errcode erc = che_InternalError;
 	off_t skip_len;
 
@@ -335,6 +363,13 @@ struct backend_obj *fs_obj_new(uint32_t table_id,
 		return NULL;
 	}
 
+	obj->n_blk = fs_blk_count(data_len);
+	csum_bytes = obj->n_blk * CHD_CSUM_SZ;
+	obj->csum_tbl = malloc(csum_bytes);
+	if (!obj->csum_tbl)
+		goto err_out;
+	obj->csum_tbl_sz = csum_bytes;
+
 	/* build local fs pathname */
 	fn = fs_obj_pathname(table_id, key, key_len);
 	if (!fn)
@@ -355,7 +390,7 @@ struct backend_obj *fs_obj_new(uint32_t table_id,
 	obj->out_fn = fn;
 
 	/* calculate size of front-of-file metadata area */
-	skip_len = sizeof(struct be_fs_obj_hdr) + key_len;
+	skip_len = sizeof(struct be_fs_obj_hdr) + key_len + csum_bytes;
 
 	/* position file pointer where object data (as in, not metadata)
 	 * will begin
@@ -391,8 +426,11 @@ struct backend_obj *fs_obj_open(uint32_t table_id, const char *user,
 	struct stat st;
 	struct be_fs_obj_hdr hdr;
 	ssize_t rrc;
-	uint64_t value_len;
+	uint64_t value_len, tmp64;
+	size_t csum_bytes;
 	enum chunk_errcode erc = che_InternalError;
+	struct iovec iov[2];
+	size_t total_rd_len;
 
 	if (!key_valid(key, key_len)) {
 		*err_code = che_InvalidKey;
@@ -447,25 +485,49 @@ struct backend_obj *fs_obj_open(uint32_t table_id, const char *user,
 	}
 
 	/* verify object key length matches input key length */
-	if (GUINT32_FROM_LE(hdr.key_len) != key_len)
+	if (G_UNLIKELY(GUINT32_FROM_LE(hdr.key_len) != key_len))
 		goto err_out;
 
-	/* verify file size large enough to contain value */
 	value_len = GUINT64_FROM_LE(hdr.value_len);
-	if ((st.st_size - sizeof(hdr) - key_len) < value_len) {
+	obj->n_blk = GUINT32_FROM_LE(hdr.n_blk);
+	csum_bytes = obj->n_blk * CHD_CSUM_SZ;
+
+	/* verify file size large enough to contain value */
+	tmp64 = value_len + sizeof(hdr) + key_len + csum_bytes;
+	if (G_UNLIKELY(st.st_size < tmp64)) {
 		applog(LOG_ERR, "obj(%s) unexpected size change", obj->in_fn);
 		goto err_out;
 	}
 
+	/* verify expected size of checksum table */
+	if (G_UNLIKELY(fs_blk_count(value_len) != obj->n_blk)) {
+		applog(LOG_ERR, "obj(%s) unexpected blk count "
+		       "(%u from val sz, %u from hdr)",
+		       obj->in_fn, fs_blk_count(value_len), obj->n_blk);
+		goto err_out;
+	}
+
+	obj->csum_tbl = malloc(csum_bytes);
+	if (!obj->csum_tbl)
+		goto err_out;
+	obj->csum_tbl_sz = csum_bytes;
+
 	obj->bo.key = malloc(key_len);
 	obj->bo.key_len = key_len;
 	if (!obj->bo.key)
 		goto err_out;
 
-	/* read object variable-length header */
-	rrc = read(obj->in_fd, obj->bo.key, key_len);
-	if ((rrc != key_len) || (memcmp(key, obj->bo.key, key_len))) {
-		applog(LOG_ERR, "read hdr key obj(%s) failed: %s",
+	/* init additional header segment list */
+	iov[0].iov_base = obj->bo.key;
+	iov[0].iov_len = key_len;
+	iov[1].iov_base = obj->csum_tbl;
+	iov[1].iov_len = csum_bytes;
+	total_rd_len = iov[0].iov_len + iov[1].iov_len;
+
+	/* read additional header segments (key, checksum table) */
+	rrc = readv(obj->in_fd, iov, ARRAY_SIZE(iov));
+	if ((rrc != total_rd_len) || (memcmp(key, obj->bo.key, key_len))) {
+		applog(LOG_ERR, "read addnl hdrs(%s) failed: %s",
 			obj->in_fn,
 			(rrc < 0) ? strerror(errno) : "<unknown reasons>");
 		goto err_out;
@@ -508,6 +570,7 @@ void fs_obj_free(struct backend_obj *bo)
 	if (obj->in_fd >= 0)
 		close(obj->in_fd);
 
+	free(obj->csum_tbl);
 	free(obj);
 }
 
@@ -524,17 +587,48 @@ ssize_t fs_obj_read(struct backend_obj *bo, void *ptr, size_t len)
 	return rc;
 }
 
+static void obj_flush_csum(struct backend_obj *bo)
+{
+	struct fs_obj *obj = bo->private;
+	unsigned char md[CHD_CSUM_SZ];
+
+	SHA1_Final(md, &obj->checksum);
+
+	memcpy(obj->csum_tbl + ((obj->csum_idx++) * CHD_CSUM_SZ),
+	       md, CHD_CSUM_SZ);
+
+	obj->checked_bytes = 0;
+	SHA1_Init(&obj->checksum);
+}
+
 ssize_t fs_obj_write(struct backend_obj *bo, const void *ptr, size_t len)
 {
 	struct fs_obj *obj = bo->private;
-	ssize_t rc;
+	ssize_t rc = 0;
+
+	while (len > 0) {
+		size_t unchecked;
+
+		unchecked = CHUNK_BLK_SZ - obj->checked_bytes;
+
+		rc = write(obj->out_fd, ptr, MIN(unchecked, len));
+		if (rc < 0) {
+			applog(LOG_ERR, "obj write(%s) failed: %s",
+			       obj->out_fn, strerror(errno));
+			break;
+		}
+
+		SHA1_Update(&obj->checksum, ptr, rc);
 
-	rc = write(obj->out_fd, ptr, len);
-	if (rc < 0)
-		applog(LOG_ERR, "obj write(%s) failed: %s",
-		       obj->out_fn, strerror(errno));
-	else
 		obj->written_bytes += rc;
+		obj->checked_bytes += rc;
+		ptr += rc;
+		len -= rc;
+
+		/* if at end of 64k block, update csum table with new csum */
+		if (obj->checked_bytes == CHUNK_BLK_SZ)
+			obj_flush_csum(bo);
+	}
 
 	return rc;
 }
@@ -546,7 +640,7 @@ bool fs_obj_write_commit(struct backend_obj *bo, const char *user,
 	struct be_fs_obj_hdr hdr;
 	ssize_t wrc;
 	size_t total_wr_len;
-	struct iovec iov[2];
+	struct iovec iov[3];
 
 	memset(&hdr, 0, sizeof(hdr));
 	memcpy(hdr.magic, BE_FS_OBJ_MAGIC, strlen(BE_FS_OBJ_MAGIC));
@@ -554,6 +648,19 @@ bool fs_obj_write_commit(struct backend_obj *bo, const char *user,
 	strncpy(hdr.owner, user, sizeof(hdr.owner));
 	hdr.key_len = GUINT32_TO_LE(bo->key_len);
 	hdr.value_len = GUINT64_TO_LE(obj->written_bytes);
+	hdr.n_blk = GUINT32_TO_LE(obj->n_blk);
+
+	/* update checksum table with final csum, if necessary */
+	if (obj->checked_bytes > 0)
+		obj_flush_csum(bo);
+
+	if (G_UNLIKELY(obj->csum_idx != obj->n_blk)) {
+		applog(LOG_ERR, "BUG(%s): csum_idx/n_blk mismatch: %u/%u",
+		       obj->out_fn, obj->csum_idx, obj->n_blk);
+		return false;
+	}
+
+	obj->csum_idx = 0;
 
 	/* go back to beginning of file */
 	if (lseek(obj->out_fd, 0, SEEK_SET) < 0) {
@@ -567,7 +674,9 @@ bool fs_obj_write_commit(struct backend_obj *bo, const char *user,
 	iov[0].iov_len = sizeof(hdr);
 	iov[1].iov_base = bo->key;
 	iov[1].iov_len = bo->key_len;
-	total_wr_len = iov[0].iov_len + iov[1].iov_len;
+	iov[2].iov_base = obj->csum_tbl;
+	iov[2].iov_len = obj->csum_tbl_sz;
+	total_wr_len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len;
 
 	/* write object header segments */
 	wrc = writev(obj->out_fd, iov, ARRAY_SIZE(iov));
--
To unsubscribe from this list: send the line "unsubscribe hail-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Fedora Cloud]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux