Recent changes (master)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The following changes since commit 1066358aebafb7221732bedd6fb9fde56b14be7b:

  Improve dedupe/compression buffer filling for mixed block sizes (2014-09-26 15:04:58 -0600)

are available in the git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to bc095aab789a2e7d036c6d499f6c124a0c2d5de0:

  t/dedupe: fixup bloom entry calculation (2014-09-27 21:29:03 -0600)

----------------------------------------------------------------
Jens Axboe (12):
      parse: fix issue with not filling leftover percentages correctly
      dedupe: exit gracefully if device/file open fails
      Add murmurhash3
      crc/test: fix alignment
      crc/test: add jhash
      Move murmur3 hash to crc/
      t/dedupe: print dedupe ratio instead of made-up factor
      crc/test: if the checksum needs a _final(), do it in the loop
      Add fnv hash
      crc/test: add fnv
      bloom: up hashes to 5 by default
      t/dedupe: fixup bloom entry calculation

 Makefile      |    4 +--
 crc/fnv.c     |   16 ++++++++++
 crc/fnv.h     |    8 +++++
 crc/murmur3.c |   68 +++++++++++++++++++++++++++++++++++++++++
 crc/murmur3.h |    8 +++++
 crc/test.c    |   93 ++++++++++++++++++++++++++++++++++++++++++++-------------
 lib/bloom.c   |   29 ++++++++++++++----
 options.c     |   11 +++----
 t/dedupe.c    |   15 ++++++----
 9 files changed, 212 insertions(+), 40 deletions(-)
 create mode 100644 crc/fnv.c
 create mode 100644 crc/fnv.h
 create mode 100644 crc/murmur3.c
 create mode 100644 crc/murmur3.h

---

Diff of recent changes:

diff --git a/Makefile b/Makefile
index 8c424e3..8c6c056 100644
--- a/Makefile
+++ b/Makefile
@@ -192,8 +192,8 @@ endif
 ifeq ($(CONFIG_TARGET_OS), Linux)
 T_DEDUPE_OBJS = t/dedupe.o
 T_DEDUPE_OBJS += lib/rbtree.o t/log.o mutex.o smalloc.o gettime.o crc/md5.o \
-		memalign.o lib/bloom.o t/debug.o crc/xxhash.o crc/crc32c.o \
-		crc/crc32c-intel.o
+		memalign.o lib/bloom.o t/debug.o crc/xxhash.o crc/murmur3.o \
+		crc/crc32c.o crc/crc32c-intel.o crc/fnv.o
 T_DEDUPE_PROGS = t/dedupe
 endif
 
diff --git a/crc/fnv.c b/crc/fnv.c
new file mode 100644
index 0000000..04c0560
--- /dev/null
+++ b/crc/fnv.c
@@ -0,0 +1,16 @@
+#include "fnv.h"
+
+#define FNV_PRIME	0x100000001b3ULL
+
+uint64_t fnv(const void *buf, uint32_t len, uint64_t hval)
+{
+	const uint64_t *ptr = buf;
+	const uint64_t *end = (void *) buf + len;
+
+	while (ptr < end) {
+		hval *= FNV_PRIME;
+		hval ^= (uint64_t) *ptr++;
+	}
+
+	return hval;
+}
diff --git a/crc/fnv.h b/crc/fnv.h
new file mode 100644
index 0000000..ef2b77b
--- /dev/null
+++ b/crc/fnv.h
@@ -0,0 +1,8 @@
+#ifndef FIO_FNV_H
+#define FIO_FNV_H
+
+#include <inttypes.h>
+
+uint64_t fnv(const void *, uint32_t, uint64_t);
+
+#endif
diff --git a/crc/murmur3.c b/crc/murmur3.c
new file mode 100644
index 0000000..e316f59
--- /dev/null
+++ b/crc/murmur3.c
@@ -0,0 +1,68 @@
+#include "murmur3.h"
+
+static inline uint32_t rotl32(uint32_t x, int8_t r)
+{
+	return (x << r) | (x >> (32 - r));
+}
+
+//-----------------------------------------------------------------------------
+// Finalization mix - force all bits of a hash block to avalanche
+
+static inline uint32_t fmix32(uint32_t h)
+{
+	h ^= h >> 16;
+	h *= 0x85ebca6b;
+	h ^= h >> 13;
+	h *= 0xc2b2ae35;
+	h ^= h >> 16;
+
+	return h;
+}
+
+static uint32_t murmur3_tail(const uint8_t *data, const int nblocks,
+			     uint32_t len, const uint32_t c1,
+			     const uint32_t c2, uint32_t h1)
+{
+	const uint8_t *tail = (const uint8_t *)(data + nblocks * 4);
+
+	uint32_t k1 = 0;
+	switch (len & 3) {
+	case 3:
+		k1 ^= tail[2] << 16;
+	case 2:
+		k1 ^= tail[1] << 8;
+	case 1:
+		k1 ^= tail[0];
+		k1 *= c1;
+		k1 = rotl32(k1, 15);
+		k1 *= c2;
+		h1 ^= k1;
+	};
+
+	return fmix32(h1 ^ len);
+}
+
+uint32_t murmurhash3(const void *key, uint32_t len, uint32_t seed)
+{
+	const uint8_t *data = (const uint8_t *)key;
+	const int nblocks = len / 4;
+	uint32_t h1 = seed;
+	const uint32_t c1 = 0xcc9e2d51;
+	const uint32_t c2 = 0x1b873593;
+	const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4);
+	int i;
+
+	for (i = -nblocks; i; i++) {
+		uint32_t k1 = blocks[i];
+
+		k1 *= c1;
+		k1 = rotl32(k1, 15);
+		k1 *= c2;
+
+		h1 ^= k1;
+		h1 = rotl32(h1, 13);
+		h1 = h1 * 5 + 0xe6546b64;
+	}
+
+	return murmur3_tail(data, nblocks, len, c1, c2, h1);
+}
diff --git a/crc/murmur3.h b/crc/murmur3.h
new file mode 100644
index 0000000..89f6500
--- /dev/null
+++ b/crc/murmur3.h
@@ -0,0 +1,8 @@
+#ifndef FIO_MURMUR3_H
+#define FIO_MURMUR3_H
+
+#include <inttypes.h>
+
+uint32_t murmurhash3(const void *key, uint32_t len, uint32_t seed);
+
+#endif
diff --git a/crc/test.c b/crc/test.c
index 36054e6..bca46f2 100644
--- a/crc/test.c
+++ b/crc/test.c
@@ -17,6 +17,9 @@
 #include "../crc/sha256.h"
 #include "../crc/sha512.h"
 #include "../crc/xxhash.h"
+#include "../crc/murmur3.h"
+#include "../crc/fnv.h"
+#include "../hash.h"
 
 #include "test.h"
 
@@ -26,7 +29,8 @@
 struct test_type {
 	const char *name;
 	unsigned int mask;
-	void (*fn)(void *, size_t);
+	void (*fn)(struct test_type *, void *, size_t);
+	uint32_t output;
 };
 
 enum {
@@ -40,9 +44,12 @@ enum {
 	T_SHA256	= 1U << 7,
 	T_SHA512	= 1U << 8,
 	T_XXHASH	= 1U << 9,
+	T_MURMUR3	= 1U << 10,
+	T_JHASH		= 1U << 11,
+	T_FNV		= 1U << 12,
 };
 
-static void t_md5(void *buf, size_t size)
+static void t_md5(struct test_type *t, void *buf, size_t size)
 {
 	uint32_t digest[4];
 	struct fio_md5_ctx ctx = { .hash = digest };
@@ -50,13 +57,13 @@ static void t_md5(void *buf, size_t size)
 
 	fio_md5_init(&ctx);
 
-	for (i = 0; i < NR_CHUNKS; i++)
+	for (i = 0; i < NR_CHUNKS; i++) {
 		fio_md5_update(&ctx, buf, size);
-
-	fio_md5_final(&ctx);
+		fio_md5_final(&ctx);
+	}
 }
 
-static void t_crc64(void *buf, size_t size)
+static void t_crc64(struct test_type *t, void *buf, size_t size)
 {
 	int i;
 
@@ -64,7 +71,7 @@ static void t_crc64(void *buf, size_t size)
 		fio_crc64(buf, size);
 }
 
-static void t_crc32(void *buf, size_t size)
+static void t_crc32(struct test_type *t, void *buf, size_t size)
 {
 	int i;
 
@@ -72,7 +79,7 @@ static void t_crc32(void *buf, size_t size)
 		fio_crc32(buf, size);
 }
 
-static void t_crc32c(void *buf, size_t size)
+static void t_crc32c(struct test_type *t, void *buf, size_t size)
 {
 	int i;
 
@@ -80,7 +87,7 @@ static void t_crc32c(void *buf, size_t size)
 		fio_crc32c(buf, size);
 }
 
-static void t_crc16(void *buf, size_t size)
+static void t_crc16(struct test_type *t, void *buf, size_t size)
 {
 	int i;
 
@@ -88,7 +95,7 @@ static void t_crc16(void *buf, size_t size)
 		fio_crc16(buf, size);
 }
 
-static void t_crc7(void *buf, size_t size)
+static void t_crc7(struct test_type *t, void *buf, size_t size)
 {
 	int i;
 
@@ -96,7 +103,7 @@ static void t_crc7(void *buf, size_t size)
 		fio_crc7(buf, size);
 }
 
-static void t_sha1(void *buf, size_t size)
+static void t_sha1(struct test_type *t, void *buf, size_t size)
 {
 	uint32_t sha[5];
 	struct fio_sha1_ctx ctx = { .H = sha };
@@ -108,7 +115,7 @@ static void t_sha1(void *buf, size_t size)
 		fio_sha1_update(&ctx, buf, size);
 }
 
-static void t_sha256(void *buf, size_t size)
+static void t_sha256(struct test_type *t, void *buf, size_t size)
 {
 	uint8_t sha[64];
 	struct fio_sha256_ctx ctx = { .buf = sha };
@@ -116,13 +123,13 @@ static void t_sha256(void *buf, size_t size)
 
 	fio_sha256_init(&ctx);
 
-	for (i = 0; i < NR_CHUNKS; i++)
+	for (i = 0; i < NR_CHUNKS; i++) {
 		fio_sha256_update(&ctx, buf, size);
-
-	fio_sha256_final(&ctx);
+		fio_sha256_final(&ctx);
+	}
 }
 
-static void t_sha512(void *buf, size_t size)
+static void t_sha512(struct test_type *t, void *buf, size_t size)
 {
 	uint8_t sha[128];
 	struct fio_sha512_ctx ctx = { .buf = sha };
@@ -134,7 +141,31 @@ static void t_sha512(void *buf, size_t size)
 		fio_sha512_update(&ctx, buf, size);
 }
 
-static void t_xxhash(void *buf, size_t size)
+static void t_murmur3(struct test_type *t, void *buf, size_t size)
+{
+	int i;
+
+	for (i = 0; i < NR_CHUNKS; i++)
+		murmurhash3(buf, size, 0x8989);
+}
+
+static void t_jhash(struct test_type *t, void *buf, size_t size)
+{
+	int i;
+
+	for (i = 0; i < NR_CHUNKS; i++)
+		t->output += jhash(buf, size, 0x8989);
+}
+
+static void t_fnv(struct test_type *t, void *buf, size_t size)
+{
+	int i;
+
+	for (i = 0; i < NR_CHUNKS; i++)
+		t->output += fnv(buf, size, 0x8989);
+}
+
+static void t_xxhash(struct test_type *t, void *buf, size_t size)
 {
 	void *state;
 	int i;
@@ -144,7 +175,7 @@ static void t_xxhash(void *buf, size_t size)
 	for (i = 0; i < NR_CHUNKS; i++)
 		XXH32_update(state, buf, size);
 
-	XXH32_digest(state);
+	t->output = XXH32_digest(state);
 }
 
 static struct test_type t[] = {
@@ -199,6 +230,21 @@ static struct test_type t[] = {
 		.fn = t_xxhash,
 	},
 	{
+		.name = "murmur3",
+		.mask = T_MURMUR3,
+		.fn = t_murmur3,
+	},
+	{
+		.name = "jhash",
+		.mask = T_JHASH,
+		.fn = t_jhash,
+	},
+	{
+		.name = "fnv",
+		.mask = T_FNV,
+		.fn = t_fnv,
+	},
+	{
 		.name = NULL,
 	},
 };
@@ -265,6 +311,7 @@ int fio_crctest(const char *type)
 		struct timeval tv;
 		double mb_sec;
 		uint64_t usec;
+		char pre[3];
 
 		if (!(t[i].mask & test_mask))
 			continue;
@@ -275,16 +322,20 @@ int fio_crctest(const char *type)
 		 */
 		if (first) {
 			usec_spin(100000);
-			t[i].fn(buf, CHUNK);
+			t[i].fn(&t[i], buf, CHUNK);
 		}
 
 		fio_gettime(&tv, NULL);
-		t[i].fn(buf, CHUNK);
+		t[i].fn(&t[i], buf, CHUNK);
 		usec = utime_since_now(&tv);
 
 		mb_sec = (double) mb / (double) usec;
 		mb_sec /= (1.024 * 1.024);
-		printf("%s:\t%8.2f MB/sec\n", t[i].name, mb_sec);
+		if (strlen(t[i].name) >= 7)
+			sprintf(pre, "\t");
+		else
+			sprintf(pre, "\t\t");
+		printf("%s:%s%8.2f MB/sec\n", t[i].name, pre, mb_sec);
 		first = 0;
 	}
 
diff --git a/lib/bloom.c b/lib/bloom.c
index b469fde..33d093a 100644
--- a/lib/bloom.c
+++ b/lib/bloom.c
@@ -5,7 +5,9 @@
 #include "../hash.h"
 #include "../minmax.h"
 #include "../crc/xxhash.h"
+#include "../crc/murmur3.h"
 #include "../crc/crc32c.h"
+#include "../crc/fnv.h"
 
 struct bloom {
 	uint64_t nentries;
@@ -21,27 +23,42 @@ struct bloom_hash {
 	uint32_t (*fn)(const void *, uint32_t, uint32_t);
 };
 
-static uint32_t b_crc32c(const void *buf, uint32_t len, uint32_t seed)
+static uint32_t bloom_crc32c(const void *buf, uint32_t len, uint32_t seed)
 {
 	return fio_crc32c(buf, len);
 }
 
+static uint32_t bloom_fnv(const void *buf, uint32_t len, uint32_t seed)
+{
+	return fnv(buf, len, seed);
+}
+
+#define BLOOM_SEED	0x8989
+
 struct bloom_hash hashes[] = {
 	{
-		.seed = 0x8989,
+		.seed = BLOOM_SEED,
 		.fn = jhash,
 	},
 	{
-		.seed = 0x8989,
+		.seed = BLOOM_SEED,
 		.fn = XXH32,
 	},
 	{
-		.seed = 0,
-		.fn = b_crc32c,
+		.seed = BLOOM_SEED,
+		.fn = murmurhash3,
+	},
+	{
+		.seed = BLOOM_SEED,
+		.fn = bloom_crc32c,
+	},
+	{
+		.seed = BLOOM_SEED,
+		.fn = bloom_fnv,
 	},
 };
 
-#define N_HASHES	3
+#define N_HASHES	5
 
 #define MIN_ENTRIES	1073741824UL
 
diff --git a/options.c b/options.c
index 593f717..56e9108 100644
--- a/options.c
+++ b/options.c
@@ -98,9 +98,9 @@ static int bssplit_ddir(struct thread_options *o, int ddir, char *str)
 			if (perc > 100)
 				perc = 100;
 			else if (!perc)
-				perc = -1;
+				perc = -1U;
 		} else
-			perc = -1;
+			perc = -1U;
 
 		if (str_to_decimal(fname, &val, 1, o, 0)) {
 			log_err("fio: bssplit conversion failed\n");
@@ -127,7 +127,7 @@ static int bssplit_ddir(struct thread_options *o, int ddir, char *str)
 	for (i = 0; i < o->bssplit_nr[ddir]; i++) {
 		struct bssplit *bsp = &bssplit[i];
 
-		if (bsp->perc == (unsigned char) -1)
+		if (bsp->perc == -1U)
 			perc_missing++;
 		else
 			perc += bsp->perc;
@@ -138,17 +138,18 @@ static int bssplit_ddir(struct thread_options *o, int ddir, char *str)
 		free(bssplit);
 		return 1;
 	}
+
 	/*
 	 * If values didn't have a percentage set, divide the remains between
 	 * them.
 	 */
 	if (perc_missing) {
-		if (perc_missing == 1)
+		if (perc_missing == 1 && o->bssplit_nr[ddir] == 1)
 			perc = 100;
 		for (i = 0; i < o->bssplit_nr[ddir]; i++) {
 			struct bssplit *bsp = &bssplit[i];
 
-			if (bsp->perc == (unsigned char) -1)
+			if (bsp->perc == -1U)
 				bsp->perc = (100 - perc) / perc_missing;
 		}
 	}
diff --git a/t/dedupe.c b/t/dedupe.c
index 5998138..f9fb8c4 100644
--- a/t/dedupe.c
+++ b/t/dedupe.c
@@ -452,7 +452,7 @@ static int dedupe_check(const char *filename, uint64_t *nextents,
 	if (use_bloom) {
 		uint64_t bloom_entries;
 
-		bloom_entries = (3 * dev_size ) / (blocksize * 2);
+		bloom_entries = 8 * (dev_size / blocksize);
 		bloom = bloom_new(bloom_entries);
 	}
 
@@ -475,10 +475,11 @@ static void show_chunk(struct chunk *c)
 
 static void show_stat(uint64_t nextents, uint64_t nchunks)
 {
-	double perc;
+	double perc, ratio;
 
 	printf("Extents=%lu, Unique extents=%lu\n", (unsigned long) nextents, (unsigned long) nchunks);
-	printf("De-dupe factor: %3.2f\n", (double) nextents / (double) nchunks);
+	ratio = (double) nextents / (double) nchunks;
+	printf("De-dupe ratio: 1:%3.2f\n", ratio - 1.0);
 
 	perc = 1.00 - ((double) nchunks / (double) nextents);
 	perc *= 100.0;
@@ -575,10 +576,12 @@ int main(int argc, char *argv[])
 
 	ret = dedupe_check(argv[optind], &nextents, &nchunks);
 
-	if (!bloom)
-		iter_rb_tree(&nextents, &nchunks);
+	if (!ret) {
+		if (!bloom)
+			iter_rb_tree(&nextents, &nchunks);
 
-	show_stat(nextents, nchunks);
+		show_stat(nextents, nchunks);
+	}
 
 	fio_mutex_remove(rb_lock);
 	if (bloom)
--
To unsubscribe from this list: send the line "unsubscribe fio" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel]     [Linux SCSI]     [Linux IDE]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]

  Powered by Linux