Recent changes (master)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The following changes since commit b5b571a3f01c17ddb39fd0306cb425a11e216f3d:

  Fix compile for FIO_INC_DEBUG not set (2014-09-24 09:54:24 -0600)

are available in the git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 1066358aebafb7221732bedd6fb9fde56b14be7b:

  Improve dedupe/compression buffer filling for mixed block sizes (2014-09-26 15:04:58 -0600)

----------------------------------------------------------------
Andrey Kuzmin (2):
      Add ability to use an include file in a fio job file
      Add HOWTO section on include files

Jens Axboe (10):
      Add bloom filter
      dedupe: default to using a bloom filter to save memory
      Add debug helper stub for t/ programs
      t/lfsr-test: fixup time
      dedupe: fix warning and segfault on -B0
      dedupe: read in larger chunks at the time
      bloom: always use a larger minimum size for bloom filter
      Clarify that include files may not contain job sections
      bloom: use independent hashes
      Improve dedupe/compression buffer filling for mixed block sizes

 HOWTO         |   35 +++++++++++++
 Makefile      |    9 ++--
 crc/xxhash.c  |    2 +-
 crc/xxhash.h  |    2 +-
 fio_time.h    |    1 +
 init.c        |  162 +++++++++++++++++++++++++++++++++++++++------------------
 io_u.c        |   32 +++++++-----
 lib/bloom.c   |  107 +++++++++++++++++++++++++++++++++++++
 lib/bloom.h   |   13 +++++
 t/debug.c     |   14 +++++
 t/debug.h     |    6 +++
 t/dedupe.c    |  150 ++++++++++++++++++++++++++++++++++++----------------
 t/lfsr-test.c |   13 ++---
 t/stest.c     |   12 +----
 14 files changed, 430 insertions(+), 128 deletions(-)
 create mode 100644 lib/bloom.c
 create mode 100644 lib/bloom.h
 create mode 100644 t/debug.c
 create mode 100644 t/debug.h

---

Diff of recent changes:

diff --git a/HOWTO b/HOWTO
index 23746ce..aaa46f8 100644
--- a/HOWTO
+++ b/HOWTO
@@ -159,6 +159,41 @@ specify:
 
 $ fio --name=random-writers --ioengine=libaio --iodepth=4 --rw=randwrite --bs=32k --direct=0 --size=64m --numjobs=4
 
+When fio is utilized as a basis of any reasonably large test suite, it might be
+desirable to share a set of standardized settings across multiple job files.
+Instead of copy/pasting such settings, any section may pull in an external
+.fio file with an 'include filename' directive, as in the following example:
+
+; -- start job file including.fio --
+[global]
+filename=/tmp/test
+filesize=1m
+include glob-include.fio
+
+[test]
+rw=randread
+bs=4k
+time_based=1
+runtime=10
+include test-include.fio
+; -- end job file including.fio --
+
+; -- start job file glob-include.fio --
+thread=1
+group_reporting=1
+; -- end job file glob-include.fio --
+
+; -- start job file test-include.fio --
+ioengine=libaio
+iodepth=4
+; -- end job file test-include.fio --
+
+Settings pulled into a section apply to that section only (except for the
+global section). Include directives may be nested, in that any included file
+may contain further include directive(s). Include files may not contain []
+sections.
+
+
 4.1 Environment variables
 -------------------------
 
diff --git a/Makefile b/Makefile
index fe439c1..8c424e3 100644
--- a/Makefile
+++ b/Makefile
@@ -36,7 +36,7 @@ SOURCE := gettime.c ioengines.c init.c stat.c log.c time.c filesetup.c \
 		lib/lfsr.c gettime-thread.c helpers.c lib/flist_sort.c \
 		lib/hweight.c lib/getrusage.c idletime.c td_error.c \
 		profiles/tiobench.c profiles/act.c io_u_queue.c filelock.c \
-		lib/tp.c
+		lib/tp.c lib/bloom.c
 
 ifdef CONFIG_LIBHDFS
   HDFSFLAGS= -I $(JAVA_HOME)/include -I $(JAVA_HOME)/include/linux -I $(FIO_LIBHDFS_INCLUDE)
@@ -164,7 +164,7 @@ GFIO_OBJS = $(OBJS) gfio.o graph.o tickmarks.o ghelpers.o goptions.o gerror.o \
 -include $(OBJS:.o=.d)
 
 T_SMALLOC_OBJS = t/stest.o
-T_SMALLOC_OBJS += gettime.o mutex.o smalloc.o t/log.o
+T_SMALLOC_OBJS += gettime.o mutex.o smalloc.o t/log.o t/debug.o
 T_SMALLOC_PROGS = t/stest
 
 T_IEEE_OBJS = t/ieee754.o
@@ -180,7 +180,7 @@ T_AXMAP_OBJS += lib/lfsr.o lib/axmap.o
 T_AXMAP_PROGS = t/axmap
 
 T_LFSR_TEST_OBJS = t/lfsr-test.o
-T_LFSR_TEST_OBJS += lib/lfsr.o
+T_LFSR_TEST_OBJS += lib/lfsr.o gettime.o t/log.o t/debug.o
 T_LFSR_TEST_PROGS = t/lfsr-test
 
 ifeq ($(CONFIG_TARGET_OS), Linux)
@@ -192,7 +192,8 @@ endif
 ifeq ($(CONFIG_TARGET_OS), Linux)
 T_DEDUPE_OBJS = t/dedupe.o
 T_DEDUPE_OBJS += lib/rbtree.o t/log.o mutex.o smalloc.o gettime.o crc/md5.o \
-		memalign.o
+		memalign.o lib/bloom.o t/debug.o crc/xxhash.o crc/crc32c.o \
+		crc/crc32c-intel.o
 T_DEDUPE_PROGS = t/dedupe
 endif
 
diff --git a/crc/xxhash.c b/crc/xxhash.c
index eedaecb..4736c52 100644
--- a/crc/xxhash.c
+++ b/crc/xxhash.c
@@ -221,7 +221,7 @@ static uint32_t XXH32_endian_align(const void* input, int len, uint32_t seed, XX
 }
 
 
-uint32_t XXH32(const void* input, int len, uint32_t seed)
+uint32_t XXH32(const void* input, uint32_t len, uint32_t seed)
 {
 #if 0
     // Simple version, good for code maintenance, but unfortunately slow for small inputs
diff --git a/crc/xxhash.h b/crc/xxhash.h
index e80a91d..8850d20 100644
--- a/crc/xxhash.h
+++ b/crc/xxhash.h
@@ -88,7 +88,7 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
 // Simple Hash Functions
 //****************************
 
-unsigned int XXH32 (const void* input, int len, unsigned int seed);
+uint32_t XXH32 (const void* input, uint32_t len, uint32_t seed);
 
 /*
 XXH32() :
diff --git a/fio_time.h b/fio_time.h
index c550a55..9f7d209 100644
--- a/fio_time.h
+++ b/fio_time.h
@@ -1,6 +1,7 @@
 #ifndef FIO_TIME_H
 #define FIO_TIME_H
 
+struct thread_data;
 extern uint64_t utime_since(struct timeval *, struct timeval *);
 extern uint64_t utime_since_now(struct timeval *);
 extern uint64_t mtime_since(struct timeval *, struct timeval *);
diff --git a/init.c b/init.c
index 5b0290d..e208451 100644
--- a/init.c
+++ b/init.c
@@ -1397,11 +1397,12 @@ static int is_empty_or_comment(char *line)
 /*
  * This is our [ini] type file parser.
  */
-int parse_jobs_ini(char *file, int is_buf, int stonewall_flag, int type)
+int __parse_jobs_ini(struct thread_data *td,
+		char *file, int is_buf, int stonewall_flag, int type,
+		int nested, char *name, char ***popts, int *aopts, int *nopts)
 {
-	unsigned int global;
-	struct thread_data *td;
-	char *string, *name;
+	unsigned int global = 0;
+	char *string;
 	FILE *f;
 	char *p;
 	int ret = 0, stonewall;
@@ -1411,6 +1412,9 @@ int parse_jobs_ini(char *file, int is_buf, int stonewall_flag, int type)
 	char **opts;
 	int i, alloc_opts, num_opts;
 
+	dprint(FD_PARSE, "Parsing ini file %s\n", file);
+	assert(td || !nested);
+
 	if (is_buf)
 		f = NULL;
 	else {
@@ -1430,12 +1434,23 @@ int parse_jobs_ini(char *file, int is_buf, int stonewall_flag, int type)
 	/*
 	 * it's really 256 + small bit, 280 should suffice
 	 */
-	name = malloc(280);
-	memset(name, 0, 280);
+	if (!nested) {
+		name = malloc(280);
+		memset(name, 0, 280);
+	}
 
-	alloc_opts = 8;
-	opts = malloc(sizeof(char *) * alloc_opts);
-	num_opts = 0;
+	opts = NULL;
+	if (nested && popts) {
+		opts = *popts;
+		alloc_opts = *aopts;
+		num_opts = *nopts;
+	}
+
+	if (!opts) {
+		alloc_opts = 8;
+		opts = malloc(sizeof(char *) * alloc_opts);
+		num_opts = 0;
+	}
 
 	stonewall = stonewall_flag;
 	do {
@@ -1456,58 +1471,72 @@ int parse_jobs_ini(char *file, int is_buf, int stonewall_flag, int type)
 		strip_blank_front(&p);
 		strip_blank_end(p);
 
+		dprint(FD_PARSE, "%s\n", p);
 		if (is_empty_or_comment(p))
 			continue;
-		if (sscanf(p, "[%255[^\n]]", name) != 1) {
-			if (inside_skip)
+
+		if (!nested) {
+			if (sscanf(p, "[%255[^\n]]", name) != 1) {
+				if (inside_skip)
+					continue;
+
+				log_err("fio: option <%s> outside of "
+					"[] job section\n", p);
+				break;
+			}
+
+			name[strlen(name) - 1] = '\0';
+
+			if (skip_this_section(name)) {
+				inside_skip = 1;
 				continue;
-			log_err("fio: option <%s> outside of [] job section\n",
-									p);
-			break;
-		}
+			} else
+				inside_skip = 0;
 
-		name[strlen(name) - 1] = '\0';
+			dprint(FD_PARSE, "Parsing section [%s]\n", name);
 
-		if (skip_this_section(name)) {
-			inside_skip = 1;
-			continue;
-		} else
-			inside_skip = 0;
+			global = !strncmp(name, "global", 6);
 
-		global = !strncmp(name, "global", 6);
+			if (dump_cmdline) {
+				if (first_sect)
+					log_info("fio ");
+				if (!global)
+					log_info("--name=%s ", name);
+				first_sect = 0;
+			}
 
-		if (dump_cmdline) {
-			if (first_sect)
-				log_info("fio ");
-			if (!global)
-				log_info("--name=%s ", name);
-			first_sect = 0;
-		}
+			td = get_new_job(global, &def_thread, 0, name);
+			if (!td) {
+				ret = 1;
+				break;
+			}
 
-		td = get_new_job(global, &def_thread, 0, name);
-		if (!td) {
-			ret = 1;
-			break;
-		}
+			/*
+			 * Separate multiple job files by a stonewall
+			 */
+			if (!global && stonewall) {
+				td->o.stonewall = stonewall;
+				stonewall = 0;
+			}
 
-		/*
-		 * Separate multiple job files by a stonewall
-		 */
-		if (!global && stonewall) {
-			td->o.stonewall = stonewall;
-			stonewall = 0;
+			num_opts = 0;
+			memset(opts, 0, alloc_opts * sizeof(char *));
 		}
-
-		num_opts = 0;
-		memset(opts, 0, alloc_opts * sizeof(char *));
+		else
+			skip_fgets = 1;
 
 		while (1) {
-			if (is_buf)
-				p = strsep(&file, "\n");
+			if (!skip_fgets) {
+				if (is_buf)
+					p = strsep(&file, "\n");
+				else
+					p = fgets(string, 4096, f);
+				if (!p)
+					break;
+				dprint(FD_PARSE, "%s", p);
+			}
 			else
-				p = fgets(string, 4096, f);
-			if (!p)
-				break;
+				skip_fgets = 0;
 
 			if (is_empty_or_comment(p))
 				continue;
@@ -1519,12 +1548,30 @@ int parse_jobs_ini(char *file, int is_buf, int stonewall_flag, int type)
 			 * fgets() a new line at the top.
 			 */
 			if (p[0] == '[') {
+				if (nested) {
+					log_err("No new sections in included files\n");
+					return 1;
+				}
+
 				skip_fgets = 1;
 				break;
 			}
 
 			strip_blank_end(p);
 
+			if (!strncmp(p, "include", strlen("include"))) {
+				char *filename = p + strlen("include") + 1;
+
+				if ((ret = __parse_jobs_ini(td, filename,
+						is_buf, stonewall_flag, type, 1,
+						name, &opts, &alloc_opts, &num_opts))) {
+					log_err("Error %d while parsing include file %s\n",
+						ret, filename);
+					break;
+				}
+				continue;
+			}
+
 			if (num_opts == alloc_opts) {
 				alloc_opts <<= 1;
 				opts = realloc(opts,
@@ -1535,6 +1582,13 @@ int parse_jobs_ini(char *file, int is_buf, int stonewall_flag, int type)
 			num_opts++;
 		}
 
+		if (nested) {
+			*popts = opts;
+			*aopts = alloc_opts;
+			*nopts = num_opts;
+			goto out;
+		}
+
 		ret = fio_options_parse(td, opts, num_opts, dump_cmdline);
 		if (!ret)
 			ret = add_job(td, name, 0, 0, type);
@@ -1557,14 +1611,22 @@ int parse_jobs_ini(char *file, int is_buf, int stonewall_flag, int type)
 		i++;
 	}
 
-	free(string);
-	free(name);
 	free(opts);
+out:
+	free(string);
+	if (!nested)
+		free(name);
 	if (!is_buf && f != stdin)
 		fclose(f);
 	return ret;
 }
 
+int parse_jobs_ini(char *file, int is_buf, int stonewall_flag, int type)
+{
+	return __parse_jobs_ini(NULL, file, is_buf, stonewall_flag, type,
+			0, NULL, NULL, NULL, NULL);
+}
+
 static int fill_def_thread(void)
 {
 	memset(&def_thread, 0, sizeof(def_thread));
diff --git a/io_u.c b/io_u.c
index eac871b..8546899 100644
--- a/io_u.c
+++ b/io_u.c
@@ -1487,7 +1487,8 @@ struct io_u *get_io_u(struct thread_data *td)
 		if (io_u->ddir == DDIR_WRITE) {
 			if (td->flags & TD_F_REFILL_BUFFERS) {
 				io_u_fill_buffer(td, io_u,
-					io_u->xfer_buflen, io_u->xfer_buflen);
+					td->o.min_bs[DDIR_WRITE],
+					io_u->xfer_buflen);
 			} else if ((td->flags & TD_F_SCRAMBLE_BUFFERS) &&
 				   !(td->flags & TD_F_COMPRESS))
 				do_scramble = 1;
@@ -1864,22 +1865,29 @@ void fill_io_buffer(struct thread_data *td, void *buf, unsigned int min_write,
 	else if (!td->o.zero_buffers) {
 		unsigned int perc = td->o.compress_percentage;
 		struct frand_state *rs;
+		unsigned int left = max_bs;
 
-		rs = get_buf_state(td);
+		do {
+			rs = get_buf_state(td);
 
-		if (perc) {
-			unsigned int seg = min_write;
+			min_write = min(min_write, left);
 
-			seg = min(min_write, td->o.compress_chunk);
-			if (!seg)
-				seg = min_write;
+			if (perc) {
+				unsigned int seg = min_write;
 
-			fill_random_buf_percentage(rs, buf, perc, seg,max_bs);
-			save_buf_state(td, rs);
-		} else {
-			fill_random_buf(rs, buf, max_bs);
+				seg = min(min_write, td->o.compress_chunk);
+				if (!seg)
+					seg = min_write;
+
+				fill_random_buf_percentage(rs, buf, perc, seg,
+								min_write);
+			} else
+				fill_random_buf(rs, buf, min_write);
+
+			buf += min_write;
+			left -= min_write;
 			save_buf_state(td, rs);
-		}
+		} while (left);
 	} else
 		memset(buf, 0, max_bs);
 }
diff --git a/lib/bloom.c b/lib/bloom.c
new file mode 100644
index 0000000..b469fde
--- /dev/null
+++ b/lib/bloom.c
@@ -0,0 +1,107 @@
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "bloom.h"
+#include "../hash.h"
+#include "../minmax.h"
+#include "../crc/xxhash.h"
+#include "../crc/crc32c.h"
+
+struct bloom {
+	uint64_t nentries;
+
+	uint32_t *map;
+};
+
+#define BITS_PER_INDEX	(sizeof(uint32_t) * 8)
+#define BITS_INDEX_MASK	(BITS_PER_INDEX - 1)
+
+struct bloom_hash {
+	unsigned int seed;
+	uint32_t (*fn)(const void *, uint32_t, uint32_t);
+};
+
+static uint32_t b_crc32c(const void *buf, uint32_t len, uint32_t seed)
+{
+	return fio_crc32c(buf, len);
+}
+
+struct bloom_hash hashes[] = {
+	{
+		.seed = 0x8989,
+		.fn = jhash,
+	},
+	{
+		.seed = 0x8989,
+		.fn = XXH32,
+	},
+	{
+		.seed = 0,
+		.fn = b_crc32c,
+	},
+};
+
+#define N_HASHES	3
+
+#define MIN_ENTRIES	1073741824UL
+
+struct bloom *bloom_new(uint64_t entries)
+{
+	struct bloom *b;
+	size_t no_uints;
+
+	crc32c_intel_probe();
+
+	b = malloc(sizeof(*b));
+	b->nentries = entries;
+	no_uints = (entries + BITS_PER_INDEX - 1) / BITS_PER_INDEX;
+	no_uints = max((unsigned long) no_uints, MIN_ENTRIES);
+	b->map = calloc(no_uints, sizeof(uint32_t));
+	if (!b->map) {
+		free(b);
+		return NULL;
+	}
+
+	return b;
+}
+
+void bloom_free(struct bloom *b)
+{
+	free(b->map);
+	free(b);
+}
+
+static int __bloom_check(struct bloom *b, uint32_t *data, unsigned int nwords,
+			 int set)
+{
+	uint32_t hash[N_HASHES];
+	int i, was_set;
+
+	for (i = 0; i < N_HASHES; i++) {
+		hash[i] = hashes[i].fn(data, nwords, hashes[i].seed);
+		hash[i] = hash[i] % b->nentries;
+	}
+
+	was_set = 0;
+	for (i = 0; i < N_HASHES; i++) {
+		const unsigned int index = hash[i] / BITS_PER_INDEX;
+		const unsigned int bit = hash[i] & BITS_INDEX_MASK;
+
+		if (b->map[index] & (1U << bit))
+			was_set++;
+		if (set)
+			b->map[index] |= 1U << bit;
+	}
+
+	return was_set == N_HASHES;
+}
+
+int bloom_check(struct bloom *b, uint32_t *data, unsigned int nwords)
+{
+	return __bloom_check(b, data, nwords, 0);
+}
+
+int bloom_set(struct bloom *b, uint32_t *data, unsigned int nwords)
+{
+	return __bloom_check(b, data, nwords, 1);
+}
diff --git a/lib/bloom.h b/lib/bloom.h
new file mode 100644
index 0000000..b3cde95
--- /dev/null
+++ b/lib/bloom.h
@@ -0,0 +1,13 @@
+#ifndef FIO_BLOOM_H
+#define FIO_BLOOM_H
+
+#include <inttypes.h>
+
+struct bloom;
+
+struct bloom *bloom_new(uint64_t entries);
+void bloom_free(struct bloom *b);
+int bloom_check(struct bloom *b, uint32_t *data, unsigned int nwords);
+int bloom_set(struct bloom *b, uint32_t *data, unsigned int nwords);
+
+#endif
diff --git a/t/debug.c b/t/debug.c
new file mode 100644
index 0000000..c297d61
--- /dev/null
+++ b/t/debug.c
@@ -0,0 +1,14 @@
+#include <stdio.h>
+
+FILE *f_err;
+struct timeval *fio_tv = NULL;
+unsigned int fio_debug = 0;
+
+void __dprint(int type, const char *str, ...)
+{
+}
+
+void debug_init(void)
+{
+	f_err = stderr;
+}
diff --git a/t/debug.h b/t/debug.h
new file mode 100644
index 0000000..9d1d415
--- /dev/null
+++ b/t/debug.h
@@ -0,0 +1,6 @@
+#ifndef FIO_DEBUG_INC_H
+#define FIO_DEBUG_INC_H
+
+extern void debug_init(void);
+
+#endif
diff --git a/t/dedupe.c b/t/dedupe.c
index b81e98a..5998138 100644
--- a/t/dedupe.c
+++ b/t/dedupe.c
@@ -27,13 +27,8 @@
 #include "../gettime.h"
 #include "../fio_time.h"
 
-FILE *f_err;
-struct timeval *fio_tv = NULL;
-unsigned int fio_debug = 0;
-
-void __dprint(int type, const char *str, ...)
-{
-}
+#include "../lib/bloom.h"
+#include "debug.h"
 
 struct worker_thread {
 	pthread_t thread;
@@ -45,6 +40,7 @@ struct worker_thread {
 	uint64_t size;
 
 	unsigned long items;
+	unsigned long dupes;
 	int err;
 };
 
@@ -66,6 +62,7 @@ struct item {
 };
 
 static struct rb_root rb_root;
+static struct bloom *bloom;
 static struct fio_mutex *rb_lock;
 
 static unsigned int blocksize = 4096;
@@ -75,6 +72,7 @@ static unsigned int dump_output;
 static unsigned int odirect;
 static unsigned int collision_check;
 static unsigned int print_progress = 1;
+static unsigned int use_bloom = 1;
 
 static uint64_t total_size;
 static uint64_t cur_offset;
@@ -116,17 +114,17 @@ static int get_work(uint64_t *offset, uint64_t *size)
 	return ret;
 }
 
-static int read_block(int fd, void *buf, off_t offset)
+static int __read_block(int fd, void *buf, off_t offset, size_t count)
 {
 	ssize_t ret;
 
-	ret = pread(fd, buf, blocksize, offset);
+	ret = pread(fd, buf, count, offset);
 	if (ret < 0) {
 		perror("pread");
 		return 1;
 	} else if (!ret)
 		return 1;
-	else if (ret != blocksize) {
+	else if (ret != count) {
 		log_err("dedupe: short read on block\n");
 		return 1;
 	}
@@ -134,6 +132,11 @@ static int read_block(int fd, void *buf, off_t offset)
 	return 0;
 }
 
+static int read_block(int fd, void *buf, off_t offset)
+{
+	return __read_block(fd, buf, offset, blocksize);
+}
+
 static void add_item(struct chunk *c, struct item *i)
 {
 	/*	
@@ -231,14 +234,24 @@ add:
 	add_item(c, i);
 }
 
-static void insert_chunks(struct item *items, unsigned int nitems)
+static void insert_chunks(struct item *items, unsigned int nitems,
+			  uint64_t *ndupes)
 {
 	int i;
 
 	fio_mutex_down(rb_lock);
 
-	for (i = 0; i < nitems; i++)
-		insert_chunk(&items[i]);
+	for (i = 0; i < nitems; i++) {
+		if (bloom) {
+			unsigned int s;
+			int r;
+
+			s = sizeof(items[i].hash) / sizeof(uint32_t);
+			r = bloom_set(bloom, items[i].hash, s);
+			*ndupes += r;
+		} else
+			insert_chunk(&items[i]);
+	}
 
 	fio_mutex_up(rb_lock);
 }
@@ -252,30 +265,46 @@ static void crc_buf(void *buf, uint32_t *hash)
 	fio_md5_final(&ctx);
 }
 
+static unsigned int read_blocks(int fd, void *buf, off_t offset, size_t size)
+{
+	if (__read_block(fd, buf, offset, size))
+		return 0;
+
+	return size / blocksize;
+}
+
 static int do_work(struct worker_thread *thread, void *buf)
 {
 	unsigned int nblocks, i;
 	off_t offset;
-	int err = 0, nitems = 0;
+	int nitems = 0;
+	uint64_t ndupes = 0;
 	struct item *items;
 
-	nblocks = thread->size / blocksize;
 	offset = thread->cur_offset;
+
+	nblocks = read_blocks(thread->fd, buf, offset, min(thread->size, (uint64_t)chunk_size));
+	if (!nblocks)
+		return 1;
+
 	items = malloc(sizeof(*items) * nblocks);
 
 	for (i = 0; i < nblocks; i++) {
-		if (read_block(thread->fd, buf, offset))
-			break;
-		items[i].offset = offset;
-		crc_buf(buf, items[i].hash);
+		void *thisptr = buf + (i * blocksize);
+
+		if (items)
+			items[i].offset = offset;
+		crc_buf(thisptr, items[i].hash);
 		offset += blocksize;
 		nitems++;
 	}
 
-	insert_chunks(items, nitems);
-	thread->items += nitems;
+	insert_chunks(items, nitems, &ndupes);
+
 	free(items);
-	return err;
+	thread->items += nitems;
+	thread->dupes += ndupes;
+	return 0;
 }
 
 static void *thread_fn(void *data)
@@ -283,7 +312,7 @@ static void *thread_fn(void *data)
 	struct worker_thread *thread = data;
 	void *buf;
 
-	buf = fio_memalign(blocksize, blocksize);
+	buf = fio_memalign(blocksize, chunk_size);
 
 	do {
 		if (get_work(&thread->cur_offset, &thread->size)) {
@@ -297,7 +326,7 @@ static void *thread_fn(void *data)
 	} while (1);
 
 	thread->done = 1;
-	fio_memfree(buf, blocksize);
+	fio_memfree(buf, chunk_size);
 	return NULL;
 }
 
@@ -343,7 +372,8 @@ static void show_progress(struct worker_thread *threads, unsigned long total)
 	};
 }
 
-static int run_dedupe_threads(int fd, uint64_t dev_size)
+static int run_dedupe_threads(int fd, uint64_t dev_size, uint64_t *nextents,
+				uint64_t *nchunks)
 {
 	struct worker_thread *threads;
 	unsigned long nitems, total_items;
@@ -371,20 +401,27 @@ static int run_dedupe_threads(int fd, uint64_t dev_size)
 	show_progress(threads, total_items);
 
 	nitems = 0;
+	*nextents = 0;
+	*nchunks = 1;
 	for (i = 0; i < num_threads; i++) {
 		void *ret;
 		pthread_join(threads[i].thread, &ret);
 		nitems += threads[i].items;
+		*nchunks += threads[i].dupes;
 	}
 
 	printf("Threads(%u): %lu items processed\n", num_threads, nitems);
 
+	*nextents = nitems;
+	*nchunks = nitems - *nchunks;
+
 	fio_mutex_remove(size_lock);
 	free(threads);
 	return err;
 }
 
-static int dedupe_check(const char *filename)
+static int dedupe_check(const char *filename, uint64_t *nextents,
+			uint64_t *nchunks)
 {
 	uint64_t dev_size;
 	struct stat sb;
@@ -412,9 +449,16 @@ static int dedupe_check(const char *filename)
 		return 1;
 	}
 
+	if (use_bloom) {
+		uint64_t bloom_entries;
+
+		bloom_entries = (3 * dev_size ) / (blocksize * 2);
+		bloom = bloom_new(bloom_entries);
+	}
+
 	printf("Will check <%s>, size <%llu>, using %u threads\n", filename, (unsigned long long) dev_size, num_threads);
 
-	return run_dedupe_threads(dev_fd, dev_size);
+	return run_dedupe_threads(dev_fd, dev_size, nextents, nchunks);
 }
 
 static void show_chunk(struct chunk *c)
@@ -429,14 +473,24 @@ static void show_chunk(struct chunk *c)
 	}
 }
 
-static void iter_rb_tree(void)
+static void show_stat(uint64_t nextents, uint64_t nchunks)
 {
-	struct rb_node *n;
-	uint64_t nchunks;
-	uint64_t nextents;
 	double perc;
 
-	nchunks = nextents = 0;
+	printf("Extents=%lu, Unique extents=%lu\n", (unsigned long) nextents, (unsigned long) nchunks);
+	printf("De-dupe factor: %3.2f\n", (double) nextents / (double) nchunks);
+
+	perc = 1.00 - ((double) nchunks / (double) nextents);
+	perc *= 100.0;
+	printf("Fio setting: dedupe_percentage=%u\n", (int) (perc + 0.50));
+
+}
+
+static void iter_rb_tree(uint64_t *nextents, uint64_t *nchunks)
+{
+	struct rb_node *n;
+
+	*nchunks = *nextents = 0;
 
 	n = rb_first(&rb_root);
 	if (!n)
@@ -446,20 +500,13 @@ static void iter_rb_tree(void)
 		struct chunk *c;
 
 		c = rb_entry(n, struct chunk, rb_node);
-		nchunks++;
-		nextents += c->count;
+		(*nchunks)++;
+		*nextents += c->count;
 
 		if (dump_output)
 			show_chunk(c);
 
 	} while ((n = rb_next(n)) != NULL);
-
-	printf("Extents=%lu, Unique extents=%lu\n", (unsigned long) nextents, (unsigned long) nchunks);
-	printf("De-dupe factor: %3.2f\n", (double) nextents / (double) nchunks);
-
-	perc = 1.00 - ((double) nchunks / (double) nextents);
-	perc *= 100.0;
-	printf("Fio setting: dedupe_percentage=%u\n", (int) (perc + 0.50));
 }
 
 static int usage(char *argv[])
@@ -471,15 +518,19 @@ static int usage(char *argv[])
 	log_err("\t-d\tFull extent/chunk debug output\n");
 	log_err("\t-o\tUse O_DIRECT\n");
 	log_err("\t-c\tFull collision check\n");
+	log_err("\t-B\tUse probabilistic bloom filter\n");
 	log_err("\t-p\tPrint progress indicator\n");
 	return 1;
 }
 
 int main(int argc, char *argv[])
 {
+	uint64_t nextents = 0, nchunks = 0;
 	int c, ret;
 
-	while ((c = getopt(argc, argv, "b:t:d:o:c:p:")) != -1) {
+	debug_init();
+
+	while ((c = getopt(argc, argv, "b:t:d:o:c:p:B:")) != -1) {
 		switch (c) {
 		case 'b':
 			blocksize = atoi(optarg);
@@ -499,12 +550,18 @@ int main(int argc, char *argv[])
 		case 'p':
 			print_progress = atoi(optarg);
 			break;
+		case 'B':
+			use_bloom = atoi(optarg);
+			break;
 		case '?':
 		default:
 			return usage(argv);
 		}
 	}
 
+	if (collision_check || dump_output)
+		use_bloom = 0;
+
 	if (!num_threads)
 		num_threads = cpus_online();
 
@@ -516,11 +573,16 @@ int main(int argc, char *argv[])
 	rb_root = RB_ROOT;
 	rb_lock = fio_mutex_init(FIO_MUTEX_UNLOCKED);
 
-	ret = dedupe_check(argv[optind]);
+	ret = dedupe_check(argv[optind], &nextents, &nchunks);
+
+	if (!bloom)
+		iter_rb_tree(&nextents, &nchunks);
 
-	iter_rb_tree();
+	show_stat(nextents, nchunks);
 
 	fio_mutex_remove(rb_lock);
+	if (bloom)
+		bloom_free(bloom);
 	scleanup();
 	return ret;
 }
diff --git a/t/lfsr-test.c b/t/lfsr-test.c
index 481f37e..4b54248 100644
--- a/t/lfsr-test.c
+++ b/t/lfsr-test.c
@@ -8,6 +8,8 @@
 #include <sys/stat.h>
 
 #include "../lib/lfsr.h"
+#include "../gettime.h"
+#include "../fio_time.h"
 
 void usage()
 {
@@ -25,7 +27,7 @@ void usage()
 int main(int argc, char *argv[])
 {
 	int r;
-	struct timespec start, end;
+	struct timeval start, end;
 	struct fio_lfsr *fl;
 	int verify = 0;
 	unsigned int spin = 0;
@@ -86,12 +88,12 @@ int main(int argc, char *argv[])
 	 * negligible overhead.
 	 */
 	fprintf(stderr, "\nTest initiated... ");
-	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
+	fio_gettime(&start, NULL);
 	while (!lfsr_next(fl, &i, fl->max_val)) {
 		if (verify)
 			*(uint8_t *)(v + i) += 1;
 	}
-	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);
+	fio_gettime(&end, NULL);
 	fprintf(stderr, "finished.\n");
 
 
@@ -113,8 +115,7 @@ int main(int argc, char *argv[])
 	}
 
 	/* Calculate elapsed time and mean time per number */
-	total = (end.tv_sec - start.tv_sec) * pow(10,9) +
-		end.tv_nsec - start.tv_nsec;
+	total = utime_since(&start, &end);
 	mean = total / fl->num_vals;
 
 	printf("\nTime results ");
@@ -122,7 +123,7 @@ int main(int argc, char *argv[])
 		printf("(slower due to verification)");
 	printf("\n==============================\n");
 	printf("Elapsed: %lf s\n", total / pow(10,9));
-	printf("Mean:    %lf ns\n", mean);
+	printf("Mean:    %lf us\n", mean);
 
 	free(v_start);
 	free(fl);
diff --git a/t/stest.c b/t/stest.c
index 0da8f2c..efb256e 100644
--- a/t/stest.c
+++ b/t/stest.c
@@ -4,10 +4,7 @@
 
 #include "../smalloc.h"
 #include "../flist.h"
-
-FILE *f_err;
-struct timeval *fio_tv = NULL;
-unsigned int fio_debug = 0;
+#include "debug.h"
 
 #define MAGIC1	0xa9b1c8d2
 #define MAGIC2	0xf0a1e9b3
@@ -72,9 +69,8 @@ static int do_specific_alloc(unsigned long size)
 
 int main(int argc, char *argv[])
 {
-	f_err = stderr;
-
 	sinit();
+	debug_init();
 
 	do_rand_allocs();
 
@@ -84,7 +80,3 @@ int main(int argc, char *argv[])
 	scleanup();
 	return 0;
 }
-
-void __dprint(int type, const char *str, ...)
-{
-}
--
To unsubscribe from this list: send the line "unsubscribe fio" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel]     [Linux SCSI]     [Linux IDE]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux