Recent changes (master)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The following changes since commit 7477673323a943b99ea203bb9434661d13a0159c:

  lfsr: ensure we don't generate an offset + buflen that exceeds the max size (2013-01-11 14:03:25 +0100)

are available in the git repository at:
  git://git.kernel.dk/fio.git master

Jens Axboe (2):
      Pre-load and sort random blocks for pure read verify workloads
      configure: enable e4defrag engine regardless of MOVE_EXTENT compile test

 Makefile         |    2 +-
 backend.c        |    1 +
 configure        |    7 ++-
 fio.h            |    3 +
 flist.h          |    3 +
 io_u.c           |  100 ++++++++++++++++++++++++++++++--------
 lib/flist_sort.c |  140 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 options.c        |   10 ++++
 8 files changed, 243 insertions(+), 23 deletions(-)
 create mode 100644 lib/flist_sort.c

---

Diff of recent changes:

diff --git a/Makefile b/Makefile
index 96819c8..299e5e9 100644
--- a/Makefile
+++ b/Makefile
@@ -29,7 +29,7 @@ SOURCE := gettime.c fio.c ioengines.c init.c stat.c log.c time.c filesetup.c \
 		engines/mmap.c engines/sync.c engines/null.c engines/net.c \
 		memalign.c server.c client.c iolog.c backend.c libfio.c flow.c \
 		json.c lib/zipf.c lib/axmap.c lib/lfsr.c gettime-thread.c \
-		helpers.c
+		helpers.c lib/flist_sort.c
 
 ifdef CONFIG_64BIT
   CFLAGS += -DBITS_PER_LONG=64
diff --git a/backend.c b/backend.c
index 8e96fb0..7cebf4d 100644
--- a/backend.c
+++ b/backend.c
@@ -1025,6 +1025,7 @@ static void *thread_main(void *data)
 	INIT_FLIST_HEAD(&td->io_hist_list);
 	INIT_FLIST_HEAD(&td->verify_list);
 	INIT_FLIST_HEAD(&td->trim_list);
+	INIT_FLIST_HEAD(&td->next_rand_list);
 	pthread_mutex_init(&td->io_u_lock, NULL);
 	td->io_hist_tree = RB_ROOT;
 
diff --git a/configure b/configure
index 9cdfc49..3558bf4 100755
--- a/configure
+++ b/configure
@@ -585,7 +585,12 @@ int main(int argc, char **argv)
   return ioctl(0, EXT4_IOC_MOVE_EXT, &me);
 }
 EOF
-if compile_prog "" "" "ext4 move extent"; then
+if compile_prog "" "" "ext4 move extent" ; then
+  ext4_me="yes"
+elif test $targetos = "Linux" ; then
+  # On Linux, just default to it on and let it error at runtime if we really
+  # don't have it. None of my updated systems have it defined, but it does
+  # work. Takes a while to bubble back.
   ext4_me="yes"
 fi
 echo "EXT4 move extent              $ext4_me"
diff --git a/fio.h b/fio.h
index a528f6a..c5e2bf1 100644
--- a/fio.h
+++ b/fio.h
@@ -149,6 +149,7 @@ struct thread_options {
 	unsigned int verify;
 	unsigned int do_verify;
 	unsigned int verifysort;
+	unsigned int verifysort_nr;
 	unsigned int verify_interval;
 	unsigned int verify_offset;
 	char verify_pattern[MAX_PATTERN_SIZE];
@@ -515,6 +516,8 @@ struct thread_data {
 	struct flist_head trim_list;
 	unsigned long trim_entries;
 
+	struct flist_head next_rand_list;
+
 	/*
 	 * for fileservice, how often to switch to a new file
 	 */
diff --git a/flist.h b/flist.h
index 7aca973..8e13041 100644
--- a/flist.h
+++ b/flist.h
@@ -176,4 +176,7 @@ static inline void flist_splice_init(struct flist_head *list,
 	for (pos = (head)->next, n = pos->next; pos != (head); \
 		pos = n, n = pos->next)
 
+extern void flist_sort(void *priv, struct flist_head *head,
+	int (*cmp)(void *priv, struct flist_head *a, struct flist_head *b));
+
 #endif
diff --git a/io_u.c b/io_u.c
index 94fd123..87b8019 100644
--- a/io_u.c
+++ b/io_u.c
@@ -24,7 +24,7 @@ struct io_completion_data {
  * The ->io_axmap contains a map of blocks we have or have not done io
  * to yet. Used to make sure we cover the entire range in a fair fashion.
  */
-static int random_map_free(struct fio_file *f, const unsigned long long block)
+static int random_map_free(struct fio_file *f, const uint64_t block)
 {
 	return !axmap_isset(f->io_axmap, block);
 }
@@ -36,10 +36,10 @@ static void mark_random_map(struct thread_data *td, struct io_u *io_u)
 {
 	unsigned int min_bs = td->o.rw_min_bs;
 	struct fio_file *f = io_u->file;
-	unsigned long long block;
 	unsigned int nr_blocks;
+	uint64_t block;
 
-	block = (io_u->offset - f->file_offset) / (unsigned long long) min_bs;
+	block = (io_u->offset - f->file_offset) / (uint64_t) min_bs;
 	nr_blocks = (io_u->buflen + min_bs - 1) / min_bs;
 
 	if (!(io_u->flags & IO_U_F_BUSY_OK))
@@ -67,24 +67,29 @@ static uint64_t last_block(struct thread_data *td, struct fio_file *f,
 	if (td->o.zone_range)
 		max_size = td->o.zone_range;
 
-	max_blocks = max_size / (unsigned long long) td->o.ba[ddir];
+	max_blocks = max_size / (uint64_t) td->o.ba[ddir];
 	if (!max_blocks)
 		return 0;
 
 	return max_blocks;
 }
 
+struct rand_off {
+	struct flist_head list;
+	uint64_t off;
+};
+
 static int __get_next_rand_offset(struct thread_data *td, struct fio_file *f,
-				  enum fio_ddir ddir, unsigned long long *b)
+				  enum fio_ddir ddir, uint64_t *b)
 {
-	unsigned long long r, lastb;
+	uint64_t r, lastb;
 
 	lastb = last_block(td, f, ddir);
 	if (!lastb)
 		return 1;
 
 	if (td->o.random_generator == FIO_RAND_GEN_TAUSWORTHE) {
-		unsigned long long rmax;
+		uint64_t rmax;
 
 		rmax = td->o.use_os_rand ? OS_RAND_MAX : FRAND_MAX;
 
@@ -98,7 +103,7 @@ static int __get_next_rand_offset(struct thread_data *td, struct fio_file *f,
 
 		dprint(FD_RANDOM, "off rand %llu\n", r);
 
-		*b = (lastb - 1) * (r / ((unsigned long long) rmax + 1.0));
+		*b = (lastb - 1) * (r / ((uint64_t) rmax + 1.0));
 	} else {
 		uint64_t off = 0;
 
@@ -131,7 +136,7 @@ ret:
 
 static int __get_next_rand_offset_zipf(struct thread_data *td,
 				       struct fio_file *f, enum fio_ddir ddir,
-				       unsigned long long *b)
+				       uint64_t *b)
 {
 	*b = zipf_next(&f->zipf);
 	return 0;
@@ -139,14 +144,22 @@ static int __get_next_rand_offset_zipf(struct thread_data *td,
 
 static int __get_next_rand_offset_pareto(struct thread_data *td,
 					 struct fio_file *f, enum fio_ddir ddir,
-					 unsigned long long *b)
+					 uint64_t *b)
 {
 	*b = pareto_next(&f->zipf);
 	return 0;
 }
 
-static int get_next_rand_offset(struct thread_data *td, struct fio_file *f,
-				enum fio_ddir ddir, unsigned long long *b)
+static int flist_cmp(void *data, struct flist_head *a, struct flist_head *b)
+{
+	struct rand_off *r1 = flist_entry(a, struct rand_off, list);
+	struct rand_off *r2 = flist_entry(b, struct rand_off, list);
+
+	return r1->off - r2->off;
+}
+
+static int get_off_from_method(struct thread_data *td, struct fio_file *f,
+			       enum fio_ddir ddir, uint64_t *b)
 {
 	if (td->o.random_distribution == FIO_RAND_DIST_RANDOM)
 		return __get_next_rand_offset(td, f, ddir, b);
@@ -159,8 +172,52 @@ static int get_next_rand_offset(struct thread_data *td, struct fio_file *f,
 	return 1;
 }
 
+static int get_next_rand_offset(struct thread_data *td, struct fio_file *f,
+				enum fio_ddir ddir, uint64_t *b)
+{
+	struct rand_off *r;
+	int i, ret = 1;
+
+	/*
+	 * If sort not enabled, or not a pure random read workload without
+	 * any stored write metadata, just return a random offset
+	 */
+	if (!td->o.verifysort_nr || !(ddir == READ && td->o.do_verify &&
+	    td->o.verify != VERIFY_NONE && td_random(td)))
+		return get_off_from_method(td, f, ddir, b);
+
+	if (!flist_empty(&td->next_rand_list)) {
+		struct rand_off *r;
+fetch:
+		r = flist_entry(td->next_rand_list.next, struct rand_off, list);
+		flist_del(&r->list);
+		*b = r->off;
+		free(r);
+		return 0;
+	}
+
+	for (i = 0; i < td->o.verifysort_nr; i++) {
+		r = malloc(sizeof(*r));
+
+		ret = get_off_from_method(td, f, ddir, &r->off);
+		if (ret) {
+			free(r);
+			break;
+		}
+
+		flist_add(&r->list, &td->next_rand_list);
+	}
+
+	if (ret && !i)
+		return ret;
+
+	assert(!flist_empty(&td->next_rand_list));
+	flist_sort(NULL, &td->next_rand_list, flist_cmp);
+	goto fetch;
+}
+
 static int get_next_rand_block(struct thread_data *td, struct fio_file *f,
-			       enum fio_ddir ddir, unsigned long long *b)
+			       enum fio_ddir ddir, uint64_t *b)
 {
 	if (!get_next_rand_offset(td, f, ddir, b))
 		return 0;
@@ -177,7 +234,7 @@ static int get_next_rand_block(struct thread_data *td, struct fio_file *f,
 }
 
 static int get_next_seq_offset(struct thread_data *td, struct fio_file *f,
-			       enum fio_ddir ddir, unsigned long long *offset)
+			       enum fio_ddir ddir, uint64_t *offset)
 {
 	assert(ddir_rw(ddir));
 
@@ -185,7 +242,7 @@ static int get_next_seq_offset(struct thread_data *td, struct fio_file *f,
 		f->last_pos = f->last_pos - f->io_size;
 
 	if (f->last_pos < f->real_file_size) {
-		unsigned long long pos;
+		uint64_t pos;
 
 		if (f->last_pos == f->file_offset && td->o.ddir_seq_add < 0)
 			f->last_pos = f->real_file_size;
@@ -205,7 +262,7 @@ static int get_next_block(struct thread_data *td, struct io_u *io_u,
 			  enum fio_ddir ddir, int rw_seq)
 {
 	struct fio_file *f = io_u->file;
-	unsigned long long b, offset;
+	uint64_t b, offset;
 	int ret;
 
 	assert(ddir_rw(ddir));
@@ -1106,7 +1163,7 @@ static int check_get_verify(struct thread_data *td, struct io_u *io_u)
 static void small_content_scramble(struct io_u *io_u)
 {
 	unsigned int i, nr_blocks = io_u->buflen / 512;
-	unsigned long long boffset;
+	uint64_t boffset;
 	unsigned int offset;
 	void *p, *end;
 
@@ -1124,9 +1181,9 @@ static void small_content_scramble(struct io_u *io_u)
 		 * and the actual offset.
 		 */
 		offset = (io_u->start_time.tv_usec ^ boffset) & 511;
-		offset &= ~(sizeof(unsigned long long) - 1);
-		if (offset >= 512 - sizeof(unsigned long long))
-			offset -= sizeof(unsigned long long);
+		offset &= ~(sizeof(uint64_t) - 1);
+		if (offset >= 512 - sizeof(uint64_t))
+			offset -= sizeof(uint64_t);
 		memcpy(p + offset, &boffset, sizeof(boffset));
 
 		end = p + 512 - sizeof(io_u->start_time);
@@ -1285,7 +1342,8 @@ static void account_io_completion(struct thread_data *td, struct io_u *io_u,
 
 static long long usec_for_io(struct thread_data *td, enum fio_ddir ddir)
 {
-	unsigned long long secs, remainder, bps, bytes;
+	uint64_t secs, remainder, bps, bytes;
+
 	bytes = td->this_io_bytes[ddir];
 	bps = td->rate_bps[ddir];
 	secs = bytes / bps;
diff --git a/lib/flist_sort.c b/lib/flist_sort.c
new file mode 100644
index 0000000..1c91cc4
--- /dev/null
+++ b/lib/flist_sort.c
@@ -0,0 +1,140 @@
+#include <stdio.h>
+#include <string.h>
+#include "../flist.h"
+#include "../log.h"
+
+#define MAX_LIST_LENGTH_BITS 20
+
+/*
+ * Returns a list organized in an intermediate format suited
+ * to chaining of merge() calls: null-terminated, no reserved or
+ * sentinel head node, "prev" links not maintained.
+ */
+static struct flist_head *merge(void *priv,
+				int (*cmp)(void *priv, struct flist_head *a,
+					struct flist_head *b),
+				struct flist_head *a, struct flist_head *b)
+{
+	struct flist_head head, *tail = &head;
+
+	while (a && b) {
+		/* if equal, take 'a' -- important for sort stability */
+		if ((*cmp)(priv, a, b) <= 0) {
+			tail->next = a;
+			a = a->next;
+		} else {
+			tail->next = b;
+			b = b->next;
+		}
+		tail = tail->next;
+	}
+	tail->next = a?:b;
+	return head.next;
+}
+
+/*
+ * Combine final list merge with restoration of standard doubly-linked
+ * list structure.  This approach duplicates code from merge(), but
+ * runs faster than the tidier alternatives of either a separate final
+ * prev-link restoration pass, or maintaining the prev links
+ * throughout.
+ */
+static void merge_and_restore_back_links(void *priv,
+				int (*cmp)(void *priv, struct flist_head *a,
+					struct flist_head *b),
+				struct flist_head *head,
+				struct flist_head *a, struct flist_head *b)
+{
+	struct flist_head *tail = head;
+
+	while (a && b) {
+		/* if equal, take 'a' -- important for sort stability */
+		if ((*cmp)(priv, a, b) <= 0) {
+			tail->next = a;
+			a->prev = tail;
+			a = a->next;
+		} else {
+			tail->next = b;
+			b->prev = tail;
+			b = b->next;
+		}
+		tail = tail->next;
+	}
+	tail->next = a ? : b;
+
+	do {
+		/*
+		 * In worst cases this loop may run many iterations.
+		 * Continue callbacks to the client even though no
+		 * element comparison is needed, so the client's cmp()
+		 * routine can invoke cond_resched() periodically.
+		 */
+		(*cmp)(priv, tail->next, tail->next);
+
+		tail->next->prev = tail;
+		tail = tail->next;
+	} while (tail->next);
+
+	tail->next = head;
+	head->prev = tail;
+}
+
+/**
+ * list_sort - sort a list
+ * @priv: private data, opaque to list_sort(), passed to @cmp
+ * @head: the list to sort
+ * @cmp: the elements comparison function
+ *
+ * This function implements "merge sort", which has O(nlog(n))
+ * complexity.
+ *
+ * The comparison function @cmp must return a negative value if @a
+ * should sort before @b, and a positive value if @a should sort after
+ * @b. If @a and @b are equivalent, and their original relative
+ * ordering is to be preserved, @cmp must return 0.
+ */
+void flist_sort(void *priv, struct flist_head *head,
+		int (*cmp)(void *priv, struct flist_head *a,
+			struct flist_head *b))
+{
+	struct flist_head *part[MAX_LIST_LENGTH_BITS+1]; /* sorted partial lists
+						-- last slot is a sentinel */
+	int lev;  /* index into part[] */
+	int max_lev = 0;
+	struct flist_head *list;
+
+	if (flist_empty(head))
+		return;
+
+	memset(part, 0, sizeof(part));
+
+	head->prev->next = NULL;
+	list = head->next;
+
+	while (list) {
+		struct flist_head *cur = list;
+		list = list->next;
+		cur->next = NULL;
+
+		for (lev = 0; part[lev]; lev++) {
+			cur = merge(priv, cmp, part[lev], cur);
+			part[lev] = NULL;
+		}
+		if (lev > max_lev) {
+			if (lev >= MAX_LIST_LENGTH_BITS) {
+				log_err("fio: list passed to"
+					" list_sort() too long for"
+					" efficiency\n");
+				lev--;
+			}
+			max_lev = lev;
+		}
+		part[lev] = cur;
+	}
+
+	for (lev = 0; lev < max_lev; lev++)
+		if (part[lev])
+			list = merge(priv, cmp, part[lev], list);
+
+	merge_and_restore_back_links(priv, cmp, head, part[max_lev], list);
+}
diff --git a/options.c b/options.c
index 3a406fa..1760762 100644
--- a/options.c
+++ b/options.c
@@ -1887,6 +1887,16 @@ static struct fio_option options[FIO_MAX_OPTS] = {
 		.parent = "verify",
 	},
 	{
+		.name	= "verifysort_nr",
+		.type	= FIO_OPT_INT,
+		.off1	= td_var_offset(verifysort_nr),
+		.help	= "Pre-load and sort verify blocks for a read workload",
+		.minval	= 0,
+		.maxval	= 131072,
+		.def	= "1024",
+		.parent = "verify",
+	},
+	{
 		.name   = "verify_interval",
 		.type   = FIO_OPT_INT,
 		.off1   = td_var_offset(verify_interval),
--
To unsubscribe from this list: send the line "unsubscribe fio" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel]     [Linux SCSI]     [Linux IDE]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux