Recent changes (master)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The following changes since commit 6c5d3a1c08bda1bbf22187c7b80573400e1c1053:

  t/io_uring: don't print BW numbers for do_nop (2021-09-24 15:17:44 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 0b2114e7b46d047271d8d404beaae7006e89f8ef:

  Merge branch 'evelu-uring' of https://github.com/ErwanAliasr1/fio (2021-09-25 14:56:14 -0600)

----------------------------------------------------------------
Erwan Velu (1):
      t/io_uring.c: Adding \n on help

Jens Axboe (3):
      t/io_uring: add support for latency tracking
      t/io_uring: batch stat updates
      Merge branch 'evelu-uring' of https://github.com/ErwanAliasr1/fio

 t/io_uring.c | 298 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 289 insertions(+), 9 deletions(-)

---

Diff of recent changes:

diff --git a/t/io_uring.c b/t/io_uring.c
index d5636380..f22c504a 100644
--- a/t/io_uring.c
+++ b/t/io_uring.c
@@ -5,6 +5,7 @@
 #include <stddef.h>
 #include <signal.h>
 #include <inttypes.h>
+#include <math.h>
 
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -22,10 +23,10 @@
 
 #include "../arch/arch.h"
 #include "../lib/types.h"
+#include "../lib/roundup.h"
+#include "../minmax.h"
 #include "../os/linux/io_uring.h"
 
-#define min(a, b)		((a < b) ? (a) : (b))
-
 struct io_sq_ring {
 	unsigned *head;
 	unsigned *tail;
@@ -57,8 +58,14 @@ struct file {
 	unsigned pending_ios;
 	int real_fd;
 	int fixed_fd;
+	int fileno;
 };
 
+#define PLAT_BITS		6	/* bits used to index a bucket within a group */
+#define PLAT_VAL		(1 << PLAT_BITS)	/* buckets per group */
+#define PLAT_GROUP_NR		29	/* number of bucket groups */
+#define PLAT_NR			(PLAT_GROUP_NR * PLAT_VAL)	/* total buckets in a latency histogram */
+
 struct submitter {
 	pthread_t thread;
 	int ring_fd;
@@ -67,6 +74,7 @@ struct submitter {
 	struct io_uring_sqe *sqes;
 	struct io_cq_ring cq_ring;
 	int inflight;
+	int tid;
 	unsigned long reaps;
 	unsigned long done;
 	unsigned long calls;
@@ -74,6 +82,10 @@ struct submitter {
 
 	__s32 *fds;
 
+	unsigned long *clock_batch;
+	int clock_index;
+	unsigned long *plat;
+
 	struct file files[MAX_FDS];
 	unsigned nr_files;
 	unsigned cur_file;
@@ -95,9 +107,202 @@ static int sq_thread_poll = 0;	/* use kernel submission/poller thread */
 static int sq_thread_cpu = -1;	/* pin above thread to this CPU */
 static int do_nop = 0;		/* no-op SQ ring commands */
 static int nthreads = 1;
+static int stats = 0;		/* generate IO stats */
+static unsigned long tsc_rate;	/* TSC rate in Hz (-T); 0 reports raw ticks */
 
 static int vectored = 1;
 
+static float plist[] = { 1.0, 5.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0,
+			80.0, 90.0, 95.0, 99.0, 99.5, 99.9, 99.95, 99.99 };	/* ascending percentiles to report */
+static int plist_len = sizeof(plist) / sizeof(plist[0]);	/* entries in plist[] */
+
+static unsigned long cycles_to_nsec(unsigned long cycles)	/* clock ticks -> nsec */
+{
+	uint64_t val;
+
+	if (!tsc_rate)		/* no rate was given: hand back raw ticks */
+		return cycles;
+
+	val = cycles * 1000000000ULL;	/* ticks * 1e9, then divide by ticks per second */
+	return val / tsc_rate;
+}
+
+static unsigned long plat_idx_to_val(unsigned int idx)	/* bucket index -> latency value */
+{
+	unsigned int error_bits;
+	unsigned long k, base;
+
+	assert(idx < PLAT_NR);
+
+	/* The first 2 * PLAT_VAL buckets are exact: the index is the
+	 * sample itself, so it only needs the unit conversion */
+	if (idx < (PLAT_VAL << 1))
+		return cycles_to_nsec(idx);
+
+	/* Find the group and compute the minimum value of that group */
+	error_bits = (idx >> PLAT_BITS) - 1;
+	base = ((unsigned long) 1) << (error_bits + PLAT_BITS);
+
+	/* Find the bucket's position within its group */
+	k = idx % PLAT_VAL;
+
+	/* Return the midpoint of the bucket's range, converted like above */
+	return cycles_to_nsec(base + ((k + 0.5) * (1 << error_bits)));
+}
+
+/* One latency per plist[] entry into malloc'd *output; returns count or 0 */
+unsigned int calc_clat_percentiles(unsigned long *io_u_plat, unsigned long nr,
+				   unsigned long **output,
+				   unsigned long *maxv, unsigned long *minv)
+{
+	unsigned long sum = 0;
+	unsigned int len = plist_len, i, j = 0;
+	unsigned long *ovals = NULL;
+	bool is_last;
+
+	*minv = -1ULL;
+	*maxv = 0;
+
+	/* Set *output even on failure so the caller never free()s garbage */
+	*output = ovals = malloc(len * sizeof(*ovals));
+	if (!ovals)
+		return 0;
+
+	/* Calculate bucket values, note down max and min values */
+	is_last = false;
+	for (i = 0; i < PLAT_NR && !is_last; i++) {
+		sum += io_u_plat[i];
+		while (sum >= ((long double) plist[j] / 100.0 * nr)) {
+			assert(plist[j] <= 100.0);
+
+			ovals[j] = plat_idx_to_val(i);
+			if (ovals[j] < *minv)
+				*minv = ovals[j];
+			if (ovals[j] > *maxv)
+				*maxv = ovals[j];
+
+			is_last = (j == len - 1) != 0;
+			if (is_last)
+				break;
+
+			j++;
+		}
+	}
+
+	if (!is_last)
+		fprintf(stderr, "error calculating latency percentiles\n");
+
+	*output = ovals;
+	return len;
+}
+
+/* Print the plist[] latency percentiles of histogram io_u_plat (nr samples) */
+static void show_clat_percentiles(unsigned long *io_u_plat, unsigned long nr,
+				  unsigned int precision)
+{
+	unsigned int divisor, len, i, j = 0;
+	unsigned long minv, maxv;
+	unsigned long *ovals = NULL;	/* free()d at out: even if never set */
+	int per_line, scale_down, time_width;
+	bool is_last;
+	char fmt[32];
+
+	len = calc_clat_percentiles(io_u_plat, nr, &ovals, &maxv, &minv);
+	if (!len || !ovals)
+		goto out;
+
+	if (!tsc_rate) {
+		scale_down = 0;
+		divisor = 1;
+		printf("    percentiles (tsc ticks):\n     |");
+	} else if (minv > 2000 && maxv > 99999) {
+		scale_down = 1;
+		divisor = 1000;
+		printf("    percentiles (usec):\n     |");
+	} else {
+		scale_down = 0;
+		divisor = 1;
+		printf("    percentiles (nsec):\n     |");
+	}
+
+	/* log10(0) is -inf and must not be cast to int: use the minimum width */
+	time_width = maxv < divisor ? 5 : max(5, (int) (log10(maxv / divisor) + 1));
+	snprintf(fmt, sizeof(fmt), " %%%u.%ufth=[%%%dlu]%%c", precision + 3,
+			precision, time_width);
+	/* fmt will be something like " %5.2fth=[%4lu]%c"; ovals[] is unsigned long */
+	per_line = (80 - 7) / (precision + 10 + time_width);
+
+	for (j = 0; j < len; j++) {
+		if (j != 0 && (j % per_line) == 0)	/* for formatting */
+			printf("     |");
+
+		is_last = (j == len - 1) != 0;	/* end of the list */
+
+		for (i = 0; i < scale_down; i++)
+			ovals[j] = (ovals[j] + 999) / 1000;
+
+		printf(fmt, plist[j], ovals[j], is_last ? '\n' : ',');
+
+		if (is_last)
+			break;
+
+		if ((j % per_line) == per_line - 1)	/* for formatting */
+			printf("\n");
+	}
+
+out:
+	free(ovals);
+}
+
+/* Map a latency sample (in clock ticks) to its histogram bucket index */
+static unsigned int plat_val_to_idx(unsigned long val)
+{
+	unsigned int msb, error_bits, base, offset, idx;
+
+	/* Find MSB starting from bit 0; clzl matches val's unsigned long width */
+	if (val == 0)
+		msb = 0;
+	else
+		msb = (sizeof(val)*8) - __builtin_clzl(val) - 1;
+
+	/*
+	 * MSB <= PLAT_BITS: stored exactly, the sample is its own index
+	 */
+	if (msb <= PLAT_BITS)
+		return val;
+
+	/* Compute the number of error bits to discard*/
+	error_bits = msb - PLAT_BITS;
+
+	/* Compute the number of buckets before the group */
+	base = (error_bits + 1) << PLAT_BITS;
+
+	/*
+	 * Discard the error bits and apply the mask to find the
+	 * index for the buckets in the group
+	 */
+	offset = (PLAT_VAL - 1) & (val >> error_bits);
+
+	/* Make sure the index does not exceed (array size - 1) */
+	idx = (base + offset) < (PLAT_NR - 1) ?
+		(base + offset) : (PLAT_NR - 1);
+
+	return idx;
+}
+
+static void add_stat(struct submitter *s, int clock_index, int nr)	/* record nr samples */
+{
+#ifdef ARCH_HAVE_CPU_CLOCK
+	unsigned long cycles;
+	unsigned int pidx;
+
+	cycles = get_cpu_clock();
+	cycles -= s->clock_batch[clock_index];	/* elapsed since that batch's stored clock */
+	pidx = plat_val_to_idx(cycles);
+	s->plat[pidx] += nr;	/* all nr samples land in the same bucket */
+#endif
+}
+
 static int io_uring_register_buffers(struct submitter *s)
 {
 	if (do_nop)
@@ -224,7 +429,9 @@ static void init_io(struct submitter *s, unsigned index)
 	}
 	sqe->ioprio = 0;
 	sqe->off = offset;
-	sqe->user_data = (unsigned long) f;
+	sqe->user_data = (unsigned long) f->fileno;
+	if (stats)
+		sqe->user_data |= ((unsigned long)s->clock_index << 32);
 }
 
 static int prep_more_ios(struct submitter *s, int max_ios)
@@ -277,6 +484,7 @@ static int reap_events(struct submitter *s)
 	struct io_cq_ring *ring = &s->cq_ring;
 	struct io_uring_cqe *cqe;
 	unsigned head, reaped = 0;
+	int last_idx = -1, stat_nr = 0;
 
 	head = *ring->head;
 	do {
@@ -287,7 +495,9 @@ static int reap_events(struct submitter *s)
 			break;
 		cqe = &ring->cqes[head & cq_ring_mask];
 		if (!do_nop) {
-			f = (struct file *) (uintptr_t) cqe->user_data;
+			int fileno = cqe->user_data & 0xffffffff;
+
+			f = &s->files[fileno];
 			f->pending_ios--;
 			if (cqe->res != bs) {
 				printf("io: unexpected ret=%d\n", cqe->res);
@@ -296,10 +506,26 @@ static int reap_events(struct submitter *s)
 				return -1;
 			}
 		}
+		if (stats) {
+			int clock_index = cqe->user_data >> 32;
+
+			if (last_idx != clock_index) {
+				if (last_idx != -1) {
+					add_stat(s, last_idx, stat_nr);
+					stat_nr = 0;
+				}
+				last_idx = clock_index;
+			}
+			stat_nr++;
+			add_stat(s, clock_index, 1);
+		}
 		reaped++;
 		head++;
 	} while (1);
 
+	if (stat_nr)
+		add_stat(s, last_idx, stat_nr);
+
 	if (reaped) {
 		s->inflight -= reaped;
 		atomic_store_release(ring->head, head);
@@ -311,12 +537,28 @@ static void *submitter_fn(void *data)
 {
 	struct submitter *s = data;
 	struct io_sq_ring *ring = &s->sq_ring;
-	int ret, prepped;
+	int i, ret, prepped, nr_batch;
 
-	printf("submitter=%d\n", gettid());
+	s->tid = gettid();
+	printf("submitter=%d\n", s->tid);
 
 	srand48(pthread_self());
 
+	for (i = 0; i < MAX_FDS; i++)
+		s->files[i].fileno = i;
+
+	if (stats) {
+		nr_batch = roundup_pow2(depth / batch_submit);
+		s->clock_batch = calloc(nr_batch, sizeof(unsigned long));
+		s->clock_index = 0;
+
+		s->plat = calloc(PLAT_NR, sizeof(unsigned long));
+	} else {
+		s->clock_batch = NULL;
+		s->plat = NULL;
+		nr_batch = 0;
+	}
+
 	prepped = 0;
 	do {
 		int to_wait, to_submit, this_reap, to_prep;
@@ -325,6 +567,12 @@ static void *submitter_fn(void *data)
 		if (!prepped && s->inflight < depth) {
 			to_prep = min(depth - s->inflight, batch_submit);
 			prepped = prep_more_ios(s, to_prep);
+#ifdef ARCH_HAVE_CPU_CLOCK
+			if (prepped && stats) {
+				s->clock_batch[s->clock_index] = get_cpu_clock();
+				s->clock_index = (s->clock_index + 1) & (nr_batch - 1);
+			}
+#endif
 		}
 		s->inflight += prepped;
 submit_more:
@@ -555,9 +803,11 @@ static void usage(char *argv, int status)
 		" -F <bool> : Register files, default %d\n"
 		" -n <int>  : Number of threads, default %d\n"
 		" -O <bool> : Use O_DIRECT, default %d\n"
-		" -N <bool> : Perform just no-op requests, default %d\n",
+		" -N <bool> : Perform just no-op requests, default %d\n"
+		" -t <bool> : Track IO latencies, default %d\n"
+		" -T <int>  : TSC rate in HZ\n",
 		argv, DEPTH, BATCH_SUBMIT, BATCH_COMPLETE, BS, polled,
-		fixedbufs, register_files, nthreads, !buffered, do_nop);
+		fixedbufs, register_files, nthreads, !buffered, do_nop, stats);
 	exit(status);
 }
 
@@ -573,16 +823,20 @@ int main(int argc, char *argv[])
 	if (!do_nop && argc < 2)
 		usage(argv[0], 1);
 
-	while ((opt = getopt(argc, argv, "d:s:c:b:p:B:F:n:N:O:h?")) != -1) {
+	while ((opt = getopt(argc, argv, "d:s:c:b:p:B:F:n:N:O:t:T:h?")) != -1) {
 		switch (opt) {
 		case 'd':
 			depth = atoi(optarg);
 			break;
 		case 's':
 			batch_submit = atoi(optarg);
+			if (!batch_submit)
+				batch_submit = 1;
 			break;
 		case 'c':
 			batch_complete = atoi(optarg);
+			if (!batch_complete)
+				batch_complete = 1;
 			break;
 		case 'b':
 			bs = atoi(optarg);
@@ -609,6 +863,20 @@ int main(int argc, char *argv[])
 		case 'O':
 			buffered = !atoi(optarg);
 			break;
+		case 't':
+#ifndef ARCH_HAVE_CPU_CLOCK
+			fprintf(stderr, "Stats not supported on this CPU\n");
+			return 1;
+#endif
+			stats = !!atoi(optarg);
+			break;
+		case 'T':
+#ifndef ARCH_HAVE_CPU_CLOCK
+			fprintf(stderr, "Stats not supported on this CPU\n");
+			return 1;
+#endif
+			tsc_rate = strtoul(optarg, NULL, 10);
+			break;
 		case 'h':
 		case '?':
 		default:
@@ -764,7 +1032,19 @@ int main(int argc, char *argv[])
 		s = get_submitter(j);
 		pthread_join(s->thread, &ret);
 		close(s->ring_fd);
+
+		if (stats) {
+			unsigned long nr;
+
+			printf("%d: Latency percentiles:\n", s->tid);
+			for (i = 0, nr = 0; i < PLAT_NR; i++)
+				nr += s->plat[i];
+			show_clat_percentiles(s->plat, nr, 4);
+			free(s->clock_batch);
+			free(s->plat);
+		}
 	}
+
 	free(fdepths);
 	free(submitter);
 	return 0;



[Index of Archives]     [Linux Kernel]     [Linux SCSI]     [Linux IDE]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux