Recent changes (master)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The following changes since commit ac4f3d4e4cf16b1097249a819fe7111b2674b3f4:

  aioring: remove IOCB_FLAG_HIPRI (2018-12-30 17:19:40 -0700)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to ac122fea7540ca115c157e0a835a74b891f10484:

  aioring: update to newer API (2019-01-04 22:22:54 -0700)

----------------------------------------------------------------
Jens Axboe (4):
      t/aio-ring: update to newer mmap() API
      engines/aioring: update for newer mmap based API
      t/aio-ring: use syscall defines
      aioring: update to newer API

 arch/arch-x86_64.h |   8 +-
 engines/aioring.c  | 267 ++++++++++++++++++++++++++++++++---------------------
 t/aio-ring.c       | 262 +++++++++++++++++++++++++++++++---------------------
 3 files changed, 322 insertions(+), 215 deletions(-)

---

Diff of recent changes:

diff --git a/arch/arch-x86_64.h b/arch/arch-x86_64.h
index d49bcd7f..d0a98b8b 100644
--- a/arch/arch-x86_64.h
+++ b/arch/arch-x86_64.h
@@ -1,11 +1,11 @@
 #ifndef ARCH_X86_64_H
 #define ARCH_X86_64_H
 
-#ifndef __NR_sys_io_setup2
-#define __NR_sys_io_setup2	335
+#ifndef __NR_sys_io_uring_setup
+#define __NR_sys_io_uring_setup	335
 #endif
-#ifndef __NR_sys_io_ring_enter
-#define __NR_sys_io_ring_enter	336
+#ifndef __NR_sys_io_uring_enter
+#define __NR_sys_io_uring_enter	336
 #endif
 
 static inline void do_cpuid(unsigned int *eax, unsigned int *ebx,
diff --git a/engines/aioring.c b/engines/aioring.c
index f836009d..ca60b281 100644
--- a/engines/aioring.c
+++ b/engines/aioring.c
@@ -22,13 +22,13 @@
 #ifdef ARCH_HAVE_AIORING
 
 /*
- * io_setup2(2) flags
+ * io_uring_setup(2) flags
  */
-#ifndef IOCTX_FLAG_IOPOLL
-#define IOCTX_FLAG_IOPOLL	(1 << 0)
-#endif
 #ifndef IOCTX_FLAG_SCQRING
-#define IOCTX_FLAG_SCQRING	(1 << 1)
+#define IOCTX_FLAG_SCQRING	(1 << 0)
+#endif
+#ifndef IOCTX_FLAG_IOPOLL
+#define IOCTX_FLAG_IOPOLL	(1 << 1)
 #endif
 #ifndef IOCTX_FLAG_FIXEDBUFS
 #define IOCTX_FLAG_FIXEDBUFS	(1 << 2)
@@ -43,12 +43,15 @@
 #define IOCTX_FLAG_SQPOLL	(1 << 5)
 #endif
 
+#define IORING_OFF_SQ_RING	0ULL
+#define IORING_OFF_CQ_RING	0x8000000ULL
+#define IORING_OFF_IOCB		0x10000000ULL
 
 /*
- * io_ring_enter(2) flags
+ * io_uring_enter(2) flags
  */
-#ifndef IORING_FLAG_GETEVENTS
-#define IORING_FLAG_GETEVENTS	(1 << 0)
+#ifndef IORING_ENTER_GETEVENTS
+#define IORING_ENTER_GETEVENTS	(1 << 0)
 #endif
 
 typedef uint64_t u64;
@@ -59,43 +62,68 @@ typedef uint16_t u16;
 
 #define IOEV_RES2_CACHEHIT	(1 << 0)
 
+struct aio_sqring_offsets {
+	u32 head;
+	u32 tail;
+	u32 ring_mask;
+	u32 ring_entries;
+	u32 flags;
+	u32 array;
+};
+
+struct aio_cqring_offsets {
+	u32 head;
+	u32 tail;
+	u32 ring_mask;
+	u32 ring_entries;
+	u32 overflow;
+	u32 events;
+};
+
+struct aio_uring_params {
+	u32 sq_entries;
+	u32 cq_entries;
+	u32 flags;
+	u16 sq_thread_cpu;
+	u16 resv[9];
+	struct aio_sqring_offsets sq_off;
+	struct aio_cqring_offsets cq_off;
+};
+
 struct aio_sq_ring {
-	union {
-		struct {
-			u32 head;
-			u32 tail;
-			u32 nr_events;
-			u16 sq_thread_cpu;
-			u16 kflags;
-			u64 iocbs;
-		};
-		u32 pad[16];
-	};
-	u32 array[0];
+	u32 *head;
+	u32 *tail;
+	u32 *ring_mask;
+	u32 *ring_entries;
+	u32 *flags;
+	u32 *array;
 };
 
 struct aio_cq_ring {
-	union {
-		struct {
-			u32 head;
-			u32 tail;
-			u32 nr_events;
-		};
-		struct io_event pad;
-	};
-	struct io_event events[0];
+	u32 *head;
+	u32 *tail;
+	u32 *ring_mask;
+	u32 *ring_entries;
+	struct io_event *events;
+};
+
+struct aioring_mmap {
+	void *ptr;
+	size_t len;
 };
 
 struct aioring_data {
-	io_context_t aio_ctx;
+	int ring_fd;
+
 	struct io_u **io_us;
 	struct io_u **io_u_index;
 
-	struct aio_sq_ring *sq_ring;
+	struct aio_sq_ring sq_ring;
 	struct iocb *iocbs;
+	struct iovec *iovecs;
 	unsigned sq_ring_mask;
 
-	struct aio_cq_ring *cq_ring;
+	struct aio_cq_ring cq_ring;
 	struct io_event *events;
 	unsigned cq_ring_mask;
 
@@ -105,6 +133,8 @@ struct aioring_data {
 
 	uint64_t cachehit;
 	uint64_t cachemiss;
+
+	struct aioring_mmap mmap[3];
 };
 
 struct aioring_options {
@@ -178,11 +208,11 @@ static struct fio_option options[] = {
 	},
 };
 
-static int io_ring_enter(io_context_t ctx, unsigned int to_submit,
+static int io_uring_enter(struct aioring_data *ld, unsigned int to_submit,
 			 unsigned int min_complete, unsigned int flags)
 {
-	return syscall(__NR_sys_io_ring_enter, ctx, to_submit, min_complete,
-			flags);
+	return syscall(__NR_sys_io_uring_enter, ld->ring_fd, to_submit,
+			min_complete, flags);
 }
 
 static int fio_aioring_prep(struct thread_data *td, struct io_u *io_u)
@@ -220,7 +250,7 @@ static struct io_u *fio_aioring_event(struct thread_data *td, int event)
 
 	index = (event + ld->cq_ring_off) & ld->cq_ring_mask;
 
-	ev = &ld->cq_ring->events[index];
+	ev = &ld->cq_ring.events[index];
 	io_u = ev->data;
 
 	if (ev->res != io_u->xfer_buflen) {
@@ -245,19 +275,19 @@ static int fio_aioring_cqring_reap(struct thread_data *td, unsigned int events,
 				   unsigned int max)
 {
 	struct aioring_data *ld = td->io_ops_data;
-	struct aio_cq_ring *ring = ld->cq_ring;
+	struct aio_cq_ring *ring = &ld->cq_ring;
 	u32 head, reaped = 0;
 
-	head = ring->head;
+	head = *ring->head;
 	do {
 		read_barrier();
-		if (head == ring->tail)
+		if (head == *ring->tail)
 			break;
 		reaped++;
 		head++;
 	} while (reaped + events < max);
 
-	ring->head = head;
+	*ring->head = head;
 	write_barrier();
 	return reaped;
 }
@@ -268,11 +298,11 @@ static int fio_aioring_getevents(struct thread_data *td, unsigned int min,
 	struct aioring_data *ld = td->io_ops_data;
 	unsigned actual_min = td->o.iodepth_batch_complete_min == 0 ? 0 : min;
 	struct aioring_options *o = td->eo;
-	struct aio_cq_ring *ring = ld->cq_ring;
+	struct aio_cq_ring *ring = &ld->cq_ring;
 	unsigned events = 0;
 	int r;
 
-	ld->cq_ring_off = ring->head;
+	ld->cq_ring_off = *ring->head;
 	do {
 		r = fio_aioring_cqring_reap(td, events, max);
 		if (r) {
@@ -281,12 +311,12 @@ static int fio_aioring_getevents(struct thread_data *td, unsigned int min,
 		}
 
 		if (!o->sqthread_poll) {
-			r = io_ring_enter(ld->aio_ctx, 0, actual_min,
-						IORING_FLAG_GETEVENTS);
+			r = io_uring_enter(ld, 0, actual_min,
+						IORING_ENTER_GETEVENTS);
 			if (r < 0) {
 				if (errno == EAGAIN)
 					continue;
-				td_verror(td, errno, "io_ring_enter get");
+				td_verror(td, errno, "io_uring_enter");
 				break;
 			}
 		}
@@ -299,7 +329,7 @@ static enum fio_q_status fio_aioring_queue(struct thread_data *td,
 					   struct io_u *io_u)
 {
 	struct aioring_data *ld = td->io_ops_data;
-	struct aio_sq_ring *ring = ld->sq_ring;
+	struct aio_sq_ring *ring = &ld->sq_ring;
 	unsigned tail, next_tail;
 
 	fio_ro_check(td, io_u);
@@ -317,14 +347,14 @@ static enum fio_q_status fio_aioring_queue(struct thread_data *td,
 		return FIO_Q_COMPLETED;
 	}
 
-	tail = ring->tail;
+	tail = *ring->tail;
 	next_tail = tail + 1;
 	read_barrier();
-	if (next_tail == ring->head)
+	if (next_tail == *ring->head)
 		return FIO_Q_BUSY;
 
 	ring->array[tail & ld->sq_ring_mask] = io_u->index;
-	ring->tail = next_tail;
+	*ring->tail = next_tail;
 	write_barrier();
 
 	ld->queued++;
@@ -342,7 +372,8 @@ static void fio_aioring_queued(struct thread_data *td, int start, int nr)
 	fio_gettime(&now, NULL);
 
 	while (nr--) {
-		int index = ld->sq_ring->array[start & ld->sq_ring_mask];
+		struct aio_sq_ring *ring = &ld->sq_ring;
+		int index = ring->array[start & ld->sq_ring_mask];
 		struct io_u *io_u = ld->io_u_index[index];
 
 		memcpy(&io_u->issue_time, &now, sizeof(now));
@@ -363,19 +394,19 @@ static int fio_aioring_commit(struct thread_data *td)
 
 	/* Nothing to do */
 	if (o->sqthread_poll) {
-		struct aio_sq_ring *ring = ld->sq_ring;
+		struct aio_sq_ring *ring = &ld->sq_ring;
 
-		if (ring->kflags & IORING_SQ_NEED_WAKEUP)
-			io_ring_enter(ld->aio_ctx, ld->queued, 0, 0);
+		if (*ring->flags & IORING_SQ_NEED_WAKEUP)
+			io_uring_enter(ld, ld->queued, 0, 0);
 		ld->queued = 0;
 		return 0;
 	}
 
 	do {
-		unsigned start = ld->sq_ring->head;
+		unsigned start = *ld->sq_ring.head;
 		long nr = ld->queued;
 
-		ret = io_ring_enter(ld->aio_ctx, nr, 0, IORING_FLAG_GETEVENTS);
+		ret = io_uring_enter(ld, nr, 0, IORING_ENTER_GETEVENTS);
 		if (ret > 0) {
 			fio_aioring_queued(td, start, ret);
 			io_u_mark_submit(td, ret);
@@ -394,7 +425,7 @@ static int fio_aioring_commit(struct thread_data *td)
 				usleep(1);
 				continue;
 			}
-			td_verror(td, errno, "io_ring_enter sumit");
+			td_verror(td, errno, "io_uring_enter submit");
 			break;
 		}
 	} while (ld->queued);
@@ -402,24 +433,13 @@ static int fio_aioring_commit(struct thread_data *td)
 	return ret;
 }
 
-static size_t aioring_cq_size(struct thread_data *td)
+static void fio_aioring_unmap(struct aioring_data *ld)
 {
-	return sizeof(struct aio_cq_ring) + 2 * td->o.iodepth * sizeof(struct io_event);
-}
+	int i;
 
-static size_t aioring_sq_iocb(struct thread_data *td)
-{
-	return sizeof(struct iocb) * td->o.iodepth;
-}
-
-static size_t aioring_sq_size(struct thread_data *td)
-{
-	return sizeof(struct aio_sq_ring) + td->o.iodepth * sizeof(u32);
-}
-
-static unsigned roundup_pow2(unsigned depth)
-{
-	return 1UL << __fls(depth - 1);
+	for (i = 0; i < ARRAY_SIZE(ld->mmap); i++)
+		munmap(ld->mmap[i].ptr, ld->mmap[i].len);
+	close(ld->ring_fd);
 }
 
 static void fio_aioring_cleanup(struct thread_data *td)
@@ -437,33 +457,76 @@ static void fio_aioring_cleanup(struct thread_data *td)
 		 * speeding it up a lot.
 		 */
 		if (!(td->flags & TD_F_CHILD))
-			io_destroy(ld->aio_ctx);
+			fio_aioring_unmap(ld);
+
 		free(ld->io_u_index);
 		free(ld->io_us);
-		fio_memfree(ld->sq_ring, aioring_sq_size(td), false);
-		fio_memfree(ld->iocbs, aioring_sq_iocb(td), false);
-		fio_memfree(ld->cq_ring, aioring_cq_size(td), false);
+		free(ld->iovecs);
 		free(ld);
 	}
 }
 
+static int fio_aioring_mmap(struct aioring_data *ld, struct aio_uring_params *p)
+{
+	struct aio_sq_ring *sring = &ld->sq_ring;
+	struct aio_cq_ring *cring = &ld->cq_ring;
+	void *ptr;
+
+	ld->mmap[0].len = p->sq_off.array + p->sq_entries * sizeof(u32);
+	ptr = mmap(0, ld->mmap[0].len, PROT_READ | PROT_WRITE,
+			MAP_SHARED | MAP_POPULATE, ld->ring_fd,
+			IORING_OFF_SQ_RING);
+	ld->mmap[0].ptr = ptr;
+	sring->head = ptr + p->sq_off.head;
+	sring->tail = ptr + p->sq_off.tail;
+	sring->ring_mask = ptr + p->sq_off.ring_mask;
+	sring->ring_entries = ptr + p->sq_off.ring_entries;
+	sring->flags = ptr + p->sq_off.flags;
+	sring->array = ptr + p->sq_off.array;
+	ld->sq_ring_mask = *sring->ring_mask;
+
+	ld->mmap[1].len = p->sq_entries * sizeof(struct iocb);
+	ld->iocbs = mmap(0, ld->mmap[1].len, PROT_READ | PROT_WRITE,
+				MAP_SHARED | MAP_POPULATE, ld->ring_fd,
+				IORING_OFF_IOCB);
+	ld->mmap[1].ptr = ld->iocbs;
+
+	ld->mmap[2].len = p->cq_off.events +
+				p->cq_entries * sizeof(struct io_event);
+	ptr = mmap(0, ld->mmap[2].len, PROT_READ | PROT_WRITE,
+			MAP_SHARED | MAP_POPULATE, ld->ring_fd,
+			IORING_OFF_CQ_RING);
+	ld->mmap[2].ptr = ptr;
+	cring->head = ptr + p->cq_off.head;
+	cring->tail = ptr + p->cq_off.tail;
+	cring->ring_mask = ptr + p->cq_off.ring_mask;
+	cring->ring_entries = ptr + p->cq_off.ring_entries;
+	cring->events = ptr + p->cq_off.events;
+	ld->cq_ring_mask = *cring->ring_mask;
+	return 0;
+}
+
 static int fio_aioring_queue_init(struct thread_data *td)
 {
 	struct aioring_data *ld = td->io_ops_data;
 	struct aioring_options *o = td->eo;
-	int flags = IOCTX_FLAG_SCQRING;
 	int depth = td->o.iodepth;
+	struct aio_uring_params p;
+	int ret;
+
+	memset(&p, 0, sizeof(p));
+	p.flags = IOCTX_FLAG_SCQRING;
 
 	if (o->hipri)
-		flags |= IOCTX_FLAG_IOPOLL;
+		p.flags |= IOCTX_FLAG_IOPOLL;
 	if (o->sqthread_set) {
-		ld->sq_ring->sq_thread_cpu = o->sqthread;
-		flags |= IOCTX_FLAG_SQTHREAD;
+		p.sq_thread_cpu = o->sqthread;
+		p.flags |= IOCTX_FLAG_SQTHREAD;
 		if (o->sqthread_poll)
-			flags |= IOCTX_FLAG_SQPOLL;
+			p.flags |= IOCTX_FLAG_SQPOLL;
 	}
 	if (o->sqwq)
-		flags |= IOCTX_FLAG_SQWQ;
+		p.flags |= IOCTX_FLAG_SQWQ;
 
 	if (o->fixedbufs) {
 		struct rlimit rlim = {
@@ -472,11 +535,15 @@ static int fio_aioring_queue_init(struct thread_data *td)
 		};
 
 		setrlimit(RLIMIT_MEMLOCK, &rlim);
-		flags |= IOCTX_FLAG_FIXEDBUFS;
+		p.flags |= IOCTX_FLAG_FIXEDBUFS;
 	}
 
-	return syscall(__NR_sys_io_setup2, depth, flags,
-			ld->sq_ring, ld->cq_ring, &ld->aio_ctx);
+	ret = syscall(__NR_sys_io_uring_setup, depth, ld->iovecs, &p);
+	if (ret < 0)
+		return ret;
+
+	ld->ring_fd = ret;
+	return fio_aioring_mmap(ld, &p);
 }
 
 static int fio_aioring_post_init(struct thread_data *td)
@@ -484,22 +551,21 @@ static int fio_aioring_post_init(struct thread_data *td)
 	struct aioring_data *ld = td->io_ops_data;
 	struct aioring_options *o = td->eo;
 	struct io_u *io_u;
-	struct iocb *iocb;
-	int err = 0;
+	int err;
 
 	if (o->fixedbufs) {
 		int i;
 
 		for (i = 0; i < td->o.iodepth; i++) {
+			struct iovec *iov = &ld->iovecs[i];
+
 			io_u = ld->io_u_index[i];
-			iocb = &ld->iocbs[i];
-			iocb->u.c.buf = io_u->buf;
-			iocb->u.c.nbytes = td_max_bs(td);
+			iov->iov_base = io_u->buf;
+			iov->iov_len = td_max_bs(td);
 		}
 	}
 
 	err = fio_aioring_queue_init(td);
-
 	if (err) {
 		td_verror(td, errno, "io_queue_init");
 		return 1;
@@ -508,6 +574,11 @@ static int fio_aioring_post_init(struct thread_data *td)
 	return 0;
 }
 
+static unsigned roundup_pow2(unsigned depth)
+{
+	return 1UL << __fls(depth - 1);
+}
+
 static int fio_aioring_init(struct thread_data *td)
 {
 	struct aioring_data *ld;
@@ -522,19 +593,7 @@ static int fio_aioring_init(struct thread_data *td)
 	ld->io_u_index = calloc(td->o.iodepth, sizeof(struct io_u *));
 	ld->io_us = calloc(td->o.iodepth, sizeof(struct io_u *));
 
-	ld->iocbs = fio_memalign(page_size, aioring_sq_iocb(td), false);
-	memset(ld->iocbs, 0, aioring_sq_iocb(td));
-
-	ld->sq_ring = fio_memalign(page_size, aioring_sq_size(td), false);
-	memset(ld->sq_ring, 0, aioring_sq_size(td));
-	ld->sq_ring->nr_events = td->o.iodepth;
-	ld->sq_ring->iocbs = (u64) (uintptr_t) ld->iocbs;
-	ld->sq_ring_mask = td->o.iodepth - 1;
-
-	ld->cq_ring = fio_memalign(page_size, aioring_cq_size(td), false);
-	memset(ld->cq_ring, 0, aioring_cq_size(td));
-	ld->cq_ring->nr_events = td->o.iodepth * 2;
-	ld->cq_ring_mask = (2 * td->o.iodepth) - 1;
+	ld->iovecs = calloc(td->o.iodepth, sizeof(struct iovec));
 
 	td->io_ops_data = ld;
 	return 0;
diff --git a/t/aio-ring.c b/t/aio-ring.c
index c0c5009e..71978c68 100644
--- a/t/aio-ring.c
+++ b/t/aio-ring.c
@@ -14,6 +14,7 @@
 #include <sys/ioctl.h>
 #include <sys/syscall.h>
 #include <sys/resource.h>
+#include <sys/mman.h>
 #include <linux/fs.h>
 #include <fcntl.h>
 #include <unistd.h>
@@ -22,11 +23,14 @@
 #include <pthread.h>
 #include <sched.h>
 
-#define IOCTX_FLAG_IOPOLL	(1 << 0)
-#define IOCTX_FLAG_SCQRING	(1 << 1)	/* Use SQ/CQ rings */
+#include "../arch/arch.h"
+
+#define IOCTX_FLAG_SCQRING	(1 << 0)	/* Use SQ/CQ rings */
+#define IOCTX_FLAG_IOPOLL	(1 << 1)
 #define IOCTX_FLAG_FIXEDBUFS	(1 << 2)
 #define IOCTX_FLAG_SQTHREAD	(1 << 3)	/* Use SQ thread */
 #define IOCTX_FLAG_SQWQ		(1 << 4)	/* Use SQ wq */
+#define IOCTX_FLAG_SQPOLL	(1 << 5)
 
 #define IOEV_RES2_CACHEHIT	(1 << 0)
 
@@ -38,33 +42,55 @@ typedef uint64_t u64;
 typedef uint32_t u32;
 typedef uint16_t u16;
 
+#define IORING_OFF_SQ_RING	0ULL
+#define IORING_OFF_CQ_RING	0x8000000ULL
+#define IORING_OFF_IOCB		0x10000000ULL
+
+struct aio_sqring_offsets {
+	u32 head;
+	u32 tail;
+	u32 ring_mask;
+	u32 ring_entries;
+	u32 flags;
+	u32 array;
+};
+
+struct aio_cqring_offsets {
+	u32 head;
+	u32 tail;
+	u32 ring_mask;
+	u32 ring_entries;
+	u32 overflow;
+	u32 events;
+};
+
+struct aio_uring_params {
+	u32 sq_entries;
+	u32 cq_entries;
+	u32 flags;
+	u16 sq_thread_cpu;
+	u16 resv[9];
+	struct aio_sqring_offsets sq_off;
+	struct aio_cqring_offsets cq_off;
+};
+
 struct aio_sq_ring {
-	union {
-		struct {
-			u32 head;
-			u32 tail;
-			u32 nr_events;
-			u16 sq_thread_cpu;
-			u64 iocbs;
-		};
-		u32 pad[16];
-	};
-	u32 array[0];
+	u32 *head;
+	u32 *tail;
+	u32 *ring_mask;
+	u32 *ring_entries;
+	u32 *array;
 };
 
 struct aio_cq_ring {
-	union {
-		struct {
-			u32 head;
-			u32 tail;
-			u32 nr_events;
-		};
-		struct io_event pad;
-	};
-	struct io_event events[0];
+	u32 *head;
+	u32 *tail;
+	u32 *ring_mask;
+	u32 *ring_entries;
+	struct io_event *events;
 };
 
-#define IORING_FLAG_GETEVENTS	(1 << 0)
+#define IORING_ENTER_GETEVENTS	(1 << 0)
 
 #define DEPTH			32
 
@@ -73,17 +99,17 @@ struct aio_cq_ring {
 
 #define BS			4096
 
-static unsigned sq_ring_mask = DEPTH - 1;
-static unsigned cq_ring_mask = (2 * DEPTH) - 1;
+static unsigned sq_ring_mask, cq_ring_mask;
 
 struct submitter {
 	pthread_t thread;
 	unsigned long max_blocks;
-	io_context_t ioc;
+	int fd;
 	struct drand48_data rand;
-	struct aio_sq_ring *sq_ring;
+	struct aio_sq_ring sq_ring;
 	struct iocb *iocbs;
-	struct aio_cq_ring *cq_ring;
+	struct iovec iovecs[DEPTH];
+	struct aio_cq_ring cq_ring;
 	int inflight;
 	unsigned long reaps;
 	unsigned long done;
@@ -96,23 +122,23 @@ struct submitter {
 static struct submitter submitters[1];
 static volatile int finish;
 
-static int polled = 1;		/* use IO polling */
-static int fixedbufs = 1;	/* use fixed user buffers */
-static int buffered = 0;	/* use buffered IO, not O_DIRECT */
+static int polled = 0;		/* use IO polling */
+static int fixedbufs = 0;	/* use fixed user buffers */
+static int buffered = 1;	/* use buffered IO, not O_DIRECT */
 static int sq_thread = 0;	/* use kernel submission thread */
 static int sq_thread_cpu = 0;	/* pin above thread to this CPU */
 
-static int io_setup2(unsigned int nr_events, unsigned int flags,
-		     struct aio_sq_ring *sq_ring, struct aio_cq_ring *cq_ring,
-		     io_context_t *ctx_idp)
+static int io_uring_setup(unsigned entries, struct iovec *iovecs,
+			  struct aio_uring_params *p)
 {
-	return syscall(335, nr_events, flags, sq_ring, cq_ring, ctx_idp);
+	return syscall(__NR_sys_io_uring_setup, entries, iovecs, p);
 }
 
-static int io_ring_enter(io_context_t ctx, unsigned int to_submit,
-			 unsigned int min_complete, unsigned int flags)
+static int io_uring_enter(struct submitter *s, unsigned int to_submit,
+			  unsigned int min_complete, unsigned int flags)
 {
-	return syscall(336, ctx, to_submit, min_complete, flags);
+	return syscall(__NR_sys_io_uring_enter, s->fd, to_submit, min_complete,
+			flags);
 }
 
 static int gettid(void)
@@ -120,8 +146,9 @@ static int gettid(void)
 	return syscall(__NR_gettid);
 }
 
-static void init_io(struct submitter *s, int fd, struct iocb *iocb)
+static void init_io(struct submitter *s, int fd, unsigned index)
 {
+	struct iocb *iocb = &s->iocbs[index];
 	unsigned long offset;
 	long r;
 
@@ -130,34 +157,34 @@ static void init_io(struct submitter *s, int fd, struct iocb *iocb)
 
 	iocb->aio_fildes = fd;
 	iocb->aio_lio_opcode = IO_CMD_PREAD;
+	iocb->u.c.buf = s->iovecs[index].iov_base;
+	iocb->u.c.nbytes = BS;
 	iocb->u.c.offset = offset;
-	if (!fixedbufs)
-		iocb->u.c.nbytes = BS;
 }
 
 static int prep_more_ios(struct submitter *s, int fd, int max_ios)
 {
-	struct aio_sq_ring *ring = s->sq_ring;
+	struct aio_sq_ring *ring = &s->sq_ring;
 	u32 index, tail, next_tail, prepped = 0;
 
-	next_tail = tail = ring->tail;
+	next_tail = tail = *ring->tail;
 	do {
 		next_tail++;
 		barrier();
-		if (next_tail == ring->head)
+		if (next_tail == *ring->head)
 			break;
 
 		index = tail & sq_ring_mask;
-		init_io(s, fd, &s->iocbs[index]);
-		s->sq_ring->array[index] = index;
+		init_io(s, fd, index);
+		ring->array[index] = index;
 		prepped++;
 		tail = next_tail;
 	} while (prepped < max_ios);
 
-	if (ring->tail != tail) {
+	if (*ring->tail != tail) {
 		/* order tail store with writes to iocbs above */
 		barrier();
-		ring->tail = tail;
+		*ring->tail = tail;
 		barrier();
 	}
 	return prepped;
@@ -187,14 +214,14 @@ static int get_file_size(int fd, unsigned long *blocks)
 
 static int reap_events(struct submitter *s)
 {
-	struct aio_cq_ring *ring = s->cq_ring;
+	struct aio_cq_ring *ring = &s->cq_ring;
 	struct io_event *ev;
 	u32 head, reaped = 0;
 
-	head = ring->head;
+	head = *ring->head;
 	do {
 		barrier();
-		if (head == ring->tail)
+		if (head == *ring->tail)
 			break;
 		ev = &ring->events[head & cq_ring_mask];
 		if (ev->res != BS) {
@@ -213,7 +240,7 @@ static int reap_events(struct submitter *s)
 	} while (1);
 
 	s->inflight -= reaped;
-	ring->head = head;
+	*ring->head = head;
 	barrier();
 	return reaped;
 }
@@ -262,8 +289,7 @@ submit:
 		else
 			to_wait = min(s->inflight + to_submit, BATCH_COMPLETE);
 
-		ret = io_ring_enter(s->ioc, to_submit, to_wait,
-					IORING_FLAG_GETEVENTS);
+		ret = io_uring_enter(s, to_submit, to_wait, IORING_ENTER_GETEVENTS);
 		s->calls++;
 
 		this_reap = reap_events(s);
@@ -288,7 +314,7 @@ submit:
 			prepped = 0;
 			continue;
 		} else if (ret < 0) {
-			if ((ret == -1 && errno == EAGAIN) || ret == -EAGAIN) {
+			if (errno == EAGAIN) {
 				if (s->finish)
 					break;
 				if (this_reap)
@@ -296,10 +322,7 @@ submit:
 				to_submit = 0;
 				goto submit;
 			}
-			if (ret == -1)
-				printf("io_submit: %s\n", strerror(errno));
-			else
-				printf("io_submit: %s\n", strerror(-ret));
+			printf("io_submit: %s\n", strerror(errno));
 			break;
 		}
 	} while (!s->finish);
@@ -327,15 +350,74 @@ static void arm_sig_int(void)
 	sigaction(SIGINT, &act, NULL);
 }
 
+static int setup_ring(struct submitter *s)
+{
+	struct aio_sq_ring *sring = &s->sq_ring;
+	struct aio_cq_ring *cring = &s->cq_ring;
+	struct aio_uring_params p;
+	void *ptr;
+	int fd;
+
+	memset(&p, 0, sizeof(p));
+
+	p.flags = IOCTX_FLAG_SCQRING;
+	if (polled)
+		p.flags |= IOCTX_FLAG_IOPOLL;
+	if (fixedbufs)
+		p.flags |= IOCTX_FLAG_FIXEDBUFS;
+	if (buffered)
+		p.flags |= IOCTX_FLAG_SQWQ;
+	else if (sq_thread) {
+		p.flags |= IOCTX_FLAG_SQTHREAD;
+		p.sq_thread_cpu = sq_thread_cpu;
+	}
+
+	if (fixedbufs)
+		fd = io_uring_setup(DEPTH, s->iovecs, &p);
+	else
+		fd = io_uring_setup(DEPTH, NULL, &p);
+	if (fd < 0) {
+		perror("io_uring_setup");
+		return 1;
+	}
+
+	s->fd = fd;
+
+	ptr = mmap(0, p.sq_off.array + p.sq_entries * sizeof(u32),
+			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+			fd, IORING_OFF_SQ_RING);
+	printf("sq_ring ptr = 0x%p\n", ptr);
+	sring->head = ptr + p.sq_off.head;
+	sring->tail = ptr + p.sq_off.tail;
+	sring->ring_mask = ptr + p.sq_off.ring_mask;
+	sring->ring_entries = ptr + p.sq_off.ring_entries;
+	sring->array = ptr + p.sq_off.array;
+	sq_ring_mask = *sring->ring_mask;
+
+	s->iocbs = mmap(0, p.sq_entries * sizeof(struct iocb), PROT_READ | PROT_WRITE,
+			MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_IOCB);
+	printf("iocbs ptr   = 0x%p\n", s->iocbs);
+
+	ptr = mmap(0, p.cq_off.events + p.cq_entries * sizeof(struct io_event),
+			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+			fd, IORING_OFF_CQ_RING);
+	printf("cq_ring ptr = 0x%p\n", ptr);
+	cring->head = ptr + p.cq_off.head;
+	cring->tail = ptr + p.cq_off.tail;
+	cring->ring_mask = ptr + p.cq_off.ring_mask;
+	cring->ring_entries = ptr + p.cq_off.ring_entries;
+	cring->events = ptr + p.cq_off.events;
+	cq_ring_mask = *cring->ring_mask;
+	return 0;
+}
+
 int main(int argc, char *argv[])
 {
 	struct submitter *s = &submitters[0];
 	unsigned long done, calls, reap, cache_hit, cache_miss;
-	int flags = 0, err;
-	int j;
-	size_t size;
-	void *p, *ret;
+	int err, i;
 	struct rlimit rlim;
+	void *ret;
 
 	if (argc < 2) {
 		printf("%s: filename\n", argv[0]);
@@ -351,58 +433,24 @@ int main(int argc, char *argv[])
 
 	arm_sig_int();
 
-	size = sizeof(struct iocb) * DEPTH;
-	if (posix_memalign(&p, 4096, size))
-		return 1;
-	memset(p, 0, size);
-	s->iocbs = p;
+	for (i = 0; i < DEPTH; i++) {
+		void *buf;
 
-	size = sizeof(struct aio_sq_ring) + DEPTH * sizeof(u32);
-	if (posix_memalign(&p, 4096, size))
-		return 1;
-	s->sq_ring = p;
-	memset(p, 0, size);
-	s->sq_ring->nr_events = DEPTH;
-	s->sq_ring->iocbs = (u64) s->iocbs;
-
-	/* CQ ring must be twice as big */
-	size = sizeof(struct aio_cq_ring) +
-			2 * DEPTH * sizeof(struct io_event);
-	if (posix_memalign(&p, 4096, size))
-		return 1;
-	s->cq_ring = p;
-	memset(p, 0, size);
-	s->cq_ring->nr_events = 2 * DEPTH;
-
-	for (j = 0; j < DEPTH; j++) {
-		struct iocb *iocb = &s->iocbs[j];
-
-		if (posix_memalign(&iocb->u.c.buf, BS, BS)) {
+		if (posix_memalign(&buf, BS, BS)) {
 			printf("failed alloc\n");
 			return 1;
 		}
-		iocb->u.c.nbytes = BS;
-	}
-
-	flags = IOCTX_FLAG_SCQRING;
-	if (polled)
-		flags |= IOCTX_FLAG_IOPOLL;
-	if (fixedbufs)
-		flags |= IOCTX_FLAG_FIXEDBUFS;
-	if (buffered)
-		flags |= IOCTX_FLAG_SQWQ;
-	else if (sq_thread) {
-		flags |= IOCTX_FLAG_SQTHREAD;
-		s->sq_ring->sq_thread_cpu = sq_thread_cpu;
+		s->iovecs[i].iov_base = buf;
+		s->iovecs[i].iov_len = BS;
 	}
 
-	err = io_setup2(DEPTH, flags, s->sq_ring, s->cq_ring, &s->ioc);
+	err = setup_ring(s);
 	if (err) {
-		printf("ctx_init failed: %s, %d\n", strerror(errno), err);
+		printf("ring setup failed: %s, %d\n", strerror(errno), err);
 		return 1;
 	}
-	printf("polled=%d, fixedbufs=%d, buffered=%d\n", polled, fixedbufs, buffered);
-	printf("  QD=%d, sq_ring=%d, cq_ring=%d\n", DEPTH, s->sq_ring->nr_events, s->cq_ring->nr_events);
+	printf("polled=%d, fixedbufs=%d, buffered=%d", polled, fixedbufs, buffered);
+	printf(" QD=%d, sq_ring=%d, cq_ring=%d\n", DEPTH, *s->sq_ring.ring_entries, *s->cq_ring.ring_entries);
 	strcpy(s->filename, argv[1]);
 
 	pthread_create(&s->thread, NULL, submitter_fn, s);
@@ -437,7 +485,7 @@ int main(int argc, char *argv[])
 		}
 		printf("IOPS=%lu, IOS/call=%lu/%lu, inflight=%u (head=%u tail=%u), Cachehit=%0.2f%%\n",
 				this_done - done, rpc, ipc, s->inflight,
-				s->cq_ring->head, s->cq_ring->tail, hit);
+				*s->cq_ring.head, *s->cq_ring.tail, hit);
 		done = this_done;
 		calls = this_call;
 		reap = this_reap;



[Index of Archives]     [Linux Kernel]     [Linux SCSI]     [Linux IDE]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]

  Powered by Linux