[PATCH] tools/io_uring: Fix memory ordering

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Order head and tail stores properly against CQE / SQE memory accesses.
Use <asm/barrier.h> instead of the io_uring "barrier.h" header file.

Cc: Roman Penyaev <rpenyaev@xxxxxxx>
Cc: Stefan Hajnoczi <stefanha@xxxxxxxxxx>
Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
---
 tools/io_uring/Makefile         |  2 +-
 tools/io_uring/barrier.h        | 16 ---------------
 tools/io_uring/io_uring-bench.c | 20 +++++++++---------
 tools/io_uring/liburing.h       |  9 ++++-----
 tools/io_uring/queue.c          | 36 +++++++++++----------------------
 5 files changed, 26 insertions(+), 57 deletions(-)
 delete mode 100644 tools/io_uring/barrier.h

diff --git a/tools/io_uring/Makefile b/tools/io_uring/Makefile
index 00f146c54c53..bbec91c6274c 100644
--- a/tools/io_uring/Makefile
+++ b/tools/io_uring/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for io_uring test tools
-CFLAGS += -Wall -Wextra -g -D_GNU_SOURCE
+CFLAGS += -Wall -Wextra -g -D_GNU_SOURCE -I../include
 LDLIBS += -lpthread
 
 all: io_uring-cp io_uring-bench
diff --git a/tools/io_uring/barrier.h b/tools/io_uring/barrier.h
deleted file mode 100644
index ef00f6722ba9..000000000000
--- a/tools/io_uring/barrier.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef LIBURING_BARRIER_H
-#define LIBURING_BARRIER_H
-
-#if defined(__x86_64) || defined(__i386__)
-#define read_barrier()	__asm__ __volatile__("":::"memory")
-#define write_barrier()	__asm__ __volatile__("":::"memory")
-#else
-/*
- * Add arch appropriate definitions. Be safe and use full barriers for
- * archs we don't have support for.
- */
-#define read_barrier()	__sync_synchronize()
-#define write_barrier()	__sync_synchronize()
-#endif
-
-#endif
diff --git a/tools/io_uring/io_uring-bench.c b/tools/io_uring/io_uring-bench.c
index 0f257139b003..29971a2a1c74 100644
--- a/tools/io_uring/io_uring-bench.c
+++ b/tools/io_uring/io_uring-bench.c
@@ -28,9 +28,9 @@
 #include <string.h>
 #include <pthread.h>
 #include <sched.h>
+#include <asm/barrier.h>
 
 #include "liburing.h"
-#include "barrier.h"
 
 #define min(a, b)		((a < b) ? (a) : (b))
 
@@ -199,8 +199,7 @@ static int prep_more_ios(struct submitter *s, unsigned max_ios)
 	next_tail = tail = *ring->tail;
 	do {
 		next_tail++;
-		read_barrier();
-		if (next_tail == *ring->head)
+		if (next_tail == smp_load_acquire(ring->head))
 			break;
 
 		index = tail & sq_ring_mask;
@@ -211,10 +210,11 @@ static int prep_more_ios(struct submitter *s, unsigned max_ios)
 	} while (prepped < max_ios);
 
 	if (*ring->tail != tail) {
-		/* order tail store with writes to sqes above */
-		write_barrier();
-		*ring->tail = tail;
-		write_barrier();
+		/*
+		 * Ensure that the kernel sees the SQE updates before it sees
+		 * the tail update.
+		 */
+		smp_store_release(ring->tail, tail);
 	}
 	return prepped;
 }
@@ -251,8 +251,7 @@ static int reap_events(struct submitter *s)
 	do {
 		struct file *f;
 
-		read_barrier();
-		if (head == *ring->tail)
+		if (head == smp_load_acquire(ring->tail))
 			break;
 		cqe = &ring->cqes[head & cq_ring_mask];
 		if (!do_nop) {
@@ -270,8 +269,7 @@ static int reap_events(struct submitter *s)
 	} while (1);
 
 	s->inflight -= reaped;
-	*ring->head = head;
-	write_barrier();
+	smp_store_release(ring->head, head);
 	return reaped;
 }
 
diff --git a/tools/io_uring/liburing.h b/tools/io_uring/liburing.h
index 5f305c86b892..15b29bfac811 100644
--- a/tools/io_uring/liburing.h
+++ b/tools/io_uring/liburing.h
@@ -10,7 +10,7 @@ extern "C" {
 #include <string.h>
 #include "../../include/uapi/linux/io_uring.h"
 #include <inttypes.h>
-#include "barrier.h"
+#include <asm/barrier.h>
 
 /*
  * Library interface to io_uring
@@ -82,12 +82,11 @@ static inline void io_uring_cqe_seen(struct io_uring *ring,
 	if (cqe) {
 		struct io_uring_cq *cq = &ring->cq;
 
-		(*cq->khead)++;
 		/*
-		 * Ensure that the kernel sees our new head, the kernel has
-		 * the matching read barrier.
+		 * Ensure that the kernel only sees the new value of the head
+		 * index after the CQEs have been read.
 		 */
-		write_barrier();
+		smp_store_release(cq->khead, *cq->khead + 1);
 	}
 }
 
diff --git a/tools/io_uring/queue.c b/tools/io_uring/queue.c
index 321819c132c7..ada05bc74361 100644
--- a/tools/io_uring/queue.c
+++ b/tools/io_uring/queue.c
@@ -4,9 +4,9 @@
 #include <unistd.h>
 #include <errno.h>
 #include <string.h>
+#include <asm/barrier.h>
 
 #include "liburing.h"
-#include "barrier.h"
 
 static int __io_uring_get_cqe(struct io_uring *ring,
 			      struct io_uring_cqe **cqe_ptr, int wait)
@@ -20,14 +20,12 @@ static int __io_uring_get_cqe(struct io_uring *ring,
 	head = *cq->khead;
 	do {
 		/*
-		 * It's necessary to use a read_barrier() before reading
-		 * the CQ tail, since the kernel updates it locklessly. The
-		 * kernel has the matching store barrier for the update. The
-		 * kernel also ensures that previous stores to CQEs are ordered
+		 * It's necessary to use smp_load_acquire() to read the CQ
+		 * tail. The kernel uses smp_store_release() for updating the
+		 * CQ tail to ensure that previous stores to CQEs are ordered
 		 * with the tail update.
 		 */
-		read_barrier();
-		if (head != *cq->ktail) {
+		if (head != smp_load_acquire(cq->ktail)) {
 			*cqe_ptr = &cq->cqes[head & mask];
 			break;
 		}
@@ -73,12 +71,11 @@ int io_uring_submit(struct io_uring *ring)
 	int ret;
 
 	/*
-	 * If we have pending IO in the kring, submit it first. We need a
-	 * read barrier here to match the kernels store barrier when updating
-	 * the SQ head.
+	 * If we have pending IO in the kring, submit it first. We need
+	 * smp_load_acquire() here to match the kernel's smp_store_release()
+	 * when updating the SQ head.
 	 */
-	read_barrier();
-	if (*sq->khead != *sq->ktail) {
+	if (smp_load_acquire(sq->khead) != *sq->ktail) {
 		submitted = *sq->kring_entries;
 		goto submit;
 	}
@@ -94,7 +91,6 @@ int io_uring_submit(struct io_uring *ring)
 	to_submit = sq->sqe_tail - sq->sqe_head;
 	while (to_submit--) {
 		ktail_next++;
-		read_barrier();
 
 		sq->array[ktail & mask] = sq->sqe_head & mask;
 		ktail = ktail_next;
@@ -108,18 +104,10 @@ int io_uring_submit(struct io_uring *ring)
 
 	if (*sq->ktail != ktail) {
 		/*
-		 * First write barrier ensures that the SQE stores are updated
-		 * with the tail update. This is needed so that the kernel
-		 * will never see a tail update without the preceeding sQE
-		 * stores being done.
+		 * Use smp_store_release() so that the kernel will never see a
+		 * tail update without the preceding SQE stores being done.
 		 */
-		write_barrier();
-		*sq->ktail = ktail;
-		/*
-		 * The kernel has the matching read barrier for reading the
-		 * SQ tail.
-		 */
-		write_barrier();
+		smp_store_release(sq->ktail, ktail);
 	}
 
 submit:
-- 
2.23.0.rc1.153.gdeed80330f-goog




[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux