[PATCH liburing 2/3] src/include/liburing/barrier.h: Use C11 atomics

Instead of using a combination of open-coded atomic primitives and gcc
builtins, use C11 atomics for all CPU architectures. Note: despite their
name, the atomic_*() operations do not necessarily translate into atomic
instructions. With gcc 10 on x86_64 this patch changes the order of the
instructions generated for e.g. io_uring_get_sqe(), but not their number:

Without this patch:

   0x0000000000000360 <+0>:     mov    0x44(%rdi),%eax
   0x0000000000000363 <+3>:     lea    0x1(%rax),%edx
   0x0000000000000366 <+6>:     mov    (%rdi),%rax
   0x0000000000000369 <+9>:     mov    (%rax),%eax
   0x000000000000036b <+11>:    mov    0x18(%rdi),%rcx
   0x000000000000036f <+15>:    mov    %edx,%esi
   0x0000000000000371 <+17>:    sub    %eax,%esi
   0x0000000000000373 <+19>:    xor    %eax,%eax
   0x0000000000000375 <+21>:    cmp    (%rcx),%esi
   0x0000000000000377 <+23>:    ja     0x38d <io_uring_get_sqe+45>
   0x0000000000000379 <+25>:    mov    0x10(%rdi),%rax
   0x000000000000037d <+29>:    mov    (%rax),%eax
   0x000000000000037f <+31>:    and    0x44(%rdi),%eax
   0x0000000000000382 <+34>:    mov    %edx,0x44(%rdi)
   0x0000000000000385 <+37>:    shl    $0x6,%rax
   0x0000000000000389 <+41>:    add    0x38(%rdi),%rax
   0x000000000000038d <+45>:    retq

With this patch applied:

   0x0000000000000360 <+0>:     mov    0x44(%rdi),%eax
   0x0000000000000363 <+3>:     lea    0x1(%rax),%edx
   0x0000000000000366 <+6>:     mov    (%rdi),%rax
   0x0000000000000369 <+9>:     mov    %edx,%esi
   0x000000000000036b <+11>:    mov    (%rax),%eax
   0x000000000000036d <+13>:    sub    %eax,%esi
   0x000000000000036f <+15>:    xor    %eax,%eax
   0x0000000000000371 <+17>:    mov    0x18(%rdi),%rcx
   0x0000000000000375 <+21>:    cmp    (%rcx),%esi
   0x0000000000000377 <+23>:    ja     0x38d <io_uring_get_sqe+45>
   0x0000000000000379 <+25>:    mov    0x10(%rdi),%rax
   0x000000000000037d <+29>:    mov    (%rax),%eax
   0x000000000000037f <+31>:    and    0x44(%rdi),%eax
   0x0000000000000382 <+34>:    mov    %edx,0x44(%rdi)
   0x0000000000000385 <+37>:    shl    $0x6,%rax
   0x0000000000000389 <+41>:    add    0x38(%rdi),%rax
   0x000000000000038d <+45>:    retq

Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
---
 src/include/liburing/barrier.h | 44 ++++++++--------------------------
 1 file changed, 10 insertions(+), 34 deletions(-)
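
The macros added below map io_uring's READ_ONCE/WRITE_ONCE helpers onto
relaxed C11 atomics and the store_release/load_acquire helpers onto
release/acquire atomics. As a minimal sketch of the publication pattern
those orderings provide (the structure and field names here are
illustrative only, not liburing's actual types):

    #include <stdatomic.h>

    struct fake_ring {
            unsigned int slots[16];         /* payload written by the producer */
            _Atomic unsigned int tail;      /* index published to the consumer */
    };

    /* Producer side: fill the slot first, then publish it. The release
     * store keeps the slot write from being reordered after the tail
     * update, which is what io_uring_smp_store_release() provides. */
    static void publish(struct fake_ring *r, unsigned int idx, unsigned int val)
    {
            r->slots[idx] = val;
            atomic_store_explicit(&r->tail, idx + 1, memory_order_release);
    }

    /* Consumer side: the acquire load of the tail makes every write that
     * happened before the matching release store visible, mirroring
     * io_uring_smp_load_acquire(). */
    static int consume(struct fake_ring *r, unsigned int idx, unsigned int *val)
    {
            if (idx >= atomic_load_explicit(&r->tail, memory_order_acquire))
                    return 0;
            *val = r->slots[idx];
            return 1;
    }

On x86_64 both relaxed and acquire loads compile down to plain mov
instructions, which is why the listings above differ only in instruction
scheduling.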

diff --git a/src/include/liburing/barrier.h b/src/include/liburing/barrier.h
index ad69506bb248..c8aa4210371c 100644
--- a/src/include/liburing/barrier.h
+++ b/src/include/liburing/barrier.h
@@ -2,6 +2,8 @@
 #ifndef LIBURING_BARRIER_H
 #define LIBURING_BARRIER_H
 
+#include <stdatomic.h>
+
 /*
 From the kernel documentation file refcount-vs-atomic.rst:
 
@@ -21,40 +23,14 @@ after the acquire operation executes. This is implemented using
 :c:func:`smp_acquire__after_ctrl_dep`.
 */
 
-/* From tools/include/linux/compiler.h */
-/* Optimization barrier */
-/* The "volatile" is due to gcc bugs */
-#define io_uring_barrier()	__asm__ __volatile__("": : :"memory")
-
-/* From tools/virtio/linux/compiler.h */
-#define IO_URING_WRITE_ONCE(var, val) \
-	(*((volatile __typeof(val) *)(&(var))) = (val))
-#define IO_URING_READ_ONCE(var) (*((volatile __typeof(var) *)(&(var))))
-
+#define IO_URING_WRITE_ONCE(var, val)				\
+	atomic_store_explicit(&(var), (val), memory_order_relaxed)
+#define IO_URING_READ_ONCE(var)					\
+	atomic_load_explicit(&(var), memory_order_relaxed)
 
-#if defined(__x86_64__) || defined(__i386__)
-/* Adapted from arch/x86/include/asm/barrier.h */
-#define io_uring_smp_store_release(p, v)	\
-do {						\
-	io_uring_barrier();			\
-	IO_URING_WRITE_ONCE(*(p), (v));		\
-} while (0)
-
-#define io_uring_smp_load_acquire(p)			\
-({							\
-	__typeof(*p) ___p1 = IO_URING_READ_ONCE(*(p));	\
-	io_uring_barrier();				\
-	___p1;						\
-})
-
-#else /* defined(__x86_64__) || defined(__i386__) */
-/*
- * Add arch appropriate definitions. Use built-in atomic operations for
- * archs we don't have support for.
- */
-#define io_uring_smp_store_release(p, v) \
-	__atomic_store_n(p, v, __ATOMIC_RELEASE)
-#define io_uring_smp_load_acquire(p) __atomic_load_n(p, __ATOMIC_ACQUIRE)
-#endif /* defined(__x86_64__) || defined(__i386__) */
+#define io_uring_smp_store_release(p, v)			\
+	atomic_store_explicit((p), (v), memory_order_release)
+#define io_uring_smp_load_acquire(p)				\
+	atomic_load_explicit((p), memory_order_acquire)
 
 #endif /* defined(LIBURING_BARRIER_H) */


