On Wed, Dec 7, 2022 at 12:48 AM Daniel Borkmann <daniel@xxxxxxxxxxxxx> wrote:
>
> On 12/6/22 10:08 AM, Magnus Karlsson wrote:
> > From: Magnus Karlsson <magnus.karlsson@xxxxxxxxx>
> >
> > Get rid of our own homegrown assembly store/release and load/acquire
> > implementations. Use the HW agnostic APIs the compiler offers
> > instead.
>
> The description is a bit terse. Could you add a bit more context, discussion
> or reference on why it's safe to replace them with C11 atomics?

Will do, though I will hold off on a v2 in case there are further comments.

> > Signed-off-by: Magnus Karlsson <magnus.karlsson@xxxxxxxxx>
> > ---
> >  tools/testing/selftests/bpf/xsk.h | 80 ++-----------------------------
> >  1 file changed, 4 insertions(+), 76 deletions(-)
> >
> > diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h
> > index 997723b0bfb2..24ee765aded3 100644
> > --- a/tools/testing/selftests/bpf/xsk.h
> > +++ b/tools/testing/selftests/bpf/xsk.h
> > @@ -23,77 +23,6 @@
> >  extern "C" {
> >  #endif
> >
> > -/* This whole API has been deprecated and moved to libxdp that can be found at
> > - * https://github.com/xdp-project/xdp-tools. The APIs are exactly the same so
> > - * it should just be linking with libxdp instead of libbpf for this set of
> > - * functionality. If not, please submit a bug report on the aforementioned page.
> > - */
> > -
> > -/* Load-Acquire Store-Release barriers used by the XDP socket
> > - * library. The following macros should *NOT* be considered part of
> > - * the xsk.h API, and is subject to change anytime.
> > - *
> > - * LIBRARY INTERNAL
> > - */
> > -
> > -#define __XSK_READ_ONCE(x) (*(volatile typeof(x) *)&x)
> > -#define __XSK_WRITE_ONCE(x, v) (*(volatile typeof(x) *)&x) = (v)
> > -
> > -#if defined(__i386__) || defined(__x86_64__)
> > -# define libbpf_smp_store_release(p, v) \
> > -	do { \
> > -		asm volatile("" : : : "memory"); \
> > -		__XSK_WRITE_ONCE(*p, v); \
> > -	} while (0)
> > -# define libbpf_smp_load_acquire(p) \
> > -	({ \
> > -		typeof(*p) ___p1 = __XSK_READ_ONCE(*p); \
> > -		asm volatile("" : : : "memory"); \
> > -		___p1; \
> > -	})
> > -#elif defined(__aarch64__)
> > -# define libbpf_smp_store_release(p, v) \
> > -		asm volatile ("stlr %w1, %0" : "=Q" (*p) : "r" (v) : "memory")
> > -# define libbpf_smp_load_acquire(p) \
> > -	({ \
> > -		typeof(*p) ___p1; \
> > -		asm volatile ("ldar %w0, %1" \
> > -			      : "=r" (___p1) : "Q" (*p) : "memory"); \
> > -		___p1; \
> > -	})
> > -#elif defined(__riscv)
> > -# define libbpf_smp_store_release(p, v) \
> > -	do { \
> > -		asm volatile ("fence rw,w" : : : "memory"); \
> > -		__XSK_WRITE_ONCE(*p, v); \
> > -	} while (0)
> > -# define libbpf_smp_load_acquire(p) \
> > -	({ \
> > -		typeof(*p) ___p1 = __XSK_READ_ONCE(*p); \
> > -		asm volatile ("fence r,rw" : : : "memory"); \
> > -		___p1; \
> > -	})
> > -#endif
> > -
> > -#ifndef libbpf_smp_store_release
> > -#define libbpf_smp_store_release(p, v) \
> > -	do { \
> > -		__sync_synchronize(); \
> > -		__XSK_WRITE_ONCE(*p, v); \
> > -	} while (0)
> > -#endif
> > -
> > -#ifndef libbpf_smp_load_acquire
> > -#define libbpf_smp_load_acquire(p) \
> > -	({ \
> > -		typeof(*p) ___p1 = __XSK_READ_ONCE(*p); \
> > -		__sync_synchronize(); \
> > -		___p1; \
> > -	})
> > -#endif
> > -
> > -/* LIBRARY INTERNAL -- END */
> > -
> >  /* Do not access these members directly. Use the functions below. */
> >  #define DEFINE_XSK_RING(name) \
> >  struct name { \
> > @@ -168,7 +97,7 @@ static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
> >  	 * this function. Without this optimization it whould have been
> >  	 * free_entries = r->cached_prod - r->cached_cons + r->size.
> >  	 */
> > -	r->cached_cons = libbpf_smp_load_acquire(r->consumer);
> > +	r->cached_cons = __atomic_load_n(r->consumer, __ATOMIC_ACQUIRE);
> >  	r->cached_cons += r->size;
> >
> >  	return r->cached_cons - r->cached_prod;
> > @@ -179,7 +108,7 @@ static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
> >  	__u32 entries = r->cached_prod - r->cached_cons;
> >
> >  	if (entries == 0) {
> > -		r->cached_prod = libbpf_smp_load_acquire(r->producer);
> > +		r->cached_prod = __atomic_load_n(r->producer, __ATOMIC_ACQUIRE);
> >  		entries = r->cached_prod - r->cached_cons;
> >  	}
> >
> > @@ -202,7 +131,7 @@ static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb)
> >  	/* Make sure everything has been written to the ring before indicating
> >  	 * this to the kernel by writing the producer pointer.
> >  	 */
> > -	libbpf_smp_store_release(prod->producer, *prod->producer + nb);
> > +	__atomic_store_n(prod->producer, *prod->producer + nb, __ATOMIC_RELEASE);
> >  }
> >
> >  static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx)
> > @@ -227,8 +156,7 @@ static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb)
> >  	/* Make sure data has been read before indicating we are done
> >  	 * with the entries by updating the consumer pointer.
> >  	 */
> > -	libbpf_smp_store_release(cons->consumer, *cons->consumer + nb);
> > -
> > +	__atomic_store_n(cons->consumer, *cons->consumer + nb, __ATOMIC_RELEASE);
> >  }
> >
> >  static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
> >
>
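In the meantime, here is a minimal stand-alone sketch of the pattern the
patch switches to, in case it helps the discussion. It is not the xsk.h
code itself; the ring, variable and thread names below are made up for
illustration. A producer fills an entry and then publishes it with a
release store of the producer index; the consumer picks it up with an
acquire load, which is the same pairing the removed
libbpf_smp_store_release()/libbpf_smp_load_acquire() macros provided:

/* Illustrative only -- not part of xsk.h. Single-producer/single-consumer
 * index publication using the GCC/Clang __atomic builtins, mirroring the
 * pattern used for the ring producer/consumer pointers.
 */
#include <pthread.h>
#include <stdio.h>

#define RING_SIZE 8	/* power of two, like the real rings */

static unsigned int ring[RING_SIZE];
static unsigned int producer;	/* written by the producer thread only */
static unsigned int consumer;	/* written by the consumer thread only */

static void *producer_thread(void *arg)
{
	(void)arg;
	for (unsigned int i = 0; i < 16; i++) {
		/* Wait until there is a free slot in the ring. */
		while (producer - __atomic_load_n(&consumer, __ATOMIC_ACQUIRE) == RING_SIZE)
			;
		/* Fill the entry first... */
		ring[producer & (RING_SIZE - 1)] = i;
		/* ...then publish it. The release store guarantees the entry
		 * is visible to the consumer before the new index is.
		 */
		__atomic_store_n(&producer, producer + 1, __ATOMIC_RELEASE);
	}
	return NULL;
}

static void *consumer_thread(void *arg)
{
	(void)arg;
	for (unsigned int i = 0; i < 16; i++) {
		/* The acquire load pairs with the release store above, so the
		 * entry read below is the one the producer wrote.
		 */
		while (__atomic_load_n(&producer, __ATOMIC_ACQUIRE) == consumer)
			;
		printf("got %u\n", ring[consumer & (RING_SIZE - 1)]);
		/* Release the slot back to the producer. */
		__atomic_store_n(&consumer, consumer + 1, __ATOMIC_RELEASE);
	}
	return NULL;
}

int main(void)
{
	pthread_t p, c;

	pthread_create(&p, NULL, producer_thread, NULL);
	pthread_create(&c, NULL, consumer_thread, NULL);
	pthread_join(p, NULL);
	pthread_join(c, NULL);
	return 0;
}

Built with gcc -O2 -pthread, the builtins should lower to plain loads and
stores (plus a compiler barrier) on x86-64 and to ldar/stlr on arm64, i.e.
the same instructions the removed asm macros emitted. I will put a more
complete argument along these lines into the v2 commit message.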