Atomic operations on ARM

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, 13 Feb 2008, Lennart Poettering wrote:
>
> On Fri, 01.02.08 15:22, Jyri Sarha (lepbtetfmvbz at spammotel.com) wrote:
>
...
>
> All code I saw that makes use of the kernel helper function calls it
> in a loop. (At least that's what I remember)
>

The same is true in many cases for pa_atomic_cmpxchg too. That is 
why the idea of having another loop inside pa_atomic_cmpxchg felt a
bit ugly to me at first.

>> So what should be done?
>>
>> 1. Change the above line in pulsecore/async.c to use pa_atomic_store
>> instead and try to look if there are other similar places.
>>
>> 2. Write loops like above to ARM specific implementations atomic
>> compare and exchange.
>
> I'd certainly vote for #2. I see no real drawbacks on this.
>
>> Any way I'll produce a proper ARM atomic ops patch as soon as I am happy
>> with it. However it may take a while because I am still only learning
>> the autoconf magic and I have some other tasks I should take care of too.
>
> Every patch greatly appreciated!
>

Here is my atomic ops patch implemented according to option #2. The patch
should apply cleanly to 0.9.9 release and probably to PA SVN HEAD too. After
applying the patch the source compiles cleanly at least in our scratchbox
based development environment and what is most important the resulting binaries
seem to work correctly.

Arm binaries are usually cross compiled, thus trying to detect CPU or
operating system capabilities at compile time may give bad results. Because 
of this I added couple of configure flags so that wanted result can be
forced in all environments. Here are the options:

--enable-atomic-arm-memory-barrier
This should only be needed in SMP systems. Since I am not aware of any ARM
based SMP systems, this is disabled by default.

--disable-atomic-arm-linux-helpers
Disables usage of linux kernel helpers, inline assembler implementation or,
if that fails, libatomic_ops is used instead. By default kernel helpers are
always used when compiling for arm in a linux system.

Cheers,
 	Jyri

// Jyri Sarha -- my.name at nokia.com
-------------- next part --------------
diff --git a/configure.ac b/configure.ac
index 10111e1..e2b584e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -121,6 +121,43 @@ if test "x$GCC" = "xyes" ; then
     done
 fi
 
+# Native atomic operation support
+AC_ARG_ENABLE([atomic-arm-linux-helpers],
+    AC_HELP_STRING([--disable-atomic-arm-linux-helpers], [use inline asm or libatomic_ops instead]),
+        [
+            case "${enableval}" in
+                yes) atomic_arm_linux_helpers=yes ;;
+                no) atomic_arm_linux_helpers=no ;;
+                *) AC_MSG_ERROR(bad value ${enableval} for --disable-atomic-arm-linux-helpers) ;;
+            esac
+        ],
+        [atomic_arm_linux_helpers=auto])
+
+AC_ARG_ENABLE([atomic-arm-memory-barrier],
+    AC_HELP_STRING([--enable-atomic-arm-memory-barrier], [only really needed in SMP arm systems]),
+        [
+            case "${enableval}" in
+                yes) AC_DEFINE_UNQUOTED(ATOMIC_ARM_MEMORY_BARRIER_ENABLED, 1, [Enable memory barriers]) ;;
+                no) ;;
+                *) AC_MSG_ERROR(bad value ${enableval} for --disable-atomic-arm-linux-helpers) ;;
+            esac
+        ],)
+
+AC_MSG_CHECKING([target operating system])
+case $host in
+    	*-*-linux*)
+	    AC_MSG_RESULT([linux]) 
+	    pulse_target_os=linux
+    	;;
+	*)
+	    AC_MSG_RESULT([unknown]) 	   
+	    pulse_target_os=unknown
+	;;
+esac
+
+# If everything else fails use libatomic_ops
+need_libatomic_ops=yes
+
 AC_MSG_CHECKING([whether $CC knows __sync_bool_compare_and_swap()])
 AC_LANG_CONFTEST([int main() { int a = 4; __sync_bool_compare_and_swap(&a, 4, 5); }])
 $CC conftest.c $CFLAGS -o conftest > /dev/null 2> /dev/null
@@ -129,8 +166,53 @@ rm -f conftest.o conftest
 if test $ret -eq 0 ; then
     AC_DEFINE([HAVE_ATOMIC_BUILTINS], 1, [Have __sync_bool_compare_and_swap() and friends.])
     AC_MSG_RESULT([yes])
+    need_libatomic_ops=no
 else
     AC_MSG_RESULT([no])
+    # HW specific atomic ops stuff 
+    AC_MSG_CHECKING([architecture for native atomic operations])
+    case $host_cpu in	
+        arm*)
+	    AC_MSG_RESULT([arm])
+	    AC_MSG_CHECKING([whether we can use Linux kernel helpers])
+	    # The Linux kernel helper functions have been there since 2.6.16. However
+  	    # compile time checking for kernel version in cross compile environment 
+	    # (which is usually the case for arm cpu) is tricky (or impossible).
+	    if test "x$pulse_target_os" = "xlinux" && test "x$atomic_arm_linux_helpers" != "xno"; then
+	        AC_MSG_RESULT([yes])
+        	AC_DEFINE_UNQUOTED(ATOMIC_ARM_LINUX_HELPERS, 1, [special arm linux implementation])
+    		need_libatomic_ops=no
+	    else
+	       AC_MSG_RESULT([no])
+	       AC_MSG_CHECKING([compiler support for arm inline asm atomic operations])
+	       AC_LANG_CONFTEST([[int main() 
+	       { 
+                   volatile int a=0;
+	           int o=0, n=1, r;
+	           asm volatile ("ldrex	%0, [%1]\n"
+			 	 "subs	%0, %0, %2\n"
+			 	 "strexeq %0, %3, [%1]\n"
+			 	 : "=&r" (r)
+			 	 : "r" (&a), "Ir" (o), "r" (n)
+			 	 : "cc");
+                   return (a==1 ? 0 : -1);
+	       }]])
+	       $CC conftest.c $CFLAGS -o conftest > /dev/null 2>&1 
+	       ret=$?
+	       rm -f conftest.o conftest
+	       if test $ret -eq 0 ; then
+	       	   AC_DEFINE([ATOMIC_ARM_INLINE_ASM], 1, [Have ARMv6 instructions.])
+	    	   AC_MSG_RESULT([yes])
+		   need_libatomic_ops=no
+	       else
+	    	   AC_MSG_RESULT([no])	       
+               fi
+	   fi
+      	;;
+        *)
+	    AC_MSG_RESULT([unknown])
+        ;;
+    esac
 fi
 
 AC_MSG_CHECKING([whether $CC knows __thread])
@@ -401,13 +483,19 @@ AC_SUBST(LIBSNDFILE_LIBS)
 
 #### atomic-ops ###
 
-AC_CHECK_HEADERS([atomic_ops.h], [], [
-AC_MSG_ERROR([*** libatomic-ops headers not found])
-])
-
-# Win32 does not need the lib and breaks horribly if we try to include it
-if test "x$os_is_win32" != "x1" ; then
-    LIBS="$LIBS -latomic_ops"
+AC_MSG_CHECKING([whether we need libatomic_ops])		
+if test "x$need_libatomic_ops" = "xyes"; then
+   AC_MSG_RESULT([yes])
+   AC_CHECK_HEADERS([atomic_ops.h], [], [
+   AC_MSG_ERROR([*** libatomic-ops headers not found])
+   ])
+
+   # Win32 does not need the lib and breaks horribly if we try to include it
+   if test "x$os_is_win32" != "x1" ; then
+       LIBS="$LIBS -latomic_ops"
+   fi
+else
+   AC_MSG_RESULT([no])
 fi
 
 #### Libsamplerate support (optional) ####
diff --git a/src/pulsecore/atomic.h b/src/pulsecore/atomic.h
index a358501..b4516e9 100644
--- a/src/pulsecore/atomic.h
+++ b/src/pulsecore/atomic.h
@@ -182,6 +182,235 @@ static inline int pa_atomic_ptr_cmpxchg(pa_atomic_ptr_t *a, void *old_p, void* n
     return result;
 }
 
+#elif defined(ATOMIC_ARM_INLINE_ASM)
+
+/*
+   These should only be enabled if we have ARMv6 or better.  
+*/
+
+typedef struct pa_atomic {
+    volatile int value;
+} pa_atomic_t;
+
+#define PA_ATOMIC_INIT(v) { .value = (v) }
+
+static inline void pa_memory_barrier(void) {
+#ifdef ATOMIC_ARM_MEMORY_BARRIER_ENABLED
+    asm volatile ("mcr	p15, 0, r0, c7, c10, 5	@ dmb");
+#endif
+}
+
+static inline int pa_atomic_load(const pa_atomic_t *a) {
+    pa_memory_barrier();
+    return a->value;
+}
+
+static inline void pa_atomic_store(pa_atomic_t *a, int i) {
+    a->value = i;
+    pa_memory_barrier();
+}
+
+/* Returns the previously set value */
+static inline int pa_atomic_add(pa_atomic_t *a, int i) {
+    unsigned long not_exclusive;
+    int new_val, old_val;
+
+    pa_memory_barrier();
+    do {
+        asm volatile ("ldrex	%0, [%3]\n"
+		      "add 	%2, %0, %4\n"
+		      "strex	%1, %2, [%3]\n"
+		      : "=&r" (old_val), "=&r" (not_exclusive), "=&r" (new_val)
+		      : "r" (&a->value), "Ir" (i)
+		      : "cc");
+    } while(not_exclusive);
+    pa_memory_barrier();
+
+    return old_val;
+}
+
+/* Returns the previously set value */
+static inline int pa_atomic_sub(pa_atomic_t *a, int i) {
+    unsigned long not_exclusive;
+    int new_val, old_val;
+
+    pa_memory_barrier();
+    do {
+        asm volatile ("ldrex	%0, [%3]\n"
+		      "sub 	%2, %0, %4\n"
+		      "strex	%1, %2, [%3]\n"
+		      : "=&r" (old_val), "=&r" (not_exclusive), "=&r" (new_val)
+		      : "r" (&a->value), "Ir" (i)
+		      : "cc");
+    } while(not_exclusive);
+    pa_memory_barrier();
+
+    return old_val;
+}
+
+static inline int pa_atomic_inc(pa_atomic_t *a) {
+    return pa_atomic_add(a, 1);
+}
+
+static inline int pa_atomic_dec(pa_atomic_t *a) {
+    return pa_atomic_sub(a, 1);
+}
+
+static inline int pa_atomic_cmpxchg(pa_atomic_t *a, int old_i, int new_i) {
+    unsigned long not_equal, not_exclusive;
+
+    pa_memory_barrier();
+    do {
+        asm volatile ("ldrex	%0, [%2]\n"
+		      "subs	%0, %0, %3\n"
+		      "mov	%1, %0\n"
+		      "strexeq %0, %4, [%2]\n"
+		      : "=&r" (not_exclusive), "=&r" (not_equal)
+		      : "r" (&a->value), "Ir" (old_i), "r" (new_i)
+		      : "cc");
+    } while(not_exclusive && !not_equal);
+    pa_memory_barrier();
+
+    return !not_equal;
+}
+
+typedef struct pa_atomic_ptr {
+    volatile unsigned long value;
+} pa_atomic_ptr_t;
+
+#define PA_ATOMIC_PTR_INIT(v) { .value = (long) (v) }
+
+static inline void* pa_atomic_ptr_load(const pa_atomic_ptr_t *a) {
+    pa_memory_barrier();
+    return (void*) a->value;
+}
+
+static inline void pa_atomic_ptr_store(pa_atomic_ptr_t *a, void *p) {
+    a->value = (unsigned long) p;
+    pa_memory_barrier();
+}
+
+static inline int pa_atomic_ptr_cmpxchg(pa_atomic_ptr_t *a, void *old_p, void* new_p) {
+    unsigned long not_equal, not_exclusive;
+
+    pa_memory_barrier();
+    do {
+        asm volatile ("ldrex	%0, [%2]\n"
+		      "subs	%0, %0, %3\n"
+		      "mov	%1, %0\n"
+		      "strexeq %0, %4, [%2]\n"
+		      : "=&r" (not_exclusive), "=&r" (not_equal)
+		      : "r" (&a->value), "Ir" (old_p), "r" (new_p)
+		      : "cc");
+    } while(not_exclusive && !not_equal);
+    pa_memory_barrier();
+
+    return !not_equal;
+}
+
+#elif defined(ATOMIC_ARM_LINUX_HELPERS)
+
+/* See file arch/arm/kernel/entry-armv.S in your kernel sources for more 
+   information about these functions. The arm kernel helper functions first
+   appeared in 2.6.16.
+   Apply --disable-atomic-arm-linux-helpers flag to confugure if you prefere 
+   inline asm implementation or you have an obsolete Linux kernel.
+*/
+/* Memory barrier */
+typedef void (__kernel_dmb_t)(void);
+#define __kernel_dmb (*(__kernel_dmb_t *)0xffff0fa0)
+
+static inline void pa_memory_barrier(void) {
+#ifndef ATOMIC_ARM_MEMORY_BARRIER_ENABLED
+    __kernel_dmb();
+#endif
+}
+
+/* Atomic exchange (__kernel_cmpxchg_t contains memory barriers if needed) */
+typedef int (__kernel_cmpxchg_t)(int oldval, int newval, volatile int *ptr);
+#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
+
+/* This is just to get rid of all warnings */
+typedef int (__kernel_cmpxchg_u_t)(unsigned long oldval, unsigned long newval, volatile unsigned long *ptr);
+#define __kernel_cmpxchg_u (*(__kernel_cmpxchg_u_t *)0xffff0fc0)
+
+typedef struct pa_atomic {
+    volatile int value;
+} pa_atomic_t;
+
+#define PA_ATOMIC_INIT(v) { .value = (v) }
+
+static inline int pa_atomic_load(const pa_atomic_t *a) {
+    pa_memory_barrier();
+    return a->value;
+}
+
+static inline void pa_atomic_store(pa_atomic_t *a, int i) {
+    a->value = i;
+    pa_memory_barrier();  
+}
+
+/* Returns the previously set value */
+static inline int pa_atomic_add(pa_atomic_t *a, int i) {
+    int old_val;
+    do {
+	old_val = a->value;
+    } while(__kernel_cmpxchg(old_val, old_val + i, &a->value));
+    return old_val;
+}
+
+/* Returns the previously set value */
+static inline int pa_atomic_sub(pa_atomic_t *a, int i) {
+    int old_val;
+    do {
+	old_val = a->value;
+    } while(__kernel_cmpxchg(old_val, old_val - i, &a->value));
+    return old_val;
+}
+
+/* Returns the previously set value */
+static inline int pa_atomic_inc(pa_atomic_t *a) {
+    return pa_atomic_add(a, 1);
+}
+
+/* Returns the previously set value */
+static inline int pa_atomic_dec(pa_atomic_t *a) {
+    return pa_atomic_sub(a, 1);
+}
+
+/* Returns non-zero when the operation was successful. */
+static inline int pa_atomic_cmpxchg(pa_atomic_t *a, int old_i, int new_i) {
+    int failed = 1;
+    do { 
+      failed = __kernel_cmpxchg(old_i, new_i, &a->value);
+    } while(failed && a->value == old_i);
+    return !failed;
+}
+
+typedef struct pa_atomic_ptr {
+    volatile unsigned long value;
+} pa_atomic_ptr_t;
+
+#define PA_ATOMIC_PTR_INIT(v) { .value = (unsigned long) (v) }
+
+static inline void* pa_atomic_ptr_load(const pa_atomic_ptr_t *a) {
+    pa_memory_barrier();
+    return (void*) a->value;
+}
+
+static inline void pa_atomic_ptr_store(pa_atomic_ptr_t *a, void *p) {
+    a->value = (unsigned long) p;
+    pa_memory_barrier();
+}
+
+static inline int pa_atomic_ptr_cmpxchg(pa_atomic_ptr_t *a, void *old_p, void* new_p) {
+    int failed = 1;
+    do { 
+	failed = __kernel_cmpxchg_u((unsigned long) old_p, (unsigned long) new_p, &a->value);
+    } while(failed && a->value == old_p);
+    return !failed;
+}
+
 #else
 
 /* libatomic_ops based implementation */


[Index of Archives]     [Linux Audio Users]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux