[PATCHv3] Implement gcc 64bit atomic operations and fix the broken 8/16 bit ones

Hi,

Here is the latest set of patches for the 8/16/64-bit atomic gcc builtins.

On the kernel side, a new LWS is implemented that allows CAS operations of variable size. On gcc's side, the new LWS is used for the 8/16/64-bit atomic builtins, while the old LWS is kept for the 32-bit ops. Since the 8- and 16-bit ops were broken, it's best to use the new LWS unconditionally for those.
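
For reference, here is a minimal user-level sketch (illustrative only, not part of the patches) of the kind of code this is meant to make work; the 8/16/64-bit __sync_* builtins below end up in the new LWS, while the 32-bit ones keep using the old one:

  #include <stdio.h>

  static unsigned short flag16;
  static long long counter64;

  int main (void)
  {
    /* 16-bit CAS: one of the previously broken sub-word cases.  */
    int swapped = __sync_bool_compare_and_swap (&flag16, 0, 1);

    /* 64-bit read-modify-write: previously not implemented at all.  */
    long long prev = __sync_fetch_and_add (&counter64, 5LL);

    printf ("swapped=%d flag=%u prev=%lld counter=%lld\n",
            swapped, (unsigned) flag16, prev, counter64);
    return 0;
  }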

Hopefully, these are the final patches!

Regards,
  Guy
2014-09-12  Guy Martin  <gmsoft@xxxxxxxxxxxx>

	* config/pa/linux-atomic.c: Use new LWS CAS implementation for atomic
	builtins. Implement 64bit atomic builtins.


Index: libgcc/config/pa/linux-atomic.c
===================================================================
--- libgcc/config/pa/linux-atomic.c	(revision 212942)
+++ libgcc/config/pa/linux-atomic.c	(working copy)
@@ -75,6 +75,30 @@
   return lws_errno;
 }
 
+static inline long
+__kernel_cmpxchg2 (void * oldval, void * newval, void *mem, int val_size)
+{
+  register unsigned long lws_mem asm("r26") = (unsigned long) (mem);
+  register long lws_ret   asm("r28");
+  register long lws_errno asm("r21");
+  register unsigned long lws_old asm("r25") = (unsigned long) oldval;
+  register unsigned long lws_new asm("r24") = (unsigned long) newval;
+  register int lws_size asm("r23") = val_size;
+  asm volatile (	"ble	0xb0(%%sr2, %%r0)	\n\t"
+			"ldi	%2, %%r20		\n\t"
+	: "=r" (lws_ret), "=r" (lws_errno)
+	: "i" (2), "r" (lws_mem), "r" (lws_old), "r" (lws_new), "r" (lws_size)
+	: "r1", "r20", "r22", "r29", "r31", "fr4", "memory"
+  );
+  if (__builtin_expect (lws_errno == -EFAULT || lws_errno == -ENOSYS, 0))
+    ABORT_INSTRUCTION;
+
+  /* If the kernel LWS call succeeded but the CAS failed, return -EBUSY.  */
+  if (!lws_errno && lws_ret)
+    lws_errno = -EBUSY;
+
+  return lws_errno;
+}
 #define HIDDEN __attribute__ ((visibility ("hidden")))
 
 /* Big endian masks  */
@@ -84,69 +108,101 @@
 #define MASK_1 0xffu
 #define MASK_2 0xffffu
 
-#define FETCH_AND_OP_WORD(OP, PFX_OP, INF_OP)				\
-  int HIDDEN								\
-  __sync_fetch_and_##OP##_4 (int *ptr, int val)				\
+#define FETCH_AND_OP_2(OP, PFX_OP, INF_OP, TYPE, WIDTH, INDEX)		\
+  TYPE HIDDEN								\
+  __sync_fetch_and_##OP##_##WIDTH (TYPE *ptr, TYPE val)			\
   {									\
-    int failure, tmp;							\
+    TYPE tmp, newval;							\
+    int failure;							\
 									\
     do {								\
       tmp = *ptr;							\
-      failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr);	\
+      newval = PFX_OP (tmp INF_OP val);					\
+      failure = __kernel_cmpxchg2 (&tmp, &newval, ptr, INDEX);		\
     } while (failure != 0);						\
 									\
     return tmp;								\
   }
 
-FETCH_AND_OP_WORD (add,   , +)
-FETCH_AND_OP_WORD (sub,   , -)
-FETCH_AND_OP_WORD (or,    , |)
-FETCH_AND_OP_WORD (and,   , &)
-FETCH_AND_OP_WORD (xor,   , ^)
-FETCH_AND_OP_WORD (nand, ~, &)
+FETCH_AND_OP_2 (add,   , +, long long, 8, 3)
+FETCH_AND_OP_2 (sub,   , -, long long, 8, 3)
+FETCH_AND_OP_2 (or,    , |, long long, 8, 3)
+FETCH_AND_OP_2 (and,   , &, long long, 8, 3)
+FETCH_AND_OP_2 (xor,   , ^, long long, 8, 3)
+FETCH_AND_OP_2 (nand, ~, &, long long, 8, 3)
 
-#define NAME_oldval(OP, WIDTH) __sync_fetch_and_##OP##_##WIDTH
-#define NAME_newval(OP, WIDTH) __sync_##OP##_and_fetch_##WIDTH
+FETCH_AND_OP_2 (add,   , +, unsigned short, 2, 1)
+FETCH_AND_OP_2 (sub,   , -, unsigned short, 2, 1)
+FETCH_AND_OP_2 (or,    , |, unsigned short, 2, 1)
+FETCH_AND_OP_2 (and,   , &, unsigned short, 2, 1)
+FETCH_AND_OP_2 (xor,   , ^, unsigned short, 2, 1)
+FETCH_AND_OP_2 (nand, ~, &, unsigned short, 2, 1)
 
-/* Implement both __sync_<op>_and_fetch and __sync_fetch_and_<op> for
-   subword-sized quantities.  */
+FETCH_AND_OP_2 (add,   , +, unsigned char, 1, 0)
+FETCH_AND_OP_2 (sub,   , -, unsigned char, 1, 0)
+FETCH_AND_OP_2 (or,    , |, unsigned char, 1, 0)
+FETCH_AND_OP_2 (and,   , &, unsigned char, 1, 0)
+FETCH_AND_OP_2 (xor,   , ^, unsigned char, 1, 0)
+FETCH_AND_OP_2 (nand, ~, &, unsigned char, 1, 0)
 
-#define SUBWORD_SYNC_OP(OP, PFX_OP, INF_OP, TYPE, WIDTH, RETURN)	\
+#define OP_AND_FETCH_2(OP, PFX_OP, INF_OP, TYPE, WIDTH, INDEX)		\
   TYPE HIDDEN								\
-  NAME##_##RETURN (OP, WIDTH) (TYPE *ptr, TYPE val)			\
+  __sync_##OP##_and_fetch_##WIDTH (TYPE *ptr, TYPE val)			\
   {									\
-    int *wordptr = (int *) ((unsigned long) ptr & ~3);			\
-    unsigned int mask, shift, oldval, newval;				\
+    TYPE tmp, newval;							\
     int failure;							\
 									\
-    shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH;	\
-    mask = MASK_##WIDTH << shift;					\
+    do {								\
+      tmp = *ptr;							\
+      newval = PFX_OP (tmp INF_OP val);					\
+      failure = __kernel_cmpxchg2 (&tmp, &newval, ptr, INDEX);		\
+    } while (failure != 0);						\
 									\
+    return PFX_OP (tmp INF_OP val);					\
+  }
+
+OP_AND_FETCH_2 (add,   , +, long long, 8, 3)
+OP_AND_FETCH_2 (sub,   , -, long long, 8, 3)
+OP_AND_FETCH_2 (or,    , |, long long, 8, 3)
+OP_AND_FETCH_2 (and,   , &, long long, 8, 3)
+OP_AND_FETCH_2 (xor,   , ^, long long, 8, 3)
+OP_AND_FETCH_2 (nand, ~, &, long long, 8, 3)
+
+OP_AND_FETCH_2 (add,   , +, unsigned short, 2, 1)
+OP_AND_FETCH_2 (sub,   , -, unsigned short, 2, 1)
+OP_AND_FETCH_2 (or,    , |, unsigned short, 2, 1)
+OP_AND_FETCH_2 (and,   , &, unsigned short, 2, 1)
+OP_AND_FETCH_2 (xor,   , ^, unsigned short, 2, 1)
+OP_AND_FETCH_2 (nand, ~, &, unsigned short, 2, 1)
+
+OP_AND_FETCH_2 (add,   , +, unsigned char, 1, 0)
+OP_AND_FETCH_2 (sub,   , -, unsigned char, 1, 0)
+OP_AND_FETCH_2 (or,    , |, unsigned char, 1, 0)
+OP_AND_FETCH_2 (and,   , &, unsigned char, 1, 0)
+OP_AND_FETCH_2 (xor,   , ^, unsigned char, 1, 0)
+OP_AND_FETCH_2 (nand, ~, &, unsigned char, 1, 0)
+
+#define FETCH_AND_OP_WORD(OP, PFX_OP, INF_OP)				\
+  int HIDDEN								\
+  __sync_fetch_and_##OP##_4 (int *ptr, int val)				\
+  {									\
+    int failure, tmp;							\
+									\
     do {								\
-      oldval = *wordptr;						\
-      newval = ((PFX_OP (((oldval & mask) >> shift)			\
-                         INF_OP (unsigned int) val)) << shift) & mask;	\
-      newval |= oldval & ~mask;						\
-      failure = __kernel_cmpxchg (oldval, newval, wordptr);		\
+      tmp = *ptr;							\
+      failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr);	\
     } while (failure != 0);						\
 									\
-    return (RETURN & mask) >> shift;					\
+    return tmp;								\
   }
 
-SUBWORD_SYNC_OP (add,   , +, unsigned short, 2, oldval)
-SUBWORD_SYNC_OP (sub,   , -, unsigned short, 2, oldval)
-SUBWORD_SYNC_OP (or,    , |, unsigned short, 2, oldval)
-SUBWORD_SYNC_OP (and,   , &, unsigned short, 2, oldval)
-SUBWORD_SYNC_OP (xor,   , ^, unsigned short, 2, oldval)
-SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, oldval)
+FETCH_AND_OP_WORD (add,   , +)
+FETCH_AND_OP_WORD (sub,   , -)
+FETCH_AND_OP_WORD (or,    , |)
+FETCH_AND_OP_WORD (and,   , &)
+FETCH_AND_OP_WORD (xor,   , ^)
+FETCH_AND_OP_WORD (nand, ~, &)
 
-SUBWORD_SYNC_OP (add,   , +, unsigned char, 1, oldval)
-SUBWORD_SYNC_OP (sub,   , -, unsigned char, 1, oldval)
-SUBWORD_SYNC_OP (or,    , |, unsigned char, 1, oldval)
-SUBWORD_SYNC_OP (and,   , &, unsigned char, 1, oldval)
-SUBWORD_SYNC_OP (xor,   , ^, unsigned char, 1, oldval)
-SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, oldval)
-
 #define OP_AND_FETCH_WORD(OP, PFX_OP, INF_OP)				\
   int HIDDEN								\
   __sync_##OP##_and_fetch_4 (int *ptr, int val)				\
@@ -168,20 +224,42 @@
 OP_AND_FETCH_WORD (xor,   , ^)
 OP_AND_FETCH_WORD (nand, ~, &)
 
-SUBWORD_SYNC_OP (add,   , +, unsigned short, 2, newval)
-SUBWORD_SYNC_OP (sub,   , -, unsigned short, 2, newval)
-SUBWORD_SYNC_OP (or,    , |, unsigned short, 2, newval)
-SUBWORD_SYNC_OP (and,   , &, unsigned short, 2, newval)
-SUBWORD_SYNC_OP (xor,   , ^, unsigned short, 2, newval)
-SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, newval)
+typedef unsigned char bool;
 
-SUBWORD_SYNC_OP (add,   , +, unsigned char, 1, newval)
-SUBWORD_SYNC_OP (sub,   , -, unsigned char, 1, newval)
-SUBWORD_SYNC_OP (or,    , |, unsigned char, 1, newval)
-SUBWORD_SYNC_OP (and,   , &, unsigned char, 1, newval)
-SUBWORD_SYNC_OP (xor,   , ^, unsigned char, 1, newval)
-SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, newval)
+#define COMPARE_AND_SWAP_2(TYPE, WIDTH, INDEX)					\
+  TYPE HIDDEN									\
+  __sync_val_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval, TYPE newval)	\
+  {										\
+    TYPE actual_oldval;								\
+    int fail;									\
+										\
+    while (1)									\
+      {										\
+	actual_oldval = *ptr;							\
+										\
+	if (__builtin_expect (oldval != actual_oldval, 0))			\
+	  return actual_oldval;							\
+										\
+	fail = __kernel_cmpxchg2 (&actual_oldval, &newval, ptr, INDEX);		\
+										\
+	if (__builtin_expect (!fail, 1))					\
+	  return actual_oldval;							\
+    }										\
+  }										\
+										\
+  bool HIDDEN									\
+  __sync_bool_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval,	TYPE newval)	\
+  {										\
+    int failure = __kernel_cmpxchg2 (&oldval, &newval, ptr, INDEX);		\
+    return (failure == 0);							\
+  }
 
+
+
+COMPARE_AND_SWAP_2(long long,      8, 3)
+COMPARE_AND_SWAP_2(unsigned short, 2, 1)
+COMPARE_AND_SWAP_2(unsigned char,  1, 0)
+
 int HIDDEN
 __sync_val_compare_and_swap_4 (int *ptr, int oldval, int newval)
 {
@@ -201,41 +279,6 @@
     }
 }
 
-#define SUBWORD_VAL_CAS(TYPE, WIDTH)					\
-  TYPE HIDDEN								\
-  __sync_val_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval,		\
-				       TYPE newval)			\
-  {									\
-    int *wordptr = (int *)((unsigned long) ptr & ~3), fail;		\
-    unsigned int mask, shift, actual_oldval, actual_newval;		\
-									\
-    shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH;	\
-    mask = MASK_##WIDTH << shift;					\
-									\
-    while (1)								\
-      {									\
-	actual_oldval = *wordptr;					\
-									\
-	if (__builtin_expect (((actual_oldval & mask) >> shift)		\
-			      != (unsigned int) oldval, 0))		\
-	  return (actual_oldval & mask) >> shift;			\
-									\
-	actual_newval = (actual_oldval & ~mask)				\
-			| (((unsigned int) newval << shift) & mask);	\
-									\
-	fail = __kernel_cmpxchg (actual_oldval, actual_newval,		\
-				 wordptr);				\
-									\
-	if (__builtin_expect (!fail, 1))				\
-	  return (actual_oldval & mask) >> shift;			\
-      }									\
-  }
-
-SUBWORD_VAL_CAS (unsigned short, 2)
-SUBWORD_VAL_CAS (unsigned char,  1)
-
-typedef unsigned char bool;
-
 bool HIDDEN
 __sync_bool_compare_and_swap_4 (int *ptr, int oldval, int newval)
 {
@@ -243,18 +286,25 @@
   return (failure == 0);
 }
 
-#define SUBWORD_BOOL_CAS(TYPE, WIDTH)					\
-  bool HIDDEN								\
-  __sync_bool_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval,		\
-					TYPE newval)			\
+
+#define SYNC_LOCK_TEST_AND_SET_2(TYPE, WIDTH, INDEX)			\
+TYPE HIDDEN								\
+  __sync_lock_test_and_set_##WIDTH (TYPE *ptr, TYPE val)		\
   {									\
-    TYPE actual_oldval							\
-      = __sync_val_compare_and_swap_##WIDTH (ptr, oldval, newval);	\
-    return (oldval == actual_oldval);					\
+    TYPE oldval;							\
+    int failure;							\
+									\
+    do {								\
+      oldval = *ptr;							\
+      failure = __kernel_cmpxchg2 (&oldval, &val, ptr, INDEX);		\
+    } while (failure != 0);						\
+									\
+    return oldval;							\
   }
 
-SUBWORD_BOOL_CAS (unsigned short, 2)
-SUBWORD_BOOL_CAS (unsigned char,  1)
+SYNC_LOCK_TEST_AND_SET_2(long long,      8, 3)
+SYNC_LOCK_TEST_AND_SET_2(unsigned short, 2, 1)
+SYNC_LOCK_TEST_AND_SET_2(unsigned char,  1, 0)
 
 int HIDDEN
 __sync_lock_test_and_set_4 (int *ptr, int val)
@@ -269,29 +319,22 @@
   return oldval;
 }
 
-#define SUBWORD_TEST_AND_SET(TYPE, WIDTH)				\
-  TYPE HIDDEN								\
-  __sync_lock_test_and_set_##WIDTH (TYPE *ptr, TYPE val)		\
-  {									\
-    int failure;							\
-    unsigned int oldval, newval, shift, mask;				\
-    int *wordptr = (int *) ((unsigned long) ptr & ~3);			\
-									\
-    shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH;	\
-    mask = MASK_##WIDTH << shift;					\
-									\
-    do {								\
-      oldval = *wordptr;						\
-      newval = (oldval & ~mask)						\
-	       | (((unsigned int) val << shift) & mask);		\
-      failure = __kernel_cmpxchg (oldval, newval, wordptr);		\
-    } while (failure != 0);						\
-									\
-    return (oldval & mask) >> shift;					\
+
+#define SYNC_LOCK_RELEASE_2(TYPE, WIDTH, INDEX)			\
+  void HIDDEN							\
+  __sync_lock_release_##WIDTH (TYPE *ptr)			\
+  {								\
+    TYPE failure, oldval, zero = 0;				\
+								\
+    do {							\
+      oldval = *ptr;						\
+      failure = __kernel_cmpxchg2 (&oldval, &zero, ptr, INDEX);	\
+    } while (failure != 0);					\
   }
 
-SUBWORD_TEST_AND_SET (unsigned short, 2)
-SUBWORD_TEST_AND_SET (unsigned char,  1)
+SYNC_LOCK_RELEASE_2 (long long, 8, 3)
+SYNC_LOCK_RELEASE_2 (short,     2, 1)
+SYNC_LOCK_RELEASE_2 (char,      1, 0)
 
 void HIDDEN
 __sync_lock_release_4 (int *ptr)
@@ -304,23 +347,3 @@
   } while (failure != 0);
 }
 
-#define SYNC_LOCK_RELEASE(TYPE, WIDTH)					\
-  void HIDDEN								\
-  __sync_lock_release_##WIDTH (TYPE *ptr)				\
-  {									\
-    int failure;							\
-    unsigned int oldval, newval, shift, mask;				\
-    int *wordptr = (int *) ((unsigned long) ptr & ~3);			\
-									\
-    shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH;	\
-    mask = MASK_##WIDTH << shift;					\
-									\
-    do {								\
-      oldval = *wordptr;						\
-      newval = oldval & ~mask;						\
-      failure = __kernel_cmpxchg (oldval, newval, wordptr);		\
-    } while (failure != 0);						\
-  }
-
-SYNC_LOCK_RELEASE (short, 2)
-SYNC_LOCK_RELEASE (char,  1)

parisc: Implement new LWS CAS supporting 64-bit operations.

The current LWS CAS only works correctly for 32-bit operands. The new LWS
allows CAS operations of variable size.

Signed-off-by: Guy Martin <gmsoft@xxxxxxxxxxxx>

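Roughly speaking, the new LWS implements the semantics of the following simplified C model (illustrative only, not the actual gateway-page code; the real implementation runs the load/compare/store under the hashed spinlock with interrupts disabled, and %r21 additionally reports -EAGAIN on lock contention or -EFAULT on a faulting access):

  /* Simplified C model of lws_compare_and_swap_2.
     addr = %r26, old = %r25 (pointer), new = %r24 (pointer),
     size = %r23 (0/1/2/3 for 8/16/32/64 bit).
     Returns 0 (%r28) when the swap was performed, non-zero otherwise.  */
  #include <string.h>

  static long
  lws_cas2_model (void *addr, const void *old, const void *new, int size)
  {
    size_t len = (size_t) 1 << size;	/* 0/1/2/3 -> 1/2/4/8 bytes */

    /* The real code is atomic from userspace's point of view because it
       runs on the gateway page with the per-address lock held.  */
    if (memcmp (addr, old, len) != 0)
      return 1;				/* compare failed, no store done */
    memcpy (addr, new, len);
    return 0;				/* swap performed */
  }
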
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index 8387860..7ef22e3 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -74,7 +74,7 @@ ENTRY(linux_gateway_page)
 	/* ADDRESS 0xb0 to 0xb8, lws uses two insns for entry */
 	/* Light-weight-syscall entry must always be located at 0xb0 */
 	/* WARNING: Keep this number updated with table size changes */
-#define __NR_lws_entries (2)
+#define __NR_lws_entries (3)
 
 lws_entry:
 	gate	lws_start, %r0		/* increase privilege */
@@ -502,7 +502,7 @@ lws_exit:
 
 	
 	/***************************************************
-		Implementing CAS as an atomic operation:
+		Implementing 32bit CAS as an atomic operation:
 
 		%r26 - Address to examine
 		%r25 - Old value to check (old)
@@ -659,6 +659,230 @@ cas_action:
 	ASM_EXCEPTIONTABLE_ENTRY(2b-linux_gateway_page, 3b-linux_gateway_page)
 
 
+	/***************************************************
+		New CAS implementation which uses pointers and variable size
+		information. The values pointed to by old and new MUST NOT change
+		while performing the CAS. The lock only protects the value at %r26.
+
+		%r26 - Address to examine
+		%r25 - Pointer to the value to check (old)
+		%r24 - Pointer to the value to set (new)
+		%r23 - Size of the variable (0/1/2/3 for 8/16/32/64 bit)
+		%r28 - Return non-zero on failure
+		%r21 - Kernel error code
+
+		%r21 has the following meanings:
+
+		EAGAIN - CAS is busy, ldcw failed, try again.
+		EFAULT - Read or write failed.
+
+		Scratch: r20, r22, r28, r29, r1, fr4 (32bit for 64bit CAS only)
+
+	****************************************************/
+
+	/* ELF32 Process entry path */
+lws_compare_and_swap_2:
+#ifdef CONFIG_64BIT
+	/* Clip the input registers */
+	depdi	0, 31, 32, %r26
+	depdi	0, 31, 32, %r25
+	depdi	0, 31, 32, %r24
+	depdi	0, 31, 32, %r23
+#endif
+
+	/* Check the validity of the size argument */
+	subi,>>= 4, %r23, %r0
+	b,n	lws_exit_nosys
+
+	/* Jump to the functions which will load the old and new values into
+	   registers depending on their size */
+	shlw	%r23, 2, %r29
+	blr	%r29, %r0
+	nop
+
+	/* 8bit load */
+4:	ldb	0(%sr3,%r25), %r25
+	b	cas2_lock_start
+5:	ldb	0(%sr3,%r24), %r24
+	nop
+	nop
+	nop
+	nop
+	nop
+
+	/* 16bit load */
+6:	ldh	0(%sr3,%r25), %r25
+	b	cas2_lock_start
+7:	ldh	0(%sr3,%r24), %r24
+	nop
+	nop
+	nop
+	nop
+	nop
+
+	/* 32bit load */
+8:	ldw	0(%sr3,%r25), %r25
+	b	cas2_lock_start
+9:	ldw	0(%sr3,%r24), %r24
+	nop
+	nop
+	nop
+	nop
+	nop
+
+	/* 64bit load */
+#ifdef CONFIG_64BIT
+10:	ldd	0(%sr3,%r25), %r25
+11:	ldd	0(%sr3,%r24), %r24
+#else
+	/* Load old value into r22/r23 - high/low */
+10:	ldw	0(%sr3,%r25), %r22
+11:	ldw	4(%sr3,%r25), %r23
+	/* Load new value into fr4 for atomic store later */
+12:	flddx	0(%sr3,%r24), %fr4
+#endif
+
+cas2_lock_start:
+	/* Load start of lock table */
+	ldil	L%lws_lock_start, %r20
+	ldo	R%lws_lock_start(%r20), %r28
+
+	/* Extract four bits from r26 and hash lock (Bits 4-7) */
+	extru  %r26, 27, 4, %r20
+
+	/* Find lock to use, the hash is either one of 0 to
+	   15, multiplied by 16 (keep it 16-byte aligned)
+	   and add to the lock table offset. */
+	shlw	%r20, 4, %r20
+	add	%r20, %r28, %r20
+
+	rsm	PSW_SM_I, %r0			/* Disable interrupts */
+	/* COW breaks can cause contention on UP systems */
+	LDCW	0(%sr2,%r20), %r28		/* Try to acquire the lock */
+	cmpb,<>,n	%r0, %r28, cas2_action	/* Did we get it? */
+cas2_wouldblock:
+	ldo	2(%r0), %r28			/* 2nd case */
+	ssm	PSW_SM_I, %r0
+	b	lws_exit			/* Contended... */
+	ldo	-EAGAIN(%r0), %r21		/* Spin in userspace */
+
+	/*
+		prev = *addr;
+		if ( prev == old )
+		  *addr = new;
+		return prev;
+	*/
+
+	/* NOTES:
+		This all works because intr_do_signal
+		and schedule both check the return iasq
+		and see that we are on the kernel page
+		so this process is never scheduled off
+		nor ever sent any signal of any sort,
+		thus it is wholly atomic from userspace's
+		perspective
+	*/
+cas2_action:
+	/* Jump to the correct function */
+	blr	%r29, %r0
+	/* Set %r28 as non-zero for now */
+	ldo	1(%r0),%r28
+
+	/* 8bit CAS */
+13:	ldb,ma	0(%sr3,%r26), %r29
+	sub,=	%r29, %r25, %r0
+	b,n	cas2_end
+14:	stb,ma	%r24, 0(%sr3,%r26)
+	b	cas2_end
+	copy	%r0, %r28
+	nop
+	nop
+
+	/* 16bit CAS */
+15:	ldh,ma	0(%sr3,%r26), %r29
+	sub,=	%r29, %r25, %r0
+	b,n	cas2_end
+16:	sth,ma	%r24, 0(%sr3,%r26)
+	b	cas2_end
+	copy	%r0, %r28
+	nop
+	nop
+
+	/* 32bit CAS */
+17:	ldw,ma	0(%sr3,%r26), %r29
+	sub,=	%r29, %r25, %r0
+	b,n	cas2_end
+18:	stw,ma	%r24, 0(%sr3,%r26)
+	b	cas2_end
+	copy	%r0, %r28
+	nop
+	nop
+
+	/* 64bit CAS */
+#ifdef CONFIG_64BIT
+19:	ldd,ma	0(%sr3,%r26), %r29
+	sub,=	%r29, %r25, %r0
+	b,n	cas2_end
+20:	std,ma	%r24, 0(%sr3,%r26)
+	copy	%r0, %r28
+#else
+	/* Compare first word */
+19:	ldw,ma	0(%sr3,%r26), %r29
+	sub,=	%r29, %r22, %r0
+	b,n	cas2_end
+	/* Compare second word */
+20:	ldw,ma	4(%sr3,%r26), %r29
+	sub,=	%r29, %r23, %r0
+	b,n	cas2_end
+	/* Perform the store */
+21:	fstdx	%fr4, 0(%sr3,%r26)
+	copy	%r0, %r28
+#endif
+
+cas2_end:
+	/* Free lock */
+	stw,ma	%r20, 0(%sr2,%r20)
+	/* Enable interrupts */
+	ssm	PSW_SM_I, %r0
+	/* Return to userspace, set no error */
+	b	lws_exit
+	copy	%r0, %r21
+
+22:
+	/* Error occurred on load or store */
+	/* Free lock */
+	stw	%r20, 0(%sr2,%r20)
+	ssm	PSW_SM_I, %r0
+	ldo	1(%r0),%r28
+	b	lws_exit
+	ldo	-EFAULT(%r0),%r21	/* set errno */
+	nop
+	nop
+	nop
+
+	/* Exception table entries, for the load and store, return EFAULT.
+	   Each of the entries must be relocated. */
+	ASM_EXCEPTIONTABLE_ENTRY(4b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(5b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(6b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(7b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(8b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(9b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(10b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(11b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(13b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(14b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(15b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(16b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(17b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(18b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(19b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(20b-linux_gateway_page, 22b-linux_gateway_page)
+#ifndef CONFIG_64BIT
+	ASM_EXCEPTIONTABLE_ENTRY(12b-linux_gateway_page, 22b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(21b-linux_gateway_page, 22b-linux_gateway_page)
+#endif
+
 	/* Make sure nothing else is placed on this page */
 	.align PAGE_SIZE
 END(linux_gateway_page)
@@ -675,8 +899,9 @@ ENTRY(end_linux_gateway_page)
 	/* Light-weight-syscall table */
 	/* Start of lws table. */
 ENTRY(lws_table)
-	LWS_ENTRY(compare_and_swap32)	/* 0 - ELF32 Atomic compare and swap */
-	LWS_ENTRY(compare_and_swap64)	/* 1 - ELF64 Atomic compare and swap */
+	LWS_ENTRY(compare_and_swap32)		/* 0 - ELF32 Atomic 32bit CAS */
+	LWS_ENTRY(compare_and_swap64)		/* 1 - ELF64 Atomic 32bit CAS */
+	LWS_ENTRY(compare_and_swap_2)		/* 2 - ELF32 Atomic 64bit CAS */
 END(lws_table)
 	/* End of lws table */
 
