Hi,
Here is the latest set of patches for the 8/16/64-bit atomic GCC builtins.
On the kernel side, a new LWS is implemented allowing for variable size
CAS.
On GCC's side, the new LWS is used for the 8/16/64-bit atomic builtins,
keeping the old LWS for 32-bit ops.
Since the 8- and 16-bit ops were broken, it's best to use the new LWS
unconditionally for these.
Hopefully, these are the final patches!
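
For anyone who wants to check the result, here is a minimal smoke test of
the kind that used to fail (hypothetical, not part of the patch; build with
the patched gcc/libgcc on hppa-linux):

#include <stdio.h>

static unsigned char c = 1;
static unsigned short s = 1;
static long long ll = 1;

int main (void)
{
  /* 8/16-bit ops previously went through the broken subword emulation.  */
  __sync_fetch_and_add (&c, 1);
  __sync_fetch_and_add (&s, 1);
  /* 64-bit CAS did not exist at all before these patches.  */
  if (!__sync_bool_compare_and_swap (&ll, 1LL, 2LL))
    return 1;
  printf ("%d %d %lld\n", c, s, ll);  /* expect: 2 2 2 */
  return 0;
}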
Regards,
Guy
2014-09-12 Guy Martin <gmsoft@xxxxxxxxxxxx>
* config/pa/linux-atomic.c: Use new LWS CAS implementation for atomic
builtins. Implement 64-bit atomic builtins.
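
To ease the review: the contract the wrappers rely on is that
__kernel_cmpxchg2 (defined in the diff below) returns 0 when the store
happened, -EBUSY when *mem no longer matched *oldval (so the caller simply
retries), and aborts on EFAULT/ENOSYS. Every builtin then reduces to a
retry loop; a hand-written sketch of the 64-bit fetch-and-add:

long long
fetch_and_add_8_sketch (long long *ptr, long long val)
{
  long long tmp, newval;
  int failure;

  do {
    tmp = *ptr;          /* sample the current value */
    newval = tmp + val;  /* compute the desired value */
    /* index 3 selects the 64-bit path in the kernel */
    failure = __kernel_cmpxchg2 (&tmp, &newval, ptr, 3);
  } while (failure != 0);

  return tmp;            /* fetch_and_add returns the old value */
}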
Index: libgcc/config/pa/linux-atomic.c
===================================================================
--- libgcc/config/pa/linux-atomic.c (revision 212942)
+++ libgcc/config/pa/linux-atomic.c (working copy)
@@ -75,6 +75,30 @@
return lws_errno;
}
+static inline long
+__kernel_cmpxchg2 (void *oldval, void *newval, void *mem, int val_size)
+{
+ register unsigned long lws_mem asm("r26") = (unsigned long) (mem);
+ register long lws_ret asm("r28");
+ register long lws_errno asm("r21");
+ register unsigned long lws_old asm("r25") = (unsigned long) oldval;
+ register unsigned long lws_new asm("r24") = (unsigned long) newval;
+ register int lws_size asm("r23") = val_size;
+ asm volatile ( "ble 0xb0(%%sr2, %%r0) \n\t"
+ "ldi %2, %%r20 \n\t"
+ : "=r" (lws_ret), "=r" (lws_errno)
+ : "i" (2), "r" (lws_mem), "r" (lws_old), "r" (lws_new), "r" (lws_size)
+ : "r1", "r20", "r22", "r29", "r31", "fr4", "memory"
+ );
+ if (__builtin_expect (lws_errno == -EFAULT || lws_errno == -ENOSYS, 0))
+ ABORT_INSTRUCTION;
+
+ /* If the kernel LWS call succeeded but the CAS failed, return -EBUSY */
+ if (!lws_errno && lws_ret)
+ lws_errno = -EBUSY;
+
+ return lws_errno;
+}
#define HIDDEN __attribute__ ((visibility ("hidden")))
/* Big endian masks */
@@ -84,69 +108,101 @@
#define MASK_1 0xffu
#define MASK_2 0xffffu
-#define FETCH_AND_OP_WORD(OP, PFX_OP, INF_OP) \
- int HIDDEN \
- __sync_fetch_and_##OP##_4 (int *ptr, int val) \
+#define FETCH_AND_OP_2(OP, PFX_OP, INF_OP, TYPE, WIDTH, INDEX) \
+ TYPE HIDDEN \
+ __sync_fetch_and_##OP##_##WIDTH (TYPE *ptr, TYPE val) \
{ \
- int failure, tmp; \
+ TYPE tmp, newval; \
+ int failure; \
\
do { \
tmp = *ptr; \
- failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr); \
+ newval = PFX_OP (tmp INF_OP val); \
+ failure = __kernel_cmpxchg2 (&tmp, &newval, ptr, INDEX); \
} while (failure != 0); \
\
return tmp; \
}
-FETCH_AND_OP_WORD (add, , +)
-FETCH_AND_OP_WORD (sub, , -)
-FETCH_AND_OP_WORD (or, , |)
-FETCH_AND_OP_WORD (and, , &)
-FETCH_AND_OP_WORD (xor, , ^)
-FETCH_AND_OP_WORD (nand, ~, &)
+FETCH_AND_OP_2 (add, , +, long long, 8, 3)
+FETCH_AND_OP_2 (sub, , -, long long, 8, 3)
+FETCH_AND_OP_2 (or, , |, long long, 8, 3)
+FETCH_AND_OP_2 (and, , &, long long, 8, 3)
+FETCH_AND_OP_2 (xor, , ^, long long, 8, 3)
+FETCH_AND_OP_2 (nand, ~, &, long long, 8, 3)
-#define NAME_oldval(OP, WIDTH) __sync_fetch_and_##OP##_##WIDTH
-#define NAME_newval(OP, WIDTH) __sync_##OP##_and_fetch_##WIDTH
+FETCH_AND_OP_2 (add, , +, unsigned short, 2, 1)
+FETCH_AND_OP_2 (sub, , -, unsigned short, 2, 1)
+FETCH_AND_OP_2 (or, , |, unsigned short, 2, 1)
+FETCH_AND_OP_2 (and, , &, unsigned short, 2, 1)
+FETCH_AND_OP_2 (xor, , ^, unsigned short, 2, 1)
+FETCH_AND_OP_2 (nand, ~, &, unsigned short, 2, 1)
-/* Implement both __sync_<op>_and_fetch and __sync_fetch_and_<op> for
- subword-sized quantities. */
+FETCH_AND_OP_2 (add, , +, unsigned char, 1, 0)
+FETCH_AND_OP_2 (sub, , -, unsigned char, 1, 0)
+FETCH_AND_OP_2 (or, , |, unsigned char, 1, 0)
+FETCH_AND_OP_2 (and, , &, unsigned char, 1, 0)
+FETCH_AND_OP_2 (xor, , ^, unsigned char, 1, 0)
+FETCH_AND_OP_2 (nand, ~, &, unsigned char, 1, 0)
-#define SUBWORD_SYNC_OP(OP, PFX_OP, INF_OP, TYPE, WIDTH, RETURN) \
+#define OP_AND_FETCH_2(OP, PFX_OP, INF_OP, TYPE, WIDTH, INDEX) \
TYPE HIDDEN \
- NAME##_##RETURN (OP, WIDTH) (TYPE *ptr, TYPE val) \
+ __sync_##OP##_and_fetch_##WIDTH (TYPE *ptr, TYPE val) \
{ \
- int *wordptr = (int *) ((unsigned long) ptr & ~3); \
- unsigned int mask, shift, oldval, newval; \
+ TYPE tmp, newval; \
int failure; \
\
- shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \
- mask = MASK_##WIDTH << shift; \
+ do { \
+ tmp = *ptr; \
+ newval = PFX_OP (tmp INF_OP val); \
+ failure = __kernel_cmpxchg2 (&tmp, &newval, ptr, INDEX); \
+ } while (failure != 0); \
\
+ return PFX_OP (tmp INF_OP val); \
+ }
+
+OP_AND_FETCH_2 (add, , +, long long, 8, 3)
+OP_AND_FETCH_2 (sub, , -, long long, 8, 3)
+OP_AND_FETCH_2 (or, , |, long long, 8, 3)
+OP_AND_FETCH_2 (and, , &, long long, 8, 3)
+OP_AND_FETCH_2 (xor, , ^, long long, 8, 3)
+OP_AND_FETCH_2 (nand, ~, &, long long, 8, 3)
+
+OP_AND_FETCH_2 (add, , +, unsigned short, 2, 1)
+OP_AND_FETCH_2 (sub, , -, unsigned short, 2, 1)
+OP_AND_FETCH_2 (or, , |, unsigned short, 2, 1)
+OP_AND_FETCH_2 (and, , &, unsigned short, 2, 1)
+OP_AND_FETCH_2 (xor, , ^, unsigned short, 2, 1)
+OP_AND_FETCH_2 (nand, ~, &, unsigned short, 2, 1)
+
+OP_AND_FETCH_2 (add, , +, unsigned char, 1, 0)
+OP_AND_FETCH_2 (sub, , -, unsigned char, 1, 0)
+OP_AND_FETCH_2 (or, , |, unsigned char, 1, 0)
+OP_AND_FETCH_2 (and, , &, unsigned char, 1, 0)
+OP_AND_FETCH_2 (xor, , ^, unsigned char, 1, 0)
+OP_AND_FETCH_2 (nand, ~, &, unsigned char, 1, 0)
+
+#define FETCH_AND_OP_WORD(OP, PFX_OP, INF_OP) \
+ int HIDDEN \
+ __sync_fetch_and_##OP##_4 (int *ptr, int val) \
+ { \
+ int failure, tmp; \
+ \
do { \
- oldval = *wordptr; \
- newval = ((PFX_OP (((oldval & mask) >> shift) \
- INF_OP (unsigned int) val)) << shift) & mask; \
- newval |= oldval & ~mask; \
- failure = __kernel_cmpxchg (oldval, newval, wordptr); \
+ tmp = *ptr; \
+ failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr); \
} while (failure != 0); \
\
- return (RETURN & mask) >> shift; \
+ return tmp; \
}
-SUBWORD_SYNC_OP (add, , +, unsigned short, 2, oldval)
-SUBWORD_SYNC_OP (sub, , -, unsigned short, 2, oldval)
-SUBWORD_SYNC_OP (or, , |, unsigned short, 2, oldval)
-SUBWORD_SYNC_OP (and, , &, unsigned short, 2, oldval)
-SUBWORD_SYNC_OP (xor, , ^, unsigned short, 2, oldval)
-SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, oldval)
+FETCH_AND_OP_WORD (add, , +)
+FETCH_AND_OP_WORD (sub, , -)
+FETCH_AND_OP_WORD (or, , |)
+FETCH_AND_OP_WORD (and, , &)
+FETCH_AND_OP_WORD (xor, , ^)
+FETCH_AND_OP_WORD (nand, ~, &)
-SUBWORD_SYNC_OP (add, , +, unsigned char, 1, oldval)
-SUBWORD_SYNC_OP (sub, , -, unsigned char, 1, oldval)
-SUBWORD_SYNC_OP (or, , |, unsigned char, 1, oldval)
-SUBWORD_SYNC_OP (and, , &, unsigned char, 1, oldval)
-SUBWORD_SYNC_OP (xor, , ^, unsigned char, 1, oldval)
-SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, oldval)
-
#define OP_AND_FETCH_WORD(OP, PFX_OP, INF_OP) \
int HIDDEN \
__sync_##OP##_and_fetch_4 (int *ptr, int val) \
@@ -168,20 +224,42 @@
OP_AND_FETCH_WORD (xor, , ^)
OP_AND_FETCH_WORD (nand, ~, &)
-SUBWORD_SYNC_OP (add, , +, unsigned short, 2, newval)
-SUBWORD_SYNC_OP (sub, , -, unsigned short, 2, newval)
-SUBWORD_SYNC_OP (or, , |, unsigned short, 2, newval)
-SUBWORD_SYNC_OP (and, , &, unsigned short, 2, newval)
-SUBWORD_SYNC_OP (xor, , ^, unsigned short, 2, newval)
-SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, newval)
+typedef unsigned char bool;
-SUBWORD_SYNC_OP (add, , +, unsigned char, 1, newval)
-SUBWORD_SYNC_OP (sub, , -, unsigned char, 1, newval)
-SUBWORD_SYNC_OP (or, , |, unsigned char, 1, newval)
-SUBWORD_SYNC_OP (and, , &, unsigned char, 1, newval)
-SUBWORD_SYNC_OP (xor, , ^, unsigned char, 1, newval)
-SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, newval)
+#define COMPARE_AND_SWAP_2(TYPE, WIDTH, INDEX) \
+ TYPE HIDDEN \
+ __sync_val_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval, TYPE newval) \
+ { \
+ TYPE actual_oldval; \
+ int fail; \
+ \
+ while (1) \
+ { \
+ actual_oldval = *ptr; \
+ \
+ if (__builtin_expect (oldval != actual_oldval, 0)) \
+ return actual_oldval; \
+ \
+ fail = __kernel_cmpxchg2 (&actual_oldval, &newval, ptr, INDEX); \
+ \
+ if (__builtin_expect (!fail, 1)) \
+ return actual_oldval; \
+ } \
+ } \
+ \
+ bool HIDDEN \
+ __sync_bool_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval, TYPE newval) \
+ { \
+ int failure = __kernel_cmpxchg2 (&oldval, &newval, ptr, INDEX); \
+ return (failure == 0); \
+ }
+
+
+COMPARE_AND_SWAP_2 (long long, 8, 3)
+COMPARE_AND_SWAP_2 (unsigned short, 2, 1)
+COMPARE_AND_SWAP_2 (unsigned char, 1, 0)
+
int HIDDEN
__sync_val_compare_and_swap_4 (int *ptr, int oldval, int newval)
{
@@ -201,41 +279,6 @@
}
}
-#define SUBWORD_VAL_CAS(TYPE, WIDTH) \
- TYPE HIDDEN \
- __sync_val_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval, \
- TYPE newval) \
- { \
- int *wordptr = (int *)((unsigned long) ptr & ~3), fail; \
- unsigned int mask, shift, actual_oldval, actual_newval; \
- \
- shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \
- mask = MASK_##WIDTH << shift; \
- \
- while (1) \
- { \
- actual_oldval = *wordptr; \
- \
- if (__builtin_expect (((actual_oldval & mask) >> shift) \
- != (unsigned int) oldval, 0)) \
- return (actual_oldval & mask) >> shift; \
- \
- actual_newval = (actual_oldval & ~mask) \
- | (((unsigned int) newval << shift) & mask); \
- \
- fail = __kernel_cmpxchg (actual_oldval, actual_newval, \
- wordptr); \
- \
- if (__builtin_expect (!fail, 1)) \
- return (actual_oldval & mask) >> shift; \
- } \
- }
-
-SUBWORD_VAL_CAS (unsigned short, 2)
-SUBWORD_VAL_CAS (unsigned char, 1)
-
-typedef unsigned char bool;
-
bool HIDDEN
__sync_bool_compare_and_swap_4 (int *ptr, int oldval, int newval)
{
@@ -243,18 +286,25 @@
return (failure == 0);
}
-#define SUBWORD_BOOL_CAS(TYPE, WIDTH) \
- bool HIDDEN \
- __sync_bool_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval, \
- TYPE newval) \
+
+#define SYNC_LOCK_TEST_AND_SET_2(TYPE, WIDTH, INDEX) \
+TYPE HIDDEN \
+ __sync_lock_test_and_set_##WIDTH (TYPE *ptr, TYPE val) \
{ \
- TYPE actual_oldval \
- = __sync_val_compare_and_swap_##WIDTH (ptr, oldval, newval); \
- return (oldval == actual_oldval); \
+ TYPE oldval; \
+ int failure; \
+ \
+ do { \
+ oldval = *ptr; \
+ failure = __kernel_cmpxchg2 (&oldval, &val, ptr, INDEX); \
+ } while (failure != 0); \
+ \
+ return oldval; \
}
-SUBWORD_BOOL_CAS (unsigned short, 2)
-SUBWORD_BOOL_CAS (unsigned char, 1)
+SYNC_LOCK_TEST_AND_SET_2 (long long, 8, 3)
+SYNC_LOCK_TEST_AND_SET_2 (unsigned short, 2, 1)
+SYNC_LOCK_TEST_AND_SET_2 (unsigned char, 1, 0)
int HIDDEN
__sync_lock_test_and_set_4 (int *ptr, int val)
@@ -269,29 +319,22 @@
return oldval;
}
-#define SUBWORD_TEST_AND_SET(TYPE, WIDTH) \
- TYPE HIDDEN \
- __sync_lock_test_and_set_##WIDTH (TYPE *ptr, TYPE val) \
- { \
- int failure; \
- unsigned int oldval, newval, shift, mask; \
- int *wordptr = (int *) ((unsigned long) ptr & ~3); \
- \
- shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \
- mask = MASK_##WIDTH << shift; \
- \
- do { \
- oldval = *wordptr; \
- newval = (oldval & ~mask) \
- | (((unsigned int) val << shift) & mask); \
- failure = __kernel_cmpxchg (oldval, newval, wordptr); \
- } while (failure != 0); \
- \
- return (oldval & mask) >> shift; \
+
+#define SYNC_LOCK_RELEASE_2(TYPE, WIDTH, INDEX) \
+ void HIDDEN \
+ __sync_lock_release_##WIDTH (TYPE *ptr) \
+ { \
+ TYPE oldval, zero = 0; int failure; \
+ \
+ do { \
+ oldval = *ptr; \
+ failure = __kernel_cmpxchg2 (&oldval, &zero, ptr, INDEX); \
+ } while (failure != 0); \
}
-SUBWORD_TEST_AND_SET (unsigned short, 2)
-SUBWORD_TEST_AND_SET (unsigned char, 1)
+SYNC_LOCK_RELEASE_2 (long long, 8, 3)
+SYNC_LOCK_RELEASE_2 (short, 2, 1)
+SYNC_LOCK_RELEASE_2 (char, 1, 0)
void HIDDEN
__sync_lock_release_4 (int *ptr)
@@ -304,23 +347,3 @@
} while (failure != 0);
}
-#define SYNC_LOCK_RELEASE(TYPE, WIDTH) \
- void HIDDEN \
- __sync_lock_release_##WIDTH (TYPE *ptr) \
- { \
- int failure; \
- unsigned int oldval, newval, shift, mask; \
- int *wordptr = (int *) ((unsigned long) ptr & ~3); \
- \
- shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \
- mask = MASK_##WIDTH << shift; \
- \
- do { \
- oldval = *wordptr; \
- newval = oldval & ~mask; \
- failure = __kernel_cmpxchg (oldval, newval, wordptr); \
- } while (failure != 0); \
- }
-
-SYNC_LOCK_RELEASE (short, 2)
-SYNC_LOCK_RELEASE (char, 1)
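
For review convenience, this is roughly what one instantiation of the new
COMPARE_AND_SWAP_2 macro above expands to (hand-expanded; index 1 selects
the 16-bit path in the kernel):

unsigned short HIDDEN
__sync_val_compare_and_swap_2 (unsigned short *ptr, unsigned short oldval,
                               unsigned short newval)
{
  unsigned short actual_oldval;
  int fail;

  while (1)
    {
      actual_oldval = *ptr;

      /* Cheap userspace check first: skip the LWS call entirely
         if the value already differs.  */
      if (__builtin_expect (oldval != actual_oldval, 0))
        return actual_oldval;

      fail = __kernel_cmpxchg2 (&actual_oldval, &newval, ptr, 1);

      if (__builtin_expect (!fail, 1))
        return actual_oldval;
    }
}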
parisc: Implement new LWS CAS supporting 64 bit operations.
The current LWS CAS only works correctly for 32-bit operations. The new LWS
allows CAS operations of variable size (8/16/32/64 bit).
Signed-off-by: Guy Martin <gmsoft@xxxxxxxxxxxx>
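
In C terms, the new entry point implements the following under one of the
hashed lws locks, with interrupts disabled (a sketch only; the helper name
is invented, %r28 carries the return value and %r21 the error code):

#include <errno.h>
#include <stddef.h>
#include <string.h>

static long
lws_cas2_sketch (void *mem, const void *oldp, const void *newp, int idx)
{
  /* idx 0/1/2/3 selects an 8/16/32/64-bit operation */
  static const size_t len[] = { 1, 2, 4, 8 };

  if (idx < 0 || idx > 3)
    return -ENOSYS;                  /* invalid size index */
  if (memcmp (mem, oldp, len[idx]) != 0)
    return 1;                        /* %r28 != 0: compare failed */
  memcpy (mem, newp, len[idx]);      /* perform the store */
  return 0;                          /* success: %r28 == 0, %r21 == 0 */
}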
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index 8387860..7ef22e3 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -74,7 +74,7 @@ ENTRY(linux_gateway_page)
/* ADDRESS 0xb0 to 0xb8, lws uses two insns for entry */
/* Light-weight-syscall entry must always be located at 0xb0 */
/* WARNING: Keep this number updated with table size changes */
-#define __NR_lws_entries (2)
+#define __NR_lws_entries (3)
lws_entry:
gate lws_start, %r0 /* increase privilege */
@@ -502,7 +502,7 @@ lws_exit:
/***************************************************
- Implementing CAS as an atomic operation:
+ Implementing 32bit CAS as an atomic operation:
%r26 - Address to examine
%r25 - Old value to check (old)
@@ -659,6 +659,230 @@ cas_action:
ASM_EXCEPTIONTABLE_ENTRY(2b-linux_gateway_page, 3b-linux_gateway_page)
+ /***************************************************
+ New CAS implementation which uses pointers and variable size
+ information. The values pointed to by old and new MUST NOT change
+ while performing CAS. The lock only protects the value at %r26.
+
+ %r26 - Address to examine
+ %r25 - Pointer to the value to check (old)
+ %r24 - Pointer to the value to set (new)
+ %r23 - Size of the variable (0/1/2/3 for 8/16/32/64 bit)
+ %r28 - Return non-zero on failure
+ %r21 - Kernel error code
+
+ %r21 has the following meanings:
+
+ EAGAIN - CAS is busy, ldcw failed, try again.
+ EFAULT - Read or write failed.
+
+ Scratch: r20, r22, r28, r29, r1, fr4 (on 32-bit kernels, for 64-bit CAS only)
+
+ ****************************************************/
+
+ /* ELF32 Process entry path */
+lws_compare_and_swap_2:
+#ifdef CONFIG_64BIT
+ /* Clip the input registers */
+ depdi 0, 31, 32, %r26
+ depdi 0, 31, 32, %r25
+ depdi 0, 31, 32, %r24
+ depdi 0, 31, 32, %r23
+#endif
+
+ /* Check the validity of the size argument */
+ subi,>>= 4, %r23, %r0
+ b,n lws_exit_nosys
+
+ /* Jump to the code which will load the old and new values into
+ registers, depending on their size */
+ shlw %r23, 2, %r29
+ blr %r29, %r0
+ nop
+
+ /* 8bit load */
+4: ldb 0(%sr3,%r25), %r25
+ b cas2_lock_start
+5: ldb 0(%sr3,%r24), %r24
+ nop
+ nop
+ nop
+ nop
+ nop
+
+ /* 16bit load */
+6: ldh 0(%sr3,%r25), %r25
+ b cas2_lock_start
+7: ldh 0(%sr3,%r24), %r24
+ nop
+ nop
+ nop
+ nop
+ nop
+
+ /* 32bit load */
+8: ldw 0(%sr3,%r25), %r25
+ b cas2_lock_start
+9: ldw 0(%sr3,%r24), %r24
+ nop
+ nop
+ nop
+ nop
+ nop
+
+ /* 64bit load */
+#ifdef CONFIG_64BIT
+10: ldd 0(%sr3,%r25), %r25
+11: ldd 0(%sr3,%r24), %r24
+#else
+ /* Load old value into r22/r23 - high/low */
+10: ldw 0(%sr3,%r25), %r22
+11: ldw 4(%sr3,%r25), %r23
+ /* Load new value into fr4 for atomic store later */
+12: flddx 0(%sr3,%r24), %fr4
+#endif
+
+cas2_lock_start:
+ /* Load start of lock table */
+ ldil L%lws_lock_start, %r20
+ ldo R%lws_lock_start(%r20), %r28
+
+ /* Extract four bits from r26 and hash lock (Bits 4-7) */
+ extru %r26, 27, 4, %r20
+
+ /* Find lock to use, the hash is either one of 0 to
+ 15, multiplied by 16 (keep it 16-byte aligned)
+ and add to the lock table offset. */
+ shlw %r20, 4, %r20
+ add %r20, %r28, %r20
+
+ rsm PSW_SM_I, %r0 /* Disable interrupts */
+ /* COW breaks can cause contention on UP systems */
+ LDCW 0(%sr2,%r20), %r28 /* Try to acquire the lock */
+ cmpb,<>,n %r0, %r28, cas2_action /* Did we get it? */
+cas2_wouldblock:
+ ldo 2(%r0), %r28 /* 2nd case */
+ ssm PSW_SM_I, %r0
+ b lws_exit /* Contended... */
+ ldo -EAGAIN(%r0), %r21 /* Spin in userspace */
+
+ /*
+ prev = *addr;
+ if ( prev == old )
+ *addr = new;
+ return prev;
+ */
+
+ /* NOTES:
+ This all works because intr_do_signal
+ and schedule both check the return iasq
+ and see that we are on the kernel page,
+ so this process is never scheduled off
+ nor ever sent any signal of any sort,
+ thus it is wholly atomic from
+ userspace's perspective.
+ */
+cas2_action:
+ /* Jump to the correct function */
+ blr %r29, %r0
+ /* Set %r28 as non-zero for now */
+ ldo 1(%r0),%r28
+
+ /* 8bit CAS */
+13: ldb,ma 0(%sr3,%r26), %r29
+ sub,= %r29, %r25, %r0
+ b,n cas2_end
+14: stb,ma %r24, 0(%sr3,%r26)
+ b cas2_end
+ copy %r0, %r28
+ nop
+ nop
+
+ /* 16bit CAS */
+15: ldh,ma 0(%sr3,%r26), %r29
+ sub,= %r29, %r25, %r0
+ b,n cas2_end
+16: sth,ma %r24, 0(%sr3,%r26)
+ b cas2_end
+ copy %r0, %r28
+ nop
+ nop
+
+ /* 32bit CAS */
+17: ldw,ma 0(%sr3,%r26), %r29
+ sub,= %r29, %r25, %r0
+ b,n cas2_end
+18: stw,ma %r24, 0(%sr3,%r26)
+ b cas2_end
+ copy %r0, %r28
+ nop
+ nop
+
+ /* 64bit CAS */
+#ifdef CONFIG_64BIT
+19: ldd,ma 0(%sr3,%r26), %r29
+ sub,= %r29, %r25, %r0
+ b,n cas2_end
+20: std,ma %r24, 0(%sr3,%r26)
+ copy %r0, %r28
+#else
+ /* Compare first word */
+19: ldw,ma 0(%sr3,%r26), %r29
+ sub,= %r29, %r22, %r0
+ b,n cas2_end
+ /* Compare second word */
+20: ldw,ma 4(%sr3,%r26), %r29
+ sub,= %r29, %r23, %r0
+ b,n cas2_end
+ /* Perform the store */
+21: fstdx %fr4, 0(%sr3,%r26)
+ copy %r0, %r28
+#endif
+
+cas2_end:
+ /* Free lock */
+ stw,ma %r20, 0(%sr2,%r20)
+ /* Enable interrupts */
+ ssm PSW_SM_I, %r0
+ /* Return to userspace, set no error */
+ b lws_exit
+ copy %r0, %r21
+
+22:
+ /* Error occurred on load or store */
+ /* Free lock */
+ stw %r20, 0(%sr2,%r20)
+ ssm PSW_SM_I, %r0
+ ldo 1(%r0),%r28
+ b lws_exit
+ ldo -EFAULT(%r0),%r21 /* set errno */
+ nop
+ nop
+ nop
+
+ /* Exception table entries, for the load and store, return EFAULT.
+ Each of the entries must be relocated. */
+ ASM_EXCEPTIONTABLE_ENTRY(4b-linux_gateway_page, 22b-linux_gateway_page)
+ ASM_EXCEPTIONTABLE_ENTRY(5b-linux_gateway_page, 22b-linux_gateway_page)
+ ASM_EXCEPTIONTABLE_ENTRY(6b-linux_gateway_page, 22b-linux_gateway_page)
+ ASM_EXCEPTIONTABLE_ENTRY(7b-linux_gateway_page, 22b-linux_gateway_page)
+ ASM_EXCEPTIONTABLE_ENTRY(8b-linux_gateway_page, 22b-linux_gateway_page)
+ ASM_EXCEPTIONTABLE_ENTRY(9b-linux_gateway_page, 22b-linux_gateway_page)
+ ASM_EXCEPTIONTABLE_ENTRY(10b-linux_gateway_page, 22b-linux_gateway_page)
+ ASM_EXCEPTIONTABLE_ENTRY(11b-linux_gateway_page, 22b-linux_gateway_page)
+ ASM_EXCEPTIONTABLE_ENTRY(13b-linux_gateway_page, 22b-linux_gateway_page)
+ ASM_EXCEPTIONTABLE_ENTRY(14b-linux_gateway_page, 22b-linux_gateway_page)
+ ASM_EXCEPTIONTABLE_ENTRY(15b-linux_gateway_page, 22b-linux_gateway_page)
+ ASM_EXCEPTIONTABLE_ENTRY(16b-linux_gateway_page, 22b-linux_gateway_page)
+ ASM_EXCEPTIONTABLE_ENTRY(17b-linux_gateway_page, 22b-linux_gateway_page)
+ ASM_EXCEPTIONTABLE_ENTRY(18b-linux_gateway_page, 22b-linux_gateway_page)
+ ASM_EXCEPTIONTABLE_ENTRY(19b-linux_gateway_page, 22b-linux_gateway_page)
+ ASM_EXCEPTIONTABLE_ENTRY(20b-linux_gateway_page, 22b-linux_gateway_page)
+#ifndef CONFIG_64BIT
+ ASM_EXCEPTIONTABLE_ENTRY(12b-linux_gateway_page, 22b-linux_gateway_page)
+ ASM_EXCEPTIONTABLE_ENTRY(21b-linux_gateway_page, 22b-linux_gateway_page)
+#endif
+
/* Make sure nothing else is placed on this page */
.align PAGE_SIZE
END(linux_gateway_page)
@@ -675,8 +899,9 @@ ENTRY(end_linux_gateway_page)
/* Light-weight-syscall table */
/* Start of lws table. */
ENTRY(lws_table)
- LWS_ENTRY(compare_and_swap32) /* 0 - ELF32 Atomic compare and swap */
- LWS_ENTRY(compare_and_swap64) /* 1 - ELF64 Atomic compare and swap */
+ LWS_ENTRY(compare_and_swap32) /* 0 - ELF32 Atomic 32bit CAS */
+ LWS_ENTRY(compare_and_swap64) /* 1 - ELF64 Atomic 32bit CAS */
+ LWS_ENTRY(compare_and_swap_2) /* 2 - ELF32 Atomic 8/16/32/64-bit CAS */
END(lws_table)
/* End of lws table */
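
For reviewers, the hashed lock selection in cas2_lock_start is equivalent
to this C sketch (function name invented; lws_lock_start is the kernel's
16-entry lock table, entries spaced 16 bytes apart):

static inline unsigned long
lws_lock_for (unsigned long lws_lock_start, unsigned long addr)
{
  unsigned long hash = (addr >> 4) & 0xf;  /* extru %r26, 27, 4, %r20 */
  return lws_lock_start + (hash << 4);     /* shlw %r20, 4 ; add */
}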