[RFC PATCH] 64bit LWS CAS

Hi,

I've attached the gcc and kernel patches for 64-bit CAS. So far I've
implemented the easiest use case, which is for a 64-bit kernel.

I'll investigate using the FPU registers for 64-bit operations with
32-bit kernels.

There is a lot of code duplication in my patches; it could probably be
reduced, although that might hurt readability.

Any comments?


Thanks,
  Guy
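
For reference, the kernel side of the first patch below implements,
under a per-address hashed spinlock and with interrupts disabled,
roughly the following (a C sketch of the semantics only, not the
actual implementation; the debug and error paths are omitted):

#include <stdint.h>

/* Sketch: what the new LWS entry does atomically for an ELF32
   caller.  The 64-bit old/new values arrive split across two
   32-bit registers each and are merged before the compare.  */
int64_t
lws_cas_dword (int64_t *addr, int64_t old, int64_t new)
{
  int64_t prev = *addr;		/* a faulting load yields -EFAULT  */
  if (prev == old)
    *addr = new;		/* a faulting store yields -EFAULT */
  return prev;			/* returned split in %r28/%r29     */
}
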
--- ./arch/parisc/kernel/syscall.S.orig	2014-07-16 16:39:20.684498341 +0200
+++ ./arch/parisc/kernel/syscall.S	2014-07-17 21:34:35.091933739 +0200
@@ -74,7 +74,7 @@
 	/* ADDRESS 0xb0 to 0xb8, lws uses two insns for entry */
 	/* Light-weight-syscall entry must always be located at 0xb0 */
 	/* WARNING: Keep this number updated with table size changes */
-#define __NR_lws_entries (2)
+#define __NR_lws_entries (3)
 
 lws_entry:
 	gate	lws_start, %r0		/* increase privilege */
@@ -502,7 +502,7 @@
 
 	
 	/***************************************************
-		Implementing CAS as an atomic operation:
+		Implementing 32bit CAS as an atomic operation:
 
 		%r26 - Address to examine
 		%r25 - Old value to check (old)
@@ -658,6 +658,161 @@
 	ASM_EXCEPTIONTABLE_ENTRY(1b-linux_gateway_page, 3b-linux_gateway_page)
 	ASM_EXCEPTIONTABLE_ENTRY(2b-linux_gateway_page, 3b-linux_gateway_page)
 
+	
+	/***************************************************
+		Implementing 64bit CAS as an atomic operation for ELF32:
+
+		%r26 - Address to examine
+		%r25 - Old 32bit high value to check (old)
+		%r24 - Old 32bit low value to check (old)
+		%r23 - New 32bit high value to set (new)
+		%r22 - New 32bit low value to set (new)
+		%r28 - Return prev 32bit high through this register.
+		%r29 - Return prev 32bit low through this register.
+		%r21 - Kernel error code
+
+		If debugging is DISabled:
+
+		%r21 has the following meanings:
+
+		EAGAIN - CAS is busy, ldcw failed, try again.
+		EFAULT - Read or write failed.		
+
+		If debugging is enabled:
+
+		EDEADLOCK - CAS called recursively.
+		EAGAIN && r28 == 1 - CAS is busy. Lock contended.
+		EAGAIN && r28 == 2 - CAS is busy. ldcw failed.
+		EFAULT - Read or write failed.
+
+		Scratch: r20, r28, r1
+
+	****************************************************/
+
+	/* ELF32 Process entry path */
+lws_compare_and_swap_dword:
+#ifdef CONFIG_64BIT
+	/* Clip all the input registers */
+	depdi	0, 31, 32, %r26
+	/* Merge the high words into the upper halves of the low registers */
+	depd	%r25, 31, 32, %r24
+	depd	%r23, 31, 32, %r22
+#else
+#error Not implemented
+#endif
+	/* Load start of lock table */
+	ldil	L%lws_lock_start, %r20
+	ldo	R%lws_lock_start(%r20), %r28
+
+	/* Extract four bits from r26 and hash lock (Bits 4-7) */
+	extru  %r26, 27, 4, %r20
+
+	/* Find lock to use, the hash is either one of 0 to
+	   15, multiplied by 16 (keep it 16-byte aligned)
+	   and add to the lock table offset. */
+	shlw	%r20, 4, %r20
+	add	%r20, %r28, %r20
+
+# if ENABLE_LWS_DEBUG
+	/*	
+		DEBUG, check for deadlock! 
+		If the thread register values are the same
+		then we were the one that locked it last and
+		this is a recursive call that will deadlock.
+		We *must* give up this call and fail.
+	*/
+	ldw	4(%sr2,%r20), %r28			/* Load thread register */
+	/* WARNING: If cr27 cycles to the same value we have problems */
+	mfctl	%cr27, %r21				/* Get current thread register */
+	cmpb,<>,n	%r21, %r28, cas_dword_lock	/* Called recursive? */
+	b	lws_exit				/* Return error! */
+	ldo	-EDEADLOCK(%r0), %r21
+cas_dword_lock:
+	cmpb,=,n	%r0, %r28, cas_dword_nocontend /* Is nobody using it? */
+	ldo	1(%r0), %r28				/* 1st case */
+	b	lws_exit				/* Contended... */
+	ldo	-EAGAIN(%r0), %r21			/* Spin in userspace */
+cas_dword_nocontend:
+# endif
+/* ENABLE_LWS_DEBUG */
+
+	rsm	PSW_SM_I, %r0				/* Disable interrupts */
+	/* COW breaks can cause contention on UP systems */
+	LDCW	0(%sr2,%r20), %r28			/* Try to acquire the lock */
+	cmpb,<>,n	%r0, %r28, cas_dword_action	/* Did we get it? */
+cas_dword_wouldblock:
+	ldo	2(%r0), %r28				/* 2nd case */
+	ssm	PSW_SM_I, %r0
+	b	lws_exit				/* Contended... */
+	ldo	-EAGAIN(%r0), %r21			/* Spin in userspace */
+
+	/*
+		prev = *addr;
+		if ( prev == old )
+		  *addr = new;
+		return prev;
+	*/
+
+	/* NOTES:
+		This all works because intr_do_signal
+		and schedule both check the return iasq
+		and see that we are on the kernel page,
+		so this process is never scheduled off
+		nor ever sent any signal of any sort,
+		thus it is wholly atomic from userspace's
+		perspective.
+	*/
+cas_dword_action:
+#if defined CONFIG_SMP && ENABLE_LWS_DEBUG
+	/* DEBUG */
+	mfctl	%cr27, %r1
+	stw	%r1, 4(%sr2,%r20)
+#endif
+
+#ifdef CONFIG_64BIT
+	/* The load and store could fail */
+4:	ldd,ma	0(%sr3,%r26), %r29
+	sub,<>	%r29, %r24, %r0
+5:	std,ma	%r22, 0(%sr3,%r26)
+	/* Split the high/low bit of the result */
+	shrd	%r29,32,%r28
+	depdi	0, 31, 32, %r28
+#else
+#error Not implemented
+#endif
+
+	/* Free lock */
+	stw,ma	%r20, 0(%sr2,%r20)
+#if ENABLE_LWS_DEBUG
+	/* Clear thread register indicator */
+	stw	%r0, 4(%sr2,%r20)
+#endif
+	/* Enable interrupts */
+	ssm	PSW_SM_I, %r0
+	/* Return to userspace, set no error */
+	b	lws_exit
+	copy	%r0, %r21
+
+6:		
+	/* Error occurred on load or store */
+	/* Free lock */
+	stw	%r20, 0(%sr2,%r20)
+#if ENABLE_LWS_DEBUG
+	stw	%r0, 4(%sr2,%r20)
+#endif
+	ssm	PSW_SM_I, %r0
+	b	lws_exit
+	ldo	-EFAULT(%r0),%r21	/* set errno */
+	nop
+	nop
+	nop
+	nop
+
+	/* Two exception table entries, one for the load,
+	   the other for the store. Either returns -EFAULT.
+	   Each of the entries must be relocated. */
+	ASM_EXCEPTIONTABLE_ENTRY(4b-linux_gateway_page, 6b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(5b-linux_gateway_page, 6b-linux_gateway_page)
 
 	/* Make sure nothing else is placed on this page */
 	.align PAGE_SIZE
@@ -675,8 +830,9 @@
 	/* Light-weight-syscall table */
 	/* Start of lws table. */
 ENTRY(lws_table)
-	LWS_ENTRY(compare_and_swap32)	/* 0 - ELF32 Atomic compare and swap */
-	LWS_ENTRY(compare_and_swap64)	/* 1 - ELF64 Atomic compare and swap */
+	LWS_ENTRY(compare_and_swap32)		/* 0 - ELF32 Atomic 32bit compare and swap */
+	LWS_ENTRY(compare_and_swap64)		/* 1 - ELF64 Atomic 32bit compare and swap */
+	LWS_ENTRY(compare_and_swap_dword)	/* 2 - ELF32 Atomic 64bit compare and swap */
 END(lws_table)
 	/* End of lws table */
 
--- libgcc/config/pa/linux-atomic.c.orig	2014-07-16 19:29:28.670595484 +0000
+++ libgcc/config/pa/linux-atomic.c	2014-07-16 19:31:32.754003341 +0000
@@ -24,6 +24,8 @@
 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 <http://www.gnu.org/licenses/>.  */
 
+#include <stdint.h>
+
 #define EFAULT  14 
 #define EBUSY   16
 #define ENOSYS 251 
@@ -75,6 +77,39 @@
   return lws_errno;
 }
 
+/* Kernel helper for compare-and-exchange a 64-bit value from ELF32.  */
+static inline long
+__kernel_cmpxchg_dword32 (int64_t oldval, int64_t newval, int64_t *mem)
+{
+  register unsigned long lws_mem asm("r26") = (unsigned long) (mem);
+  register long lws_ret_h   asm("r28");
+  register long lws_ret_l   asm("r29");
+  register long lws_errno   asm("r21");
+  register int lws_old_h    asm("r25") = oldval >> 32;
+  register int lws_old_l    asm("r24") = oldval & 0xffffffff;
+  register int lws_new_h    asm("r23") = newval >> 32;
+  register int lws_new_l    asm("r22") = newval & 0xffffffff;
+  asm volatile (	"ble	0xb0(%%sr2, %%r0)	\n\t"
+			"ldi	%8, %%r20		\n\t"
+	: "=r" (lws_ret_h), "=r" (lws_ret_l), "=r" (lws_errno), "=r" (lws_mem),
+	  "=r" (lws_old_h), "=r" (lws_old_l), "=r" (lws_new_h), "=r" (lws_new_l)
+	: "i" (2), "3" (lws_mem), "4" (lws_old_h), "5" (lws_old_l), "6" (lws_new_h), "7" (lws_new_l)
+	: "r1", "r20", "r31", "memory"
+  );
+  if (__builtin_expect (lws_errno == -EFAULT || lws_errno == -ENOSYS, 0))
+    ABORT_INSTRUCTION;
+
+  int64_t lws_ret = ((int64_t) lws_ret_h << 32) | (int64_t) (uint32_t) lws_ret_l;
+
+  /* If the kernel LWS call succeeded (lws_errno == 0), lws_ret contains
+     the old value from memory.  If this value is equal to OLDVAL, the
+     new value was written to memory.  If not, return -EBUSY.  */
+  if (!lws_errno && lws_ret != oldval)
+    lws_errno = -EBUSY;
+
+  return lws_errno;
+}
+
 #define HIDDEN __attribute__ ((visibility ("hidden")))
 
 /* Big endian masks  */
@@ -84,6 +119,28 @@
 #define MASK_1 0xffu
 #define MASK_2 0xffffu
 
+#define FETCH_AND_OP_DWORD(OP, PFX_OP, INF_OP)					\
+  int64_t HIDDEN								\
+  __sync_fetch_and_##OP##_8 (int64_t *ptr, int64_t val)				\
+  {										\
+    int64_t tmp;								\
+    int failure;								\
+										\
+    do {									\
+      tmp = *ptr;								\
+      failure = __kernel_cmpxchg_dword32 (tmp, PFX_OP (tmp INF_OP val), ptr);	\
+    } while (failure != 0);							\
+										\
+    return tmp;									\
+  }
+
+FETCH_AND_OP_DWORD (add,   , +)
+FETCH_AND_OP_DWORD (sub,   , -)
+FETCH_AND_OP_DWORD (or,    , |)
+FETCH_AND_OP_DWORD (and,   , &)
+FETCH_AND_OP_DWORD (xor,   , ^)
+FETCH_AND_OP_DWORD (nand, ~, &)
+
 #define FETCH_AND_OP_WORD(OP, PFX_OP, INF_OP)				\
   int HIDDEN								\
   __sync_fetch_and_##OP##_4 (int *ptr, int val)				\
@@ -147,6 +204,28 @@
 SUBWORD_SYNC_OP (xor,   , ^, unsigned char, 1, oldval)
 SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, oldval)
 
+#define OP_AND_FETCH_DWORD(OP, PFX_OP, INF_OP)					\
+  int64_t HIDDEN								\
+  __sync_##OP##_and_fetch_8 (int64_t *ptr, int64_t val)				\
+  {										\
+    int64_t tmp;								\
+    int failure;								\
+										\
+    do {									\
+      tmp = *ptr;								\
+      failure = __kernel_cmpxchg_dword32 (tmp, PFX_OP (tmp INF_OP val), ptr);	\
+    } while (failure != 0);							\
+										\
+    return PFX_OP (tmp INF_OP val);						\
+  }
+
+OP_AND_FETCH_DWORD (add,   , +)
+OP_AND_FETCH_DWORD (sub,   , -)
+OP_AND_FETCH_DWORD (or,    , |)
+OP_AND_FETCH_DWORD (and,   , &)
+OP_AND_FETCH_DWORD (xor,   , ^)
+OP_AND_FETCH_DWORD (nand, ~, &)
+
 #define OP_AND_FETCH_WORD(OP, PFX_OP, INF_OP)				\
   int HIDDEN								\
   __sync_##OP##_and_fetch_4 (int *ptr, int val)				\
@@ -182,6 +261,26 @@
 SUBWORD_SYNC_OP (xor,   , ^, unsigned char, 1, newval)
 SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, newval)
 
+int64_t HIDDEN
+__sync_val_compare_and_swap_8 (int64_t *ptr, int64_t oldval, int64_t newval)
+{
+  int64_t actual_oldval;
+  int fail;
+    
+  while (1)
+    {
+      actual_oldval = *ptr;
+
+      if (__builtin_expect (oldval != actual_oldval, 0))
+	return actual_oldval;
+
+      fail = __kernel_cmpxchg_dword32 (actual_oldval, newval, ptr);
+  
+      if (__builtin_expect (!fail, 1))
+	return actual_oldval;
+    }
+}
+
 int HIDDEN
 __sync_val_compare_and_swap_4 (int *ptr, int oldval, int newval)
 {
@@ -256,6 +355,20 @@
 SUBWORD_BOOL_CAS (unsigned short, 2)
 SUBWORD_BOOL_CAS (unsigned char,  1)
 
+int64_t HIDDEN
+__sync_lock_test_and_set_8 (int64_t *ptr, int64_t val)
+{
+  int64_t oldval;
+  int failure;
+
+  do {
+    oldval = *ptr;
+    failure = __kernel_cmpxchg_dword32 (oldval, val, ptr);
+  } while (failure != 0);
+
+  return oldval;
+}
+
 int HIDDEN
 __sync_lock_test_and_set_4 (int *ptr, int val)
 {
@@ -300,6 +413,7 @@
     *ptr = 0;								\
   }
 
-SYNC_LOCK_RELEASE (int,   4)
-SYNC_LOCK_RELEASE (short, 2)
-SYNC_LOCK_RELEASE (char,  1)
+SYNC_LOCK_RELEASE (int64_t, 8)
+SYNC_LOCK_RELEASE (int,     4)
+SYNC_LOCK_RELEASE (short,   2)
+SYNC_LOCK_RELEASE (char,    1)
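
With both patches applied, ELF32 code can use the 8-byte __sync
builtins directly. A minimal smoke test (illustrative only) might be:

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  static int64_t v = 1;

  /* Dispatches to __sync_val_compare_and_swap_8 from the patch.  */
  int64_t prev = __sync_val_compare_and_swap (&v, (int64_t) 1,
					      (int64_t) 1 << 32);

  /* Dispatches to __sync_fetch_and_add_8.  */
  __sync_fetch_and_add (&v, (int64_t) 1);

  printf ("prev=%lld v=%lld\n", (long long) prev, (long long) v);
  return 0;
}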
