This patch adds two new LWS routines, lws_atomic_xchg and lws_atomic_store. They are simpler than the CAS routines. Currently, atomic stores are implemented with the CAS routines, which is inefficient since it requires both winning the spinlock and a successful CAS operation.

The change has been tested on c8000 and rp3440 machines.

Signed-off-by: John David Anglin <dave.anglin@xxxxxxxx>
---

diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index 71cb6d17afac..b95d02b9c926 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -90,7 +90,7 @@ ENTRY(linux_gateway_page)
 	/* ADDRESS 0xb0 to 0xb8, lws uses two insns for entry */
 	/* Light-weight-syscall entry must always be located at 0xb0 */
 	/* WARNING: Keep this number updated with table size changes */
-#define __NR_lws_entries (3)
+#define __NR_lws_entries (5)
 
 lws_entry:
 	gate	lws_start, %r0		/* increase privilege */
@@ -883,6 +883,393 @@ cas2_lock_start:
 	ASM_EXCEPTIONTABLE_ENTRY(20b-linux_gateway_page, 31b-linux_gateway_page)
 #endif
+
+	/***************************************************
+		LWS atomic exchange.
+
+	%r26 - Exchange address
+	%r25 - Size of the variable (0/1/2/3 for 8/16/32/64 bit)
+	%r24 - Address of new value
+	%r23 - Address of old value
+	%r28 - Return non-zero on failure
+	%r21 - Kernel error code
+
+	%r21 returns the following error codes:
+	EAGAIN - CAS is busy, ldcw failed, try again.
+	EFAULT - Read or write failed.
+
+	If EAGAIN is returned, %r28 indicates the busy reason:
+	r28 == 1 - CAS is busy. lock contended.
+	r28 == 2 - CAS is busy. ldcw failed.
+	r28 == 3 - CAS is busy. page fault.
+
+	Scratch: r20, r1
+
+	****************************************************/
+
+lws_atomic_xchg:
+#ifdef CONFIG_64BIT
+	/* Wide mode user process? */
+	bb,<,n	%sp, 31, atomic_xchg_begin
+
+	/* Clip the input registers for 32-bit processes. */
+	depdi	0, 31, 32, %r26
+	depdi	0, 31, 32, %r25
+	depdi	0, 31, 32, %r24
+	depdi	0, 31, 32, %r23
+#endif
+
+atomic_xchg_begin:
+	/* Check the validity of the size */
+	subi,>>=	3, %r25, %r0
+	b,n	lws_exit_nosys
+
+	/* Jump to the functions which will load the old and new values into
+	   registers depending on their size */
+	shlw	%r25, 2, %r1
+	blr	%r1, %r0
+	nop
+
+	/* Perform exception checks */
+
+	/* 8-bit exchange */
+1:	ldb	0(%r24), %r20
+	copy	%r23, %r20
+	depi_safe	0, 31, 2, %r20
+	b	atomic_xchg_start
+2:	stbys,e	%r0, 0(%r20)
+	nop
+	nop
+	nop
+
+	/* 16-bit exchange */
+3:	ldh	0(%r24), %r20
+	copy	%r23, %r20
+	depi_safe	0, 31, 2, %r20
+	b	atomic_xchg_start
+4:	stbys,e	%r0, 0(%r20)
+	nop
+	nop
+	nop
+
+	/* 32-bit exchange */
+5:	ldw	0(%r24), %r20
+	b	atomic_xchg_start
+6:	stbys,e	%r0, 0(%r23)
+	nop
+	nop
+	nop
+	nop
+	nop
+
+	/* 64-bit exchange */
+#ifdef CONFIG_64BIT
+7:	ldd	0(%r24), %r20
+8:	stdby,e	%r0, 0(%r23)
+#else
+7:	ldw	0(%r24), %r20
+8:	ldw	4(%r24), %r20
+	copy	%r23, %r20
+	depi_safe	0, 31, 2, %r20
+9:	stbys,e	%r0, 0(%r20)
+10:	stbys,e	%r0, 4(%r20)
+#endif
+
+atomic_xchg_start:
+	/* Trigger memory reference interruptions without writing to memory */
+	copy	%r26, %r28
+	depi_safe	0, 31, 2, %r28
+11:	ldw	0(%r28), %r1
+12:	stbys,e	%r0, 0(%r28)
+
+	/* Calculate 8-bit hash index from virtual address */
+	extru_safe	%r26, 27, 8, %r20
+
+	/* Load start of lock table */
+	ldil	L%lws_lock_start, %r28
+	ldo	R%lws_lock_start(%r28), %r28
+
+	/* Find lock to use, the hash index is one of 0 to
+	   255, multiplied by 16 (keep it 16-byte aligned)
+	   and add to the lock table offset. */
+	shlw	%r20, 4, %r20
+	add	%r20, %r28, %r20
+
+	/* Disable page faults to prevent sleeping in critical region */
+	lws_pagefault_disable	%r21,%r28
+	rsm	PSW_SM_I, %r0		/* Disable interrupts */
+
+	/* Try to acquire the lock */
+	LDCW	0(%sr2,%r20), %r28
+	comclr,<>	%r0, %r28, %r0
+	b,n	lws_wouldblock
+
+	/* NOTES:
+		This all works because intr_do_signal
+		and schedule both check the return iasq
+		and see that we are on the kernel page
+		so this process is never scheduled off
+		or is ever sent any signal of any sort,
+		thus it is wholly atomic from userspace's
+		perspective
+	*/
+
+	/* Jump to the correct function */
+	blr	%r1, %r0
+	/* Set %r28 as non-zero for now */
+	ldo	1(%r0),%r28
+
+	/* 8-bit exchange */
+14:	ldb	0(%r26), %r1
+15:	stb	%r1, 0(%r23)
+16:	ldb	0(%r24), %r1
+17:	stb	%r1, 0(%r26)
+	b	lws_exit_noerror
+	copy	%r0, %r28
+	nop
+	nop
+
+	/* 16-bit exchange */
+18:	ldh	0(%r26), %r1
+19:	sth	%r1, 0(%r23)
+20:	ldh	0(%r24), %r1
+21:	sth	%r1, 0(%r26)
+	b	lws_exit_noerror
+	copy	%r0, %r28
+	nop
+	nop
+
+	/* 32-bit exchange */
+22:	ldw	0(%r26), %r1
+23:	stw	%r1, 0(%r23)
+24:	ldw	0(%r24), %r1
+25:	stw	%r1, 0(%r26)
+	b	lws_exit_noerror
+	copy	%r0, %r28
+	nop
+	nop
+
+	/* 64-bit exchange */
+#ifdef CONFIG_64BIT
+26:	ldd	0(%r26), %r1
+27:	std	%r1, 0(%r23)
+28:	ldd	0(%r24), %r1
+29:	std	%r1, 0(%r26)
+#else
+26:	flddx	0(%r26), %fr4
+27:	fstdx	%fr4, 0(%r23)
+28:	flddx	0(%r24), %fr4
+29:	fstdx	%fr4, 0(%r26)
+#endif
+	b	lws_exit_noerror
+	copy	%r0, %r28
+
+	/* A fault occurred on load or stbys,e store */
+30:	b,n	lws_fault
+	ASM_EXCEPTIONTABLE_ENTRY(1b-linux_gateway_page, 30b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(2b-linux_gateway_page, 30b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(3b-linux_gateway_page, 30b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(4b-linux_gateway_page, 30b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(5b-linux_gateway_page, 30b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(6b-linux_gateway_page, 30b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(7b-linux_gateway_page, 30b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(8b-linux_gateway_page, 30b-linux_gateway_page)
+#ifndef CONFIG_64BIT
+	ASM_EXCEPTIONTABLE_ENTRY(9b-linux_gateway_page, 30b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(10b-linux_gateway_page, 30b-linux_gateway_page)
+#endif
+
+	ASM_EXCEPTIONTABLE_ENTRY(11b-linux_gateway_page, 30b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(12b-linux_gateway_page, 30b-linux_gateway_page)
+
+	/* A page fault occurred in critical region */
+31:	b,n	lws_pagefault
+	ASM_EXCEPTIONTABLE_ENTRY(14b-linux_gateway_page, 31b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(15b-linux_gateway_page, 31b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(16b-linux_gateway_page, 31b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(17b-linux_gateway_page, 31b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(18b-linux_gateway_page, 31b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(19b-linux_gateway_page, 31b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(20b-linux_gateway_page, 31b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(21b-linux_gateway_page, 31b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(22b-linux_gateway_page, 31b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(23b-linux_gateway_page, 31b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(24b-linux_gateway_page, 31b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(25b-linux_gateway_page, 31b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(26b-linux_gateway_page, 31b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(27b-linux_gateway_page, 31b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(28b-linux_gateway_page, 31b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(29b-linux_gateway_page, 31b-linux_gateway_page)
+
+	/***************************************************
+		LWS atomic store.
+
+	%r26 - Address to store
+	%r25 - Size of the variable (0/1/2/3 for 8/16/32/64 bit)
+	%r24 - Address of value to store
+	%r28 - Return non-zero on failure
+	%r21 - Kernel error code
+
+	%r21 returns the following error codes:
+	EAGAIN - CAS is busy, ldcw failed, try again.
+	EFAULT - Read or write failed.
+
+	If EAGAIN is returned, %r28 indicates the busy reason:
+	r28 == 1 - CAS is busy. lock contended.
+	r28 == 2 - CAS is busy. ldcw failed.
+	r28 == 3 - CAS is busy. page fault.
+
+	Scratch: r20, r1
+
+	****************************************************/
+
+lws_atomic_store:
+#ifdef CONFIG_64BIT
+	/* Wide mode user process? */
+	bb,<,n	%sp, 31, atomic_store_begin
+
+	/* Clip the input registers for 32-bit processes.
+	   %r23 is not used by this entry and needs no clipping. */
+	depdi	0, 31, 32, %r26
+	depdi	0, 31, 32, %r25
+	depdi	0, 31, 32, %r24
+#endif
+
+atomic_store_begin:
+	/* Check the validity of the size */
+	subi,>>=	3, %r25, %r0
+	b,n	lws_exit_nosys
+
+	shlw	%r25, 1, %r1
+	blr	%r1, %r0
+	nop
+
+	/* Perform exception checks */
+
+	/* 8-bit store */
+1:	ldb	0(%r24), %r20
+	b,n	atomic_store_start
+	nop
+	nop
+
+	/* 16-bit store */
+2:	ldh	0(%r24), %r20
+	b,n	atomic_store_start
+	nop
+	nop
+
+	/* 32-bit store */
+3:	ldw	0(%r24), %r20
+	b,n	atomic_store_start
+	nop
+	nop
+
+	/* 64-bit store */
+#ifdef CONFIG_64BIT
+4:	ldd	0(%r24), %r20
+#else
+4:	ldw	0(%r24), %r20
+5:	ldw	4(%r24), %r20
+#endif
+
+atomic_store_start:
+	/* Trigger memory reference interruptions without writing to memory */
+	copy	%r26, %r28
+	depi_safe	0, 31, 2, %r28
+6:	ldw	0(%r28), %r1
+7:	stbys,e	%r0, 0(%r28)
+
+	/* Calculate 8-bit hash index from virtual address */
+	extru_safe	%r26, 27, 8, %r20
+
+	/* Load start of lock table */
+	ldil	L%lws_lock_start, %r28
+	ldo	R%lws_lock_start(%r28), %r28
+
+	/* Find lock to use, the hash index is one of 0 to
+	   255, multiplied by 16 (keep it 16-byte aligned)
+	   and add to the lock table offset. */
+	shlw	%r20, 4, %r20
+	add	%r20, %r28, %r20
+
+	/* Disable page faults to prevent sleeping in critical region */
+	lws_pagefault_disable	%r21,%r28
+	rsm	PSW_SM_I, %r0		/* Disable interrupts */
+
+	/* Try to acquire the lock */
+	LDCW	0(%sr2,%r20), %r28
+	comclr,<>	%r0, %r28, %r0
+	b,n	lws_wouldblock
+
+	/* NOTES:
+		This all works because intr_do_signal
+		and schedule both check the return iasq
+		and see that we are on the kernel page
+		so this process is never scheduled off
+		or is ever sent any signal of any sort,
+		thus it is wholly atomic from userspace's
+		perspective
+	*/
+
+	/* Jump to the correct function */
+	blr	%r1, %r0
+	/* Set %r28 as non-zero for now */
+	ldo	1(%r0),%r28
+
+	/* 8-bit store */
+9:	ldb	0(%r24), %r1
+10:	stb	%r1, 0(%r26)
+	b	lws_exit_noerror
+	copy	%r0, %r28
+
+	/* 16-bit store */
+11:	ldh	0(%r24), %r1
+12:	sth	%r1, 0(%r26)
+	b	lws_exit_noerror
+	copy	%r0, %r28
+
+	/* 32-bit store */
+13:	ldw	0(%r24), %r1
+14:	stw	%r1, 0(%r26)
+	b	lws_exit_noerror
+	copy	%r0, %r28
+
+	/* 64-bit store */
+#ifdef CONFIG_64BIT
+15:	ldd	0(%r24), %r1
+16:	std	%r1, 0(%r26)
+#else
+15:	flddx	0(%r24), %fr4
+16:	fstdx	%fr4, 0(%r26)
+#endif
+	b	lws_exit_noerror
+	copy	%r0, %r28
+
+	/* A fault occurred on load or stbys,e store */
+30:	b,n	lws_fault
+	ASM_EXCEPTIONTABLE_ENTRY(1b-linux_gateway_page, 30b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(2b-linux_gateway_page, 30b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(3b-linux_gateway_page, 30b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(4b-linux_gateway_page, 30b-linux_gateway_page)
+#ifndef CONFIG_64BIT
+	ASM_EXCEPTIONTABLE_ENTRY(5b-linux_gateway_page, 30b-linux_gateway_page)
+#endif
+
+	ASM_EXCEPTIONTABLE_ENTRY(6b-linux_gateway_page, 30b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(7b-linux_gateway_page, 30b-linux_gateway_page)
+
+	/* A page fault occurred in critical region */
+31:	b,n	lws_pagefault
+	ASM_EXCEPTIONTABLE_ENTRY(9b-linux_gateway_page, 31b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(10b-linux_gateway_page, 31b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(11b-linux_gateway_page, 31b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(12b-linux_gateway_page, 31b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(13b-linux_gateway_page, 31b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(14b-linux_gateway_page, 31b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(15b-linux_gateway_page, 31b-linux_gateway_page)
+	ASM_EXCEPTIONTABLE_ENTRY(16b-linux_gateway_page, 31b-linux_gateway_page)
+
 	/* Make sure nothing else is placed on this page */
 	.align PAGE_SIZE
 END(linux_gateway_page)
@@ -902,6 +1289,8 @@ ENTRY(lws_table)
 	LWS_ENTRY(compare_and_swap32)	/* 0 - ELF32 Atomic 32bit CAS */
 	LWS_ENTRY(compare_and_swap64)	/* 1 - ELF64 Atomic 32bit CAS */
 	LWS_ENTRY(compare_and_swap_2)	/* 2 - Atomic 64bit CAS */
+	LWS_ENTRY(atomic_xchg)		/* 3 - Atomic Exchange */
+	LWS_ENTRY(atomic_store)		/* 4 - Atomic Store */
 END(lws_table)
 	/* End of lws table */
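
For reviewers who want to exercise the new entries from userspace: below is a minimal sketch of a 32-bit exchange wrapper, modeled on the way glibc already invokes the existing LWS CAS (branch to the gateway at 0xb0 with the lws_table index in %r20 loaded in the delay slot, arguments in %r26-%r23, error code in %r21). It is illustrative only and not part of this patch; the function name, retry policy and clobber list are assumptions, the register conventions come from the header comments above.

#include <errno.h>
#include <stdint.h>

/* Hypothetical wrapper, not part of this patch: 32-bit atomic exchange
   via LWS index 3 (atomic_xchg).  The kernel writes the previous value
   of *addr to *oldval and then stores *(&newval) to addr. */
static inline long
lws_atomic_xchg32 (uint32_t *addr, uint32_t newval, uint32_t *oldval)
{
  long err;

  do
    {
      register unsigned long r26 __asm__ ("r26") = (unsigned long) addr;
      register unsigned long r25 __asm__ ("r25") = 2;	/* size code: 32 bit */
      register unsigned long r24 __asm__ ("r24") = (unsigned long) &newval;
      register unsigned long r23 __asm__ ("r23") = (unsigned long) oldval;
      register long r21 __asm__ ("r21");		/* kernel error code */

      __asm__ __volatile__ (
	"ble	0xb0(%%sr2, %%r0)\n\t"	/* enter the LWS gateway page */
	"ldi	3, %%r20"		/* lws_table index in the delay slot */
	: "=r" (r21), "+r" (r26), "+r" (r25), "+r" (r24), "+r" (r23)
	:
	: "r1", "r20", "r22", "r28", "r31", "memory");
      err = r21;
    }
  while (err == -EAGAIN);	/* busy: lock contended, ldcw failed or page fault */

  return err;			/* 0 on success, -EFAULT on a faulting address */
}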
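
A store wrapper would look the same apart from the index and the argument registers: only %r26 (destination), %r25 (size code) and %r24 (address of the value) are used, so %r23 moves to the clobber list. Again a hypothetical sketch building on the includes above, not part of this patch.

/* Hypothetical wrapper: 32-bit atomic store via LWS index 4 (atomic_store). */
static inline long
lws_atomic_store32 (uint32_t *addr, uint32_t val)
{
  long err;

  do
    {
      register unsigned long r26 __asm__ ("r26") = (unsigned long) addr;
      register unsigned long r25 __asm__ ("r25") = 2;	/* size code: 32 bit */
      register unsigned long r24 __asm__ ("r24") = (unsigned long) &val;
      register long r21 __asm__ ("r21");		/* kernel error code */

      __asm__ __volatile__ (
	"ble	0xb0(%%sr2, %%r0)\n\t"	/* enter the LWS gateway page */
	"ldi	4, %%r20"		/* lws_table index in the delay slot */
	: "=r" (r21), "+r" (r26), "+r" (r25), "+r" (r24)
	:
	: "r1", "r20", "r22", "r23", "r28", "r31", "memory");
      err = r21;
    }
  while (err == -EAGAIN);	/* retry while the hashed lock is busy */

  return err;
}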