On 11/13/2012 01:15 PM, Jon Masters wrote:
> On 11/11/2012 03:56 AM, Jon Masters wrote:
>
>> This is an ugly kludge with some hand-crafted assembly, but it builds
>> locally, and I think the scratch build should succeed this time. Note, I
>> was confused by the fact that there are at least *two* different sets of
>> atomics implemented in OMPI (it's a mess). With this, both are now
>> fixed, regardless of which compiler is being used.
>
> I've updated the patch, following useful feedback from Nicolas Pitre.
> There is at least one bug in the current code (r3 clobbering, bad use of
> r12 because I knew it wasn't touched and didn't want to save it, which
> was just plain lazy and shouldn't have been done). I've tested these
> fixes in a little test harness and have a scratch build going now:

Oops. I meant this one :) Scratch build going:

http://arm.koji.fedoraproject.org/koji/taskinfo?taskID=1250212

Jon.

# Review notes for this revision of the patch. These '#' lines sit outside
# every hunk; patch(1) is expected to skip them like any other mail text.
#
#  * Layout: restored to one diff line per text line. Indentation inside the
#    hunks was reconstructed, so if a context or removal line does not match
#    your tree, apply with "patch -l" (--ignore-whitespace).
#  * ARMV6.asm and ARMV7.asm, opal_atomic_cmpset_rel_32: a failed strex now
#    branches back to the function's own retry label, REFLSYM(5). It used to
#    branch to REFLSYM(4), the exit label of opal_atomic_cmpset_acq_32, which
#    made the call return 0 instead of retrying the store.
#  * ARMV6.asm: the dmb instructions left in the cmpset acq/rel routines are
#    replaced by the CP15 barrier operation that the barrier functions of the
#    same file already use. NOTE(review): the source register of that
#    operation is, as far as I know, documented as "should be zero"; the file
#    passes r0 unmodified everywhere -- confirm whether it needs zeroing.
#  * ARMV5.asm, opal_atomic_add_32 and opal_atomic_sub_32: both now return
#    the new value from r1, matching the ARMV6/ARMV7 versions. Before, they
#    returned whatever the kernel cmpxchg helper left in r0. This assumes the
#    helper preserves r1 -- verify against the kernel user-helper notes.
#    The ARMV5.asm hunk grows from 109 to 111 lines.
#  * Not changed, for follow-up: configure copies ARMV<n>.asm over ARM.asm
#    from $top_ompi_builddir, which presumably only works for in-tree builds.
#    The ".atomic.h.swp" line at the end looks like a stray editor swap file.
#    The deleted ARM.asm lines are kept as they were, since they must mirror
#    the file being removed.
#
diff -urNp openmpi-1.6.3_orig/opal/asm/base/ARM.asm openmpi-1.6.3/opal/asm/base/ARM.asm
--- openmpi-1.6.3_orig/opal/asm/base/ARM.asm	2012-04-03 10:29:44.000000000 -0400
+++ openmpi-1.6.3/opal/asm/base/ARM.asm	1969-12-31 19:00:00.000000000 -0500
@@ -1,153 +0,0 @@
-START_FILE
-       TEXT
-
-       ALIGN(4)
-START_FUNC(opal_atomic_mb)
-       dmb
-       bx      lr
-END_FUNC(opal_atomic_mb)
-
-
-START_FUNC(opal_atomic_rmb)
-       dmb
-       bx      lr
-END_FUNC(opal_atomic_rmb)
-
-
-START_FUNC(opal_atomic_wmb)
-       dmb
-       bx      lr
-END_FUNC(opal_atomic_wmb)
-
-
-START_FUNC(opal_atomic_cmpset_32)
-       LSYM(1)
-       ldrex   r3, [r0]
-       cmp     r1, r3
-       bne     REFLSYM(2)
-       strex   r12, r2, [r0]
-       cmp     r12, #0
-       bne     REFLSYM(1)
-       mov     r0, #1
-       LSYM(2)
-       movne   r0, #0
-       bx      lr
-END_FUNC(opal_atomic_cmpset_32)
-
-
-START_FUNC(opal_atomic_cmpset_acq_32)
-       LSYM(3)
-       ldrex   r3, [r0]
-       cmp     r1, r3
-       bne     REFLSYM(4)
-       strex   r12, r2, [r0]
-       cmp     r12, #0
-       bne     REFLSYM(3)
-       dmb
-       mov     r0, #1
-       LSYM(4)
-       movne   r0, #0
-       bx      lr
-END_FUNC(opal_atomic_cmpset_acq_32)
-
-
-START_FUNC(opal_atomic_cmpset_rel_32)
-       LSYM(5)
-       ldrex   r3, [r0]
-       cmp     r1, r3
-       bne     REFLSYM(6)
-       dmb
-       strex   r12, r2, [r0]
-       cmp     r12, #0
-       bne     REFLSYM(4)
-       mov     r0, #1
-       LSYM(6)
-       movne   r0, #0
-       bx      lr
-END_FUNC(opal_atomic_cmpset_rel_32)
-
-#START_64BIT
-START_FUNC(opal_atomic_cmpset_64)
-       push    {r4-r7}
-       ldrd    r6, r7, [sp, #16]
-       LSYM(7)
-       ldrexd  r4, r5, [r0]
-       cmp     r4, r2
-       it      eq
-       cmpeq   r5, r3
-       bne     REFLSYM(8)
-       strexd  r1, r6, r7, [r0]
-       cmp     r1, #0
-       bne     REFLSYM(7)
-       mov     r0, #1
-       LSYM(8)
-       movne   r0, #0
-       pop     {r4-r7}
-       bx      lr
-END_FUNC(opal_atomic_cmpset_64)
-
-START_FUNC(opal_atomic_cmpset_acq_64)
-       push    {r4-r7}
-       ldrd    r6, r7, [sp, #16]
-       LSYM(9)
-       ldrexd  r4, r5, [r0]
-       cmp     r4, r2
-       it      eq
-       cmpeq   r5, r3
-       bne     REFLSYM(10)
-       strexd  r1, r6, r7, [r0]
-       cmp     r1, #0
-       bne     REFLSYM(9)
-       dmb
-       mov     r0, #1
-       LSYM(10)
-       movne   r0, #0
-       pop     {r4-r7}
-       bx      lr
-END_FUNC(opal_atomic_cmpset_acq_64)
-
-
-START_FUNC(opal_atomic_cmpset_rel_64)
-       push    {r4-r7}
-       ldrd    r6, r7, [sp, #16]
-       LSYM(11)
-       ldrexd  r4, r5, [r0]
-       cmp     r4, r2
-       it      eq
-       cmpeq   r5, r3
-       bne     REFLSYM(12)
-       dmb
-       strexd  r1, r6, r7, [r0]
-       cmp     r1, #0
-       bne     REFLSYM(11)
-       mov     r0, #1
-       LSYM(12)
-       movne   r0, #0
-       pop     {r4-r7}
-       bx      lr
-END_FUNC(opal_atomic_cmpset_rel_64)
-#END_64BIT
-
-
-START_FUNC(opal_atomic_add_32)
-       LSYM(13)
-       ldrex   r2, [r0]
-       add     r2, r2, r1
-       strex   r3, r2, [r0]
-       cmp     r3, #0
-       bne     REFLSYM(13)
-       mov     r0, r2
-       bx      lr
-END_FUNC(opal_atomic_add_32)
-
-
-START_FUNC(opal_atomic_sub_32)
-       LSYM(14)
-       ldrex   r2, [r0]
-       sub     r2, r2, r1
-       strex   r3, r2, [r0]
-       cmp     r3, #0
-       bne     REFLSYM(14)
-       mov     r0, r2
-       bx      lr
-END_FUNC(opal_atomic_sub_32)
diff -urNp openmpi-1.6.3_orig/opal/asm/base/ARMV5.asm openmpi-1.6.3/opal/asm/base/ARMV5.asm
--- openmpi-1.6.3_orig/opal/asm/base/ARMV5.asm	1969-12-31 19:00:00.000000000 -0500
+++ openmpi-1.6.3/opal/asm/base/ARMV5.asm	2012-11-13 14:43:17.018685692 -0500
@@ -0,0 +1,111 @@
+START_FILE
+       TEXT
+
+       ALIGN(4)
+START_FUNC(opal_atomic_mb)
+       mcr     p15, 0, r0, c7, c10, 5
+       bx      lr
+END_FUNC(opal_atomic_mb)
+
+
+START_FUNC(opal_atomic_rmb)
+       mcr     p15, 0, r0, c7, c10, 5
+       bx      lr
+END_FUNC(opal_atomic_rmb)
+
+
+START_FUNC(opal_atomic_wmb)
+       mcr     p15, 0, r0, c7, c10, 5
+       bx      lr
+END_FUNC(opal_atomic_wmb)
+
+
+START_FUNC(opal_atomic_cmpset_32)
+       push    {r4, lr}
+       mov     r3, r0
+       mov     r0, r1
+       mov     r1, r2
+       mov     r2, r3
+       ldr     r3, REFLSYM(1)
+       blx     r3
+       movcc   r0, #0
+       movcs   r0, #1
+       pop     {r4, lr}
+       bx      lr
+       .align  2
+       LSYM(1)
+       .word   0xffff0fc0
+END_FUNC(opal_atomic_cmpset_32)
+
+
+START_FUNC(opal_atomic_cmpset_acq_32)
+       push    {r4, lr}
+       mov     r3, r0
+       mov     r0, r1
+       mov     r1, r2
+       mov     r2, r3
+       ldr     r3, REFLSYM(2)
+       blx     r3
+       movcc   r0, #0
+       movcs   r0, #1
+       pop     {r4, lr}
+       bx      lr
+       .align  2
+       LSYM(2)
+       .word   0xffff0fc0
+END_FUNC(opal_atomic_cmpset_acq_32)
+
+
+START_FUNC(opal_atomic_cmpset_rel_32)
+       push    {r4, lr}
+       mov     r3, r0
+       mov     r0, r1
+       mov     r1, r2
+       mov     r2, r3
+       ldr     r3, REFLSYM(3)
+       blx     r3
+       movcc   r0, #0
+       movcs   r0, #1
+       pop     {r4, lr}
+       bx      lr
+       .align  2
+       LSYM(3)
+       .word   0xffff0fc0
+END_FUNC(opal_atomic_cmpset_rel_32)
+
+START_FUNC(opal_atomic_add_32)
+       push    {r4, lr}
+       mov     r4, r1
+       mov     r2, r0
+       LSYM(4)
+       ldr     r0, [r2]
+       ldr     r3, REFLSYM(5)
+       add     r1, r0, r4
+       blx     r3
+       bcc     REFLSYM(4)
+       mov     r0, r1          @ return the new value, not the helper status
+       pop     {r4, lr}
+       bx      lr
+       .align  2
+       LSYM(5)
+       .word   0xffff0fc0
+END_FUNC(opal_atomic_add_32)
+
+
+START_FUNC(opal_atomic_sub_32)
+       push    {r4, lr}
+       mov     r4, r1
+       mov     r2, r0
+       LSYM(6)
+       ldr     r0, [r2]
+       ldr     r3, REFLSYM(7)
+       sub     r1, r0, r4
+       blx     r3
+       bcc     REFLSYM(6)
+       mov     r0, r1          @ return the new value, not the helper status
+       pop     {r4, lr}
+       bx      lr
+       .align  2
+       LSYM(7)
+       .word   0xffff0fc0
+END_FUNC(opal_atomic_sub_32)
diff -urNp openmpi-1.6.3_orig/opal/asm/base/ARMV6.asm openmpi-1.6.3/opal/asm/base/ARMV6.asm
--- openmpi-1.6.3_orig/opal/asm/base/ARMV6.asm	1969-12-31 19:00:00.000000000 -0500
+++ openmpi-1.6.3/opal/asm/base/ARMV6.asm	2012-11-10 01:14:33.560297218 -0500
@@ -0,0 +1,153 @@
+START_FILE
+       TEXT
+
+       ALIGN(4)
+START_FUNC(opal_atomic_mb)
+       mcr     p15, 0, r0, c7, c10, 5
+       bx      lr
+END_FUNC(opal_atomic_mb)
+
+
+START_FUNC(opal_atomic_rmb)
+       mcr     p15, 0, r0, c7, c10, 5
+       bx      lr
+END_FUNC(opal_atomic_rmb)
+
+
+START_FUNC(opal_atomic_wmb)
+       mcr     p15, 0, r0, c7, c10, 5
+       bx      lr
+END_FUNC(opal_atomic_wmb)
+
+
+START_FUNC(opal_atomic_cmpset_32)
+       LSYM(1)
+       ldrex   r3, [r0]
+       cmp     r1, r3
+       bne     REFLSYM(2)
+       strex   r12, r2, [r0]
+       cmp     r12, #0
+       bne     REFLSYM(1)
+       mov     r0, #1
+       LSYM(2)
+       movne   r0, #0
+       bx      lr
+END_FUNC(opal_atomic_cmpset_32)
+
+
+START_FUNC(opal_atomic_cmpset_acq_32)
+       LSYM(3)
+       ldrex   r3, [r0]
+       cmp     r1, r3
+       bne     REFLSYM(4)
+       strex   r12, r2, [r0]
+       cmp     r12, #0
+       bne     REFLSYM(3)
+       mcr     p15, 0, r0, c7, c10, 5  @ CP15 barrier in place of dmb
+       mov     r0, #1
+       LSYM(4)
+       movne   r0, #0
+       bx      lr
+END_FUNC(opal_atomic_cmpset_acq_32)
+
+
+START_FUNC(opal_atomic_cmpset_rel_32)
+       LSYM(5)
+       ldrex   r3, [r0]
+       cmp     r1, r3
+       bne     REFLSYM(6)
+       mcr     p15, 0, r0, c7, c10, 5  @ CP15 barrier in place of dmb
+       strex   r12, r2, [r0]
+       cmp     r12, #0
+       bne     REFLSYM(5)
+       mov     r0, #1
+       LSYM(6)
+       movne   r0, #0
+       bx      lr
+END_FUNC(opal_atomic_cmpset_rel_32)
+
+#START_64BIT
+START_FUNC(opal_atomic_cmpset_64)
+       push    {r4-r7}
+       ldrd    r6, r7, [sp, #16]
+       LSYM(7)
+       ldrexd  r4, r5, [r0]
+       cmp     r4, r2
+       it      eq
+       cmpeq   r5, r3
+       bne     REFLSYM(8)
+       strexd  r1, r6, r7, [r0]
+       cmp     r1, #0
+       bne     REFLSYM(7)
+       mov     r0, #1
+       LSYM(8)
+       movne   r0, #0
+       pop     {r4-r7}
+       bx      lr
+END_FUNC(opal_atomic_cmpset_64)
+
+START_FUNC(opal_atomic_cmpset_acq_64)
+       push    {r4-r7}
+       ldrd    r6, r7, [sp, #16]
+       LSYM(9)
+       ldrexd  r4, r5, [r0]
+       cmp     r4, r2
+       it      eq
+       cmpeq   r5, r3
+       bne     REFLSYM(10)
+       strexd  r1, r6, r7, [r0]
+       cmp     r1, #0
+       bne     REFLSYM(9)
+       mcr     p15, 0, r0, c7, c10, 5  @ CP15 barrier in place of dmb
+       mov     r0, #1
+       LSYM(10)
+       movne   r0, #0
+       pop     {r4-r7}
+       bx      lr
+END_FUNC(opal_atomic_cmpset_acq_64)
+
+
+START_FUNC(opal_atomic_cmpset_rel_64)
+       push    {r4-r7}
+       ldrd    r6, r7, [sp, #16]
+       LSYM(11)
+       ldrexd  r4, r5, [r0]
+       cmp     r4, r2
+       it      eq
+       cmpeq   r5, r3
+       bne     REFLSYM(12)
+       mcr     p15, 0, r0, c7, c10, 5  @ CP15 barrier in place of dmb
+       strexd  r1, r6, r7, [r0]
+       cmp     r1, #0
+       bne     REFLSYM(11)
+       mov     r0, #1
+       LSYM(12)
+       movne   r0, #0
+       pop     {r4-r7}
+       bx      lr
+END_FUNC(opal_atomic_cmpset_rel_64)
+#END_64BIT
+
+
+START_FUNC(opal_atomic_add_32)
+       LSYM(13)
+       ldrex   r2, [r0]
+       add     r2, r2, r1
+       strex   r3, r2, [r0]
+       cmp     r3, #0
+       bne     REFLSYM(13)
+       mov     r0, r2
+       bx      lr
+END_FUNC(opal_atomic_add_32)
+
+
+START_FUNC(opal_atomic_sub_32)
+       LSYM(14)
+       ldrex   r2, [r0]
+       sub     r2, r2, r1
+       strex   r3, r2, [r0]
+       cmp     r3, #0
+       bne     REFLSYM(14)
+       mov     r0, r2
+       bx      lr
+END_FUNC(opal_atomic_sub_32)
diff -urNp openmpi-1.6.3_orig/opal/asm/base/ARMV7.asm openmpi-1.6.3/opal/asm/base/ARMV7.asm
--- openmpi-1.6.3_orig/opal/asm/base/ARMV7.asm	1969-12-31 19:00:00.000000000 -0500
+++ openmpi-1.6.3/opal/asm/base/ARMV7.asm	2012-04-03 10:29:44.000000000 -0400
@@ -0,0 +1,153 @@
+START_FILE
+       TEXT
+
+       ALIGN(4)
+START_FUNC(opal_atomic_mb)
+       dmb
+       bx      lr
+END_FUNC(opal_atomic_mb)
+
+
+START_FUNC(opal_atomic_rmb)
+       dmb
+       bx      lr
+END_FUNC(opal_atomic_rmb)
+
+
+START_FUNC(opal_atomic_wmb)
+       dmb
+       bx      lr
+END_FUNC(opal_atomic_wmb)
+
+
+START_FUNC(opal_atomic_cmpset_32)
+       LSYM(1)
+       ldrex   r3, [r0]
+       cmp     r1, r3
+       bne     REFLSYM(2)
+       strex   r12, r2, [r0]
+       cmp     r12, #0
+       bne     REFLSYM(1)
+       mov     r0, #1
+       LSYM(2)
+       movne   r0, #0
+       bx      lr
+END_FUNC(opal_atomic_cmpset_32)
+
+
+START_FUNC(opal_atomic_cmpset_acq_32)
+       LSYM(3)
+       ldrex   r3, [r0]
+       cmp     r1, r3
+       bne     REFLSYM(4)
+       strex   r12, r2, [r0]
+       cmp     r12, #0
+       bne     REFLSYM(3)
+       dmb
+       mov     r0, #1
+       LSYM(4)
+       movne   r0, #0
+       bx      lr
+END_FUNC(opal_atomic_cmpset_acq_32)
+
+
+START_FUNC(opal_atomic_cmpset_rel_32)
+       LSYM(5)
+       ldrex   r3, [r0]
+       cmp     r1, r3
+       bne     REFLSYM(6)
+       dmb
+       strex   r12, r2, [r0]
+       cmp     r12, #0
+       bne     REFLSYM(5)
+       mov     r0, #1
+       LSYM(6)
+       movne   r0, #0
+       bx      lr
+END_FUNC(opal_atomic_cmpset_rel_32)
+
+#START_64BIT
+START_FUNC(opal_atomic_cmpset_64)
+       push    {r4-r7}
+       ldrd    r6, r7, [sp, #16]
+       LSYM(7)
+       ldrexd  r4, r5, [r0]
+       cmp     r4, r2
+       it      eq
+       cmpeq   r5, r3
+       bne     REFLSYM(8)
+       strexd  r1, r6, r7, [r0]
+       cmp     r1, #0
+       bne     REFLSYM(7)
+       mov     r0, #1
+       LSYM(8)
+       movne   r0, #0
+       pop     {r4-r7}
+       bx      lr
+END_FUNC(opal_atomic_cmpset_64)
+
+START_FUNC(opal_atomic_cmpset_acq_64)
+       push    {r4-r7}
+       ldrd    r6, r7, [sp, #16]
+       LSYM(9)
+       ldrexd  r4, r5, [r0]
+       cmp     r4, r2
+       it      eq
+       cmpeq   r5, r3
+       bne     REFLSYM(10)
+       strexd  r1, r6, r7, [r0]
+       cmp     r1, #0
+       bne     REFLSYM(9)
+       dmb
+       mov     r0, #1
+       LSYM(10)
+       movne   r0, #0
+       pop     {r4-r7}
+       bx      lr
+END_FUNC(opal_atomic_cmpset_acq_64)
+
+
+START_FUNC(opal_atomic_cmpset_rel_64)
+       push    {r4-r7}
+       ldrd    r6, r7, [sp, #16]
+       LSYM(11)
+       ldrexd  r4, r5, [r0]
+       cmp     r4, r2
+       it      eq
+       cmpeq   r5, r3
+       bne     REFLSYM(12)
+       dmb
+       strexd  r1, r6, r7, [r0]
+       cmp     r1, #0
+       bne     REFLSYM(11)
+       mov     r0, #1
+       LSYM(12)
+       movne   r0, #0
+       pop     {r4-r7}
+       bx      lr
+END_FUNC(opal_atomic_cmpset_rel_64)
+#END_64BIT
+
+
+START_FUNC(opal_atomic_add_32)
+       LSYM(13)
+       ldrex   r2, [r0]
+       add     r2, r2, r1
+       strex   r3, r2, [r0]
+       cmp     r3, #0
+       bne     REFLSYM(13)
+       mov     r0, r2
+       bx      lr
+END_FUNC(opal_atomic_add_32)
+
+
+START_FUNC(opal_atomic_sub_32)
+       LSYM(14)
+       ldrex   r2, [r0]
+       sub     r2, r2, r1
+       strex   r3, r2, [r0]
+       cmp     r3, #0
+       bne     REFLSYM(14)
+       mov     r0, r2
+       bx      lr
+END_FUNC(opal_atomic_sub_32)
diff -urNp openmpi-1.6.3_orig/opal/asm/Makefile.am openmpi-1.6.3/opal/asm/Makefile.am
--- openmpi-1.6.3_orig/opal/asm/Makefile.am	2012-04-03 10:29:44.000000000 -0400
+++ openmpi-1.6.3/opal/asm/Makefile.am	2012-11-11 02:32:34.933714963 -0500
@@ -65,7 +65,9 @@ EXTRA_DIST = \
         base/default.conf \
         base/ALPHA.asm \
         base/AMD64.asm \
-        base/ARM.asm \
+        base/ARMV5.asm \
+        base/ARMV6.asm \
+        base/ARMV7.asm \
         base/IA32.asm \
         base/IA64.asm \
         base/MIPS.asm \
diff -urNp openmpi-1.6.3_orig/opal/asm/Makefile.in openmpi-1.6.3/opal/asm/Makefile.in
--- openmpi-1.6.3_orig/opal/asm/Makefile.in	2012-10-24 11:41:12.000000000 -0400
+++ openmpi-1.6.3/opal/asm/Makefile.in	2012-11-11 02:33:07.069715466 -0500
@@ -1154,7 +1154,9 @@ EXTRA_DIST = \
         base/default.conf \
         base/ALPHA.asm \
         base/AMD64.asm \
-        base/ARM.asm \
+        base/ARMV5.asm \
+        base/ARMV6.asm \
+        base/ARMV7.asm \
         base/IA32.asm \
         base/IA64.asm \
         base/MIPS.asm \
diff -urNp openmpi-1.6.3_orig/opal/config/opal_config_asm.m4 openmpi-1.6.3/opal/config/opal_config_asm.m4
--- openmpi-1.6.3_orig/opal/config/opal_config_asm.m4	2012-05-02 09:04:04.000000000 -0400
+++ openmpi-1.6.3/opal/config/opal_config_asm.m4	2012-11-11 02:31:38.429714062 -0500
@@ -904,6 +904,7 @@ AC_DEFUN([OMPI_CONFIG_ASM],[
             ompi_cv_asm_arch="ARM"
             OPAL_ASM_SUPPORT_64BIT=1
             OPAL_ASM_ARM_VERSION=7
+            cp -f "$top_ompi_builddir/opal/asm/base/ARMV7.asm" "$top_ompi_builddir/opal/asm/base/ARM.asm"
             AC_DEFINE_UNQUOTED([OPAL_ASM_ARM_VERSION], [$OPAL_ASM_ARM_VERSION],
                                [What ARM assembly version to use])
             OMPI_GCC_INLINE_ASSIGN='"mov %0, #0" : "=&r"(ret)'
@@ -913,6 +914,7 @@ AC_DEFUN([OMPI_CONFIG_ASM],[
             ompi_cv_asm_arch="ARM"
             OPAL_ASM_SUPPORT_64BIT=0
             OPAL_ASM_ARM_VERSION=6
+            cp -f "$top_ompi_builddir/opal/asm/base/ARMV6.asm" "$top_ompi_builddir/opal/asm/base/ARM.asm"
             AC_DEFINE_UNQUOTED([OPAL_ASM_ARM_VERSION], [$OPAL_ASM_ARM_VERSION],
                                [What ARM assembly version to use])
             OMPI_GCC_INLINE_ASSIGN='"mov %0, #0" : "=&r"(ret)'
@@ -923,6 +925,7 @@ AC_DEFUN([OMPI_CONFIG_ASM],[
             ompi_cv_asm_arch="ARM"
             OPAL_ASM_SUPPORT_64BIT=0
             OPAL_ASM_ARM_VERSION=5
+            cp -f "$top_ompi_builddir/opal/asm/base/ARMV5.asm" "$top_ompi_builddir/opal/asm/base/ARM.asm"
             AC_DEFINE_UNQUOTED([OPAL_ASM_ARM_VERSION], [$OPAL_ASM_ARM_VERSION],
                                [What ARM assembly version to use])
             OMPI_GCC_INLINE_ASSIGN='"mov %0, #0" : "=&r"(ret)'
Binary files openmpi-1.6.3_orig/opal/include/opal/sys/arm/.atomic.h.swp and openmpi-1.6.3/opal/include/opal/sys/arm/.atomic.h.swp differ
_______________________________________________ arm mailing list arm@xxxxxxxxxxxxxxxxxxxxxxx https://admin.fedoraproject.org/mailman/listinfo/arm