On 11/11/2012 03:56 AM, Jon Masters wrote:
> This is an ugly kludge with some hand crafted assembly, but it builds
> locally, and I think the scratch build should succeed this time. Note, I
> was confused by the fact that there are at least *two* different sets of
> atomics implemented in OMPI (it's a mess). With this, both are now
> fixed, regardless of which compiler is being used.

I've updated the patch, following useful feedback from Nicolas Pitre. There is at least one bug in the current code (r3 clobbering, bad use of r12 because I knew it wasn't touched and didn't want to save it, which was just plain lazy and shouldn't have been done). I've tested these fixes in a little test harness and have a scratch build going now:

http://arm.koji.fedoraproject.org/koji/taskinfo?taskID=1250144

Assuming that completes OK, let's pull this newer version in. Note that a previous scratch build failed due to being unable to make a buildroot. There's something up on a few builders, I think.

Jon.
diff -urNp openmpi-1.6.3_orig/opal/asm/base/ARM.asm openmpi-1.6.3/opal/asm/base/ARM.asm --- openmpi-1.6.3_orig/opal/asm/base/ARM.asm 2012-04-03 10:29:44.000000000 -0400 +++ openmpi-1.6.3/opal/asm/base/ARM.asm 1969-12-31 19:00:00.000000000 -0500 @@ -1,153 +0,0 @@ -START_FILE - TEXT - - ALIGN(4) -START_FUNC(opal_atomic_mb) - dmb - bx lr -END_FUNC(opal_atomic_mb) - - -START_FUNC(opal_atomic_rmb) - dmb - bx lr -END_FUNC(opal_atomic_rmb) - - -START_FUNC(opal_atomic_wmb) - dmb - bx lr -END_FUNC(opal_atomic_wmb) - - -START_FUNC(opal_atomic_cmpset_32) - LSYM(1) - ldrex r3, [r0] - cmp r1, r3 - bne REFLSYM(2) - strex r12, r2, [r0] - cmp r12, #0 - bne REFLSYM(1) - mov r0, #1 - LSYM(2) - movne r0, #0 - bx lr -END_FUNC(opal_atomic_cmpset_32) - - -START_FUNC(opal_atomic_cmpset_acq_32) - LSYM(3) - ldrex r3, [r0] - cmp r1, r3 - bne REFLSYM(4) - strex r12, r2, [r0] - cmp r12, #0 - bne REFLSYM(3) - dmb - mov r0, #1 - LSYM(4) - movne r0, #0 - bx lr -END_FUNC(opal_atomic_cmpset_acq_32) - - -START_FUNC(opal_atomic_cmpset_rel_32) - LSYM(5) - ldrex r3, [r0] - cmp r1, r3 - bne REFLSYM(6) - dmb - strex r12, r2, [r0] - cmp r12, #0 - bne REFLSYM(4) - mov r0, #1 - LSYM(6) - movne r0, #0 - bx lr -END_FUNC(opal_atomic_cmpset_rel_32) - -#START_64BIT -START_FUNC(opal_atomic_cmpset_64) - push {r4-r7} - ldrd r6, r7, [sp, #16] - LSYM(7) - ldrexd r4, r5, [r0] - cmp r4, r2 - it eq - cmpeq r5, r3 - bne REFLSYM(8) - strexd r1, r6, r7, [r0] - cmp r1, #0 - bne REFLSYM(7) - mov r0, #1 - LSYM(8) - movne r0, #0 - pop {r4-r7} - bx lr -END_FUNC(opal_atomic_cmpset_64) - -START_FUNC(opal_atomic_cmpset_acq_64) - push {r4-r7} - ldrd r6, r7, [sp, #16] - LSYM(9) - ldrexd r4, r5, [r0] - cmp r4, r2 - it eq - cmpeq r5, r3 - bne REFLSYM(10) - strexd r1, r6, r7, [r0] - cmp r1, #0 - bne REFLSYM(9) - dmb - mov r0, #1 - LSYM(10) - movne r0, #0 - pop {r4-r7} - bx lr -END_FUNC(opal_atomic_cmpset_acq_64) - - -START_FUNC(opal_atomic_cmpset_rel_64) - push {r4-r7} - ldrd r6, r7, [sp, #16] - LSYM(11) - ldrexd r4, r5, [r0] - cmp 
r4, r2 - it eq - cmpeq r5, r3 - bne REFLSYM(12) - dmb - strexd r1, r6, r7, [r0] - cmp r1, #0 - bne REFLSYM(11) - mov r0, #1 - LSYM(12) - movne r0, #0 - pop {r4-r7} - bx lr -END_FUNC(opal_atomic_cmpset_rel_64) -#END_64BIT - - -START_FUNC(opal_atomic_add_32) - LSYM(13) - ldrex r2, [r0] - add r2, r2, r1 - strex r3, r2, [r0] - cmp r3, #0 - bne REFLSYM(13) - mov r0, r2 - bx lr -END_FUNC(opal_atomic_add_32) - - -START_FUNC(opal_atomic_sub_32) - LSYM(14) - ldrex r2, [r0] - sub r2, r2, r1 - strex r3, r2, [r0] - cmp r3, #0 - bne REFLSYM(14) - mov r0, r2 - bx lr -END_FUNC(opal_atomic_sub_32) diff -urNp openmpi-1.6.3_orig/opal/asm/base/ARMV5.asm openmpi-1.6.3/opal/asm/base/ARMV5.asm --- openmpi-1.6.3_orig/opal/asm/base/ARMV5.asm 1969-12-31 19:00:00.000000000 -0500 +++ openmpi-1.6.3/opal/asm/base/ARMV5.asm 2012-11-13 13:04:54.205593809 -0500 @@ -0,0 +1,109 @@ +START_FILE + TEXT + + ALIGN(4) +START_FUNC(opal_atomic_mb) + mcr p15, 0, r0, c7, c10, 5 + bx lr +END_FUNC(opal_atomic_mb) + + +START_FUNC(opal_atomic_rmb) + mcr p15, 0, r0, c7, c10, 5 + bx lr +END_FUNC(opal_atomic_rmb) + + +START_FUNC(opal_atomic_wmb) + mcr p15, 0, r0, c7, c10, 5 + bx lr +END_FUNC(opal_atomic_wmb) + + +START_FUNC(opal_atomic_cmpset_32) + push {lr} + mov r3, r0 + mov r0, r1 + mov r1, r2 + mov r2, r3 + ldr r3, REFLSYM(1) + blx r3 + movcc r0, #0 + movcs r0, #1 + pop {lr} + bx lr + .align 2 + LSYM(1) + .word 0xffff0fc0 +END_FUNC(opal_atomic_cmpset_32) + + +START_FUNC(opal_atomic_cmpset_acq_32) + push {lr} + mov r3, r0 + mov r0, r1 + mov r1, r2 + mov r2, r3 + ldr r3, REFLSYM(2) + blx r3 + movcc r0, #0 + movcs r0, #1 + pop {lr} + bx lr + .align 2 + LSYM(2) + .word 0xffff0fc0 +END_FUNC(opal_atomic_cmpset_acq_32) + + +START_FUNC(opal_atomic_cmpset_rel_32) + push {lr} + mov r3, r0 + mov r0, r1 + mov r1, r2 + mov r2, r3 + ldr r3, REFLSYM(3) + blx r3 + movcc r0, #0 + movcs r0, #1 + pop {lr} + bx lr + .align 2 + LSYM(3) + .word 0xffff0fc0 +END_FUNC(opal_atomic_cmpset_rel_32) + +START_FUNC(opal_atomic_add_32) + push 
{r4, lr} + mov r4, r1 + mov r2, r0 + LSYM(4) + ldr r0, [r2] + ldr r3, REFLSYM(5) + add r1, r0, r4 + blx r3 + bcc REFLSYM(4) + pop {r4, lr} + bx lr + .align 2 + LSYM(5) + .word 0xffff0fc0 +END_FUNC(opal_atomic_add_32) + + +START_FUNC(opal_atomic_sub_32) + push {r4, lr} + mov r4, r1 + mov r2, r0 + LSYM(6) + ldr r0, [r2] + ldr r3, REFLSYM(7) + sub r1, r0, r4 + blx r3 + bcc REFLSYM(6) + pop {r4, lr} + bx lr + .align 2 + LSYM(7) + .word 0xffff0fc0 +END_FUNC(opal_atomic_sub_32) diff -urNp openmpi-1.6.3_orig/opal/asm/base/ARMV6.asm openmpi-1.6.3/opal/asm/base/ARMV6.asm --- openmpi-1.6.3_orig/opal/asm/base/ARMV6.asm 1969-12-31 19:00:00.000000000 -0500 +++ openmpi-1.6.3/opal/asm/base/ARMV6.asm 2012-11-10 01:14:33.560297218 -0500 @@ -0,0 +1,153 @@ +START_FILE + TEXT + + ALIGN(4) +START_FUNC(opal_atomic_mb) + mcr p15, 0, r0, c7, c10, 5 + bx lr +END_FUNC(opal_atomic_mb) + + +START_FUNC(opal_atomic_rmb) + mcr p15, 0, r0, c7, c10, 5 + bx lr +END_FUNC(opal_atomic_rmb) + + +START_FUNC(opal_atomic_wmb) + mcr p15, 0, r0, c7, c10, 5 + bx lr +END_FUNC(opal_atomic_wmb) + + +START_FUNC(opal_atomic_cmpset_32) + LSYM(1) + ldrex r3, [r0] + cmp r1, r3 + bne REFLSYM(2) + strex r12, r2, [r0] + cmp r12, #0 + bne REFLSYM(1) + mov r0, #1 + LSYM(2) + movne r0, #0 + bx lr +END_FUNC(opal_atomic_cmpset_32) + + +START_FUNC(opal_atomic_cmpset_acq_32) + LSYM(3) + ldrex r3, [r0] + cmp r1, r3 + bne REFLSYM(4) + strex r12, r2, [r0] + cmp r12, #0 + bne REFLSYM(3) + dmb + mov r0, #1 + LSYM(4) + movne r0, #0 + bx lr +END_FUNC(opal_atomic_cmpset_acq_32) + + +START_FUNC(opal_atomic_cmpset_rel_32) + LSYM(5) + ldrex r3, [r0] + cmp r1, r3 + bne REFLSYM(6) + dmb + strex r12, r2, [r0] + cmp r12, #0 + bne REFLSYM(4) + mov r0, #1 + LSYM(6) + movne r0, #0 + bx lr +END_FUNC(opal_atomic_cmpset_rel_32) + +#START_64BIT +START_FUNC(opal_atomic_cmpset_64) + push {r4-r7} + ldrd r6, r7, [sp, #16] + LSYM(7) + ldrexd r4, r5, [r0] + cmp r4, r2 + it eq + cmpeq r5, r3 + bne REFLSYM(8) + strexd r1, r6, r7, [r0] + cmp r1, #0 + bne 
REFLSYM(7) + mov r0, #1 + LSYM(8) + movne r0, #0 + pop {r4-r7} + bx lr +END_FUNC(opal_atomic_cmpset_64) + +START_FUNC(opal_atomic_cmpset_acq_64) + push {r4-r7} + ldrd r6, r7, [sp, #16] + LSYM(9) + ldrexd r4, r5, [r0] + cmp r4, r2 + it eq + cmpeq r5, r3 + bne REFLSYM(10) + strexd r1, r6, r7, [r0] + cmp r1, #0 + bne REFLSYM(9) + dmb + mov r0, #1 + LSYM(10) + movne r0, #0 + pop {r4-r7} + bx lr +END_FUNC(opal_atomic_cmpset_acq_64) + + +START_FUNC(opal_atomic_cmpset_rel_64) + push {r4-r7} + ldrd r6, r7, [sp, #16] + LSYM(11) + ldrexd r4, r5, [r0] + cmp r4, r2 + it eq + cmpeq r5, r3 + bne REFLSYM(12) + dmb + strexd r1, r6, r7, [r0] + cmp r1, #0 + bne REFLSYM(11) + mov r0, #1 + LSYM(12) + movne r0, #0 + pop {r4-r7} + bx lr +END_FUNC(opal_atomic_cmpset_rel_64) +#END_64BIT + + +START_FUNC(opal_atomic_add_32) + LSYM(13) + ldrex r2, [r0] + add r2, r2, r1 + strex r3, r2, [r0] + cmp r3, #0 + bne REFLSYM(13) + mov r0, r2 + bx lr +END_FUNC(opal_atomic_add_32) + + +START_FUNC(opal_atomic_sub_32) + LSYM(14) + ldrex r2, [r0] + sub r2, r2, r1 + strex r3, r2, [r0] + cmp r3, #0 + bne REFLSYM(14) + mov r0, r2 + bx lr +END_FUNC(opal_atomic_sub_32) diff -urNp openmpi-1.6.3_orig/opal/asm/base/ARMV7.asm openmpi-1.6.3/opal/asm/base/ARMV7.asm --- openmpi-1.6.3_orig/opal/asm/base/ARMV7.asm 1969-12-31 19:00:00.000000000 -0500 +++ openmpi-1.6.3/opal/asm/base/ARMV7.asm 2012-04-03 10:29:44.000000000 -0400 @@ -0,0 +1,153 @@ +START_FILE + TEXT + + ALIGN(4) +START_FUNC(opal_atomic_mb) + dmb + bx lr +END_FUNC(opal_atomic_mb) + + +START_FUNC(opal_atomic_rmb) + dmb + bx lr +END_FUNC(opal_atomic_rmb) + + +START_FUNC(opal_atomic_wmb) + dmb + bx lr +END_FUNC(opal_atomic_wmb) + + +START_FUNC(opal_atomic_cmpset_32) + LSYM(1) + ldrex r3, [r0] + cmp r1, r3 + bne REFLSYM(2) + strex r12, r2, [r0] + cmp r12, #0 + bne REFLSYM(1) + mov r0, #1 + LSYM(2) + movne r0, #0 + bx lr +END_FUNC(opal_atomic_cmpset_32) + + +START_FUNC(opal_atomic_cmpset_acq_32) + LSYM(3) + ldrex r3, [r0] + cmp r1, r3 + bne REFLSYM(4) + strex 
r12, r2, [r0] + cmp r12, #0 + bne REFLSYM(3) + dmb + mov r0, #1 + LSYM(4) + movne r0, #0 + bx lr +END_FUNC(opal_atomic_cmpset_acq_32) + + +START_FUNC(opal_atomic_cmpset_rel_32) + LSYM(5) + ldrex r3, [r0] + cmp r1, r3 + bne REFLSYM(6) + dmb + strex r12, r2, [r0] + cmp r12, #0 + bne REFLSYM(4) + mov r0, #1 + LSYM(6) + movne r0, #0 + bx lr +END_FUNC(opal_atomic_cmpset_rel_32) + +#START_64BIT +START_FUNC(opal_atomic_cmpset_64) + push {r4-r7} + ldrd r6, r7, [sp, #16] + LSYM(7) + ldrexd r4, r5, [r0] + cmp r4, r2 + it eq + cmpeq r5, r3 + bne REFLSYM(8) + strexd r1, r6, r7, [r0] + cmp r1, #0 + bne REFLSYM(7) + mov r0, #1 + LSYM(8) + movne r0, #0 + pop {r4-r7} + bx lr +END_FUNC(opal_atomic_cmpset_64) + +START_FUNC(opal_atomic_cmpset_acq_64) + push {r4-r7} + ldrd r6, r7, [sp, #16] + LSYM(9) + ldrexd r4, r5, [r0] + cmp r4, r2 + it eq + cmpeq r5, r3 + bne REFLSYM(10) + strexd r1, r6, r7, [r0] + cmp r1, #0 + bne REFLSYM(9) + dmb + mov r0, #1 + LSYM(10) + movne r0, #0 + pop {r4-r7} + bx lr +END_FUNC(opal_atomic_cmpset_acq_64) + + +START_FUNC(opal_atomic_cmpset_rel_64) + push {r4-r7} + ldrd r6, r7, [sp, #16] + LSYM(11) + ldrexd r4, r5, [r0] + cmp r4, r2 + it eq + cmpeq r5, r3 + bne REFLSYM(12) + dmb + strexd r1, r6, r7, [r0] + cmp r1, #0 + bne REFLSYM(11) + mov r0, #1 + LSYM(12) + movne r0, #0 + pop {r4-r7} + bx lr +END_FUNC(opal_atomic_cmpset_rel_64) +#END_64BIT + + +START_FUNC(opal_atomic_add_32) + LSYM(13) + ldrex r2, [r0] + add r2, r2, r1 + strex r3, r2, [r0] + cmp r3, #0 + bne REFLSYM(13) + mov r0, r2 + bx lr +END_FUNC(opal_atomic_add_32) + + +START_FUNC(opal_atomic_sub_32) + LSYM(14) + ldrex r2, [r0] + sub r2, r2, r1 + strex r3, r2, [r0] + cmp r3, #0 + bne REFLSYM(14) + mov r0, r2 + bx lr +END_FUNC(opal_atomic_sub_32) diff -urNp openmpi-1.6.3_orig/opal/asm/Makefile.am openmpi-1.6.3/opal/asm/Makefile.am --- openmpi-1.6.3_orig/opal/asm/Makefile.am 2012-04-03 10:29:44.000000000 -0400 +++ openmpi-1.6.3/opal/asm/Makefile.am 2012-11-11 02:32:34.933714963 -0500 @@ -65,7 +65,9 @@ 
EXTRA_DIST = \ base/default.conf \ base/ALPHA.asm \ base/AMD64.asm \ - base/ARM.asm \ + base/ARMV5.asm \ + base/ARMV6.asm \ + base/ARMV7.asm \ base/IA32.asm \ base/IA64.asm \ base/MIPS.asm \ diff -urNp openmpi-1.6.3_orig/opal/asm/Makefile.in openmpi-1.6.3/opal/asm/Makefile.in --- openmpi-1.6.3_orig/opal/asm/Makefile.in 2012-10-24 11:41:12.000000000 -0400 +++ openmpi-1.6.3/opal/asm/Makefile.in 2012-11-11 02:33:07.069715466 -0500 @@ -1154,7 +1154,9 @@ EXTRA_DIST = \ base/default.conf \ base/ALPHA.asm \ base/AMD64.asm \ - base/ARM.asm \ + base/ARMV5.asm \ + base/ARMV6.asm \ + base/ARMV7.asm \ base/IA32.asm \ base/IA64.asm \ base/MIPS.asm \ diff -urNp openmpi-1.6.3_orig/opal/config/opal_config_asm.m4 openmpi-1.6.3/opal/config/opal_config_asm.m4 --- openmpi-1.6.3_orig/opal/config/opal_config_asm.m4 2012-05-02 09:04:04.000000000 -0400 +++ openmpi-1.6.3/opal/config/opal_config_asm.m4 2012-11-11 02:31:38.429714062 -0500 @@ -904,6 +904,7 @@ AC_DEFUN([OMPI_CONFIG_ASM],[ ompi_cv_asm_arch="ARM" OPAL_ASM_SUPPORT_64BIT=1 OPAL_ASM_ARM_VERSION=7 + cp -f "$top_ompi_builddir/opal/asm/base/ARMV7.asm" "$top_ompi_builddir/opal/asm/base/ARM.asm" AC_DEFINE_UNQUOTED([OPAL_ASM_ARM_VERSION], [$OPAL_ASM_ARM_VERSION], [What ARM assembly version to use]) OMPI_GCC_INLINE_ASSIGN='"mov %0, #0" : "=&r"(ret)' @@ -913,6 +914,7 @@ AC_DEFUN([OMPI_CONFIG_ASM],[ ompi_cv_asm_arch="ARM" OPAL_ASM_SUPPORT_64BIT=0 OPAL_ASM_ARM_VERSION=6 + cp -f "$top_ompi_builddir/opal/asm/base/ARMV6.asm" "$top_ompi_builddir/opal/asm/base/ARM.asm" AC_DEFINE_UNQUOTED([OPAL_ASM_ARM_VERSION], [$OPAL_ASM_ARM_VERSION], [What ARM assembly version to use]) OMPI_GCC_INLINE_ASSIGN='"mov %0, #0" : "=&r"(ret)' @@ -923,6 +925,7 @@ AC_DEFUN([OMPI_CONFIG_ASM],[ ompi_cv_asm_arch="ARM" OPAL_ASM_SUPPORT_64BIT=0 OPAL_ASM_ARM_VERSION=5 + cp -f "$top_ompi_builddir/opal/asm/base/ARMV5.asm" "$top_ompi_builddir/opal/asm/base/ARM.asm" AC_DEFINE_UNQUOTED([OPAL_ASM_ARM_VERSION], [$OPAL_ASM_ARM_VERSION], [What ARM assembly version to use]) 
OMPI_GCC_INLINE_ASSIGN='"mov %0, #0" : "=&r"(ret)' Binary files openmpi-1.6.3_orig/opal/include/opal/sys/arm/.atomic.h.swp and openmpi-1.6.3/opal/include/opal/sys/arm/.atomic.h.swp differ
_______________________________________________ arm mailing list arm@xxxxxxxxxxxxxxxxxxxxxxx https://admin.fedoraproject.org/mailman/listinfo/arm