On 14/07/2023 20:18, Mason wrote:

> For fun, I'm trying to write code propagating carries in bignums.
>
> gcc-trunk generates incorrect code, but I suspect that's
> because my asm template is invalid (it's a byzantine syntax)
>
>
> typedef unsigned long long u64;
>
> void testcase(u64 *acc, u64 a, u64 b)
> {
>   asm("add %[LO], %[D0]\n\t" "adc %[HI], %[D1]\n"
>   "1:\n\t" "adc $0, %[D2]\n\t" "lea %[D1], %[ACC]\n\t" "jc 1b" :
>   [D0] "+m" (acc[0]), [D1] "+m" (acc[1]), [D2] "+m" (acc[2]), [ACC] "+r" (acc) :
>   [LO] "r" (a), [HI] "r" (b) : "cc");
> }
>
> void foo(u64 *acc, u64 a, u64 b)
> {
>   testcase(acc+0, a, b);
>   testcase(acc+1, a, b);
> }

If I tweak the code as below, it works as expected, except
I now have to mark the asm block as volatile. Why though?
(And see below for offset optimization.)

typedef unsigned long long u64;

void testcase(u64 *acc, u64 a, u64 b)
{
  asm volatile("add %[LO], 0(%[ACC])\n\t" "adc %[HI], 8(%[ACC])\n"
  "1:\n\t" "adc $0, 16(%[ACC])\n\t" "lea 8(%[ACC]), %[ACC]\n\t" "jc 1b" :
  [ACC] "+r" (acc) : [LO] "r" (a), [HI] "r" (b) : "cc");
}

void foo(u64 *acc, u64 a, u64 b)
{
  testcase(acc+0, a, b);
  testcase(acc+1, a, b);
}


testcase:
  add %rsi, 0(%rdi)
  adc %rdx, 8(%rdi)
1:
  adc $0, 16(%rdi)
  lea 8(%rdi), %rdi
  jc 1b
# Everything checks out so far
  ret

foo:
  movq %rdi, %rax # useless copy. caused by volatile?
  add %rsi, 0(%rax)
  adc %rdx, 8(%rax)
1:
  adc $0, 16(%rax)
  lea 8(%rax), %rax # that works
  jc 1b
# sub-optimal add, gcc could have shifted the offset by 8...
# EXCEPT it doesn't *know* about the offsets, because they're
# hard-coded in the template... Back to square 2 then :)
  addq $8, %rdi
  add %rsi, 0(%rdi)
  adc %rdx, 8(%rdi)
1:
  adc $0, 16(%rdi)
  lea 8(%rdi), %rdi
  jc 1b
  ret

Regards