/*
 * Carry propagation in bignums with x86-64 inline assembly.
 *
 * Original report (kept for context):
 *
 *   Hello,
 *
 *   For fun, I'm trying to write code propagating carries in bignums.
 *   gcc-trunk generates incorrect code, but I suspect that's because my
 *   asm template is invalid (it's a byzantine syntax).
 *
 * The template as first written declared the three words and the pointer as
 * four independent operands:
 *
 *     "add %[LO], %[D0]"  "adc %[HI], %[D1]"
 *     "1:"  "adc $0, %[D2]"  "lea %[D1], %[ACC]"  "jc 1b"
 *     : [D0] "+m" (acc[0]), [D1] "+m" (acc[1]), [D2] "+m" (acc[2]),
 *       [ACC] "+r" (acc)
 *     : [LO] "r" (a), [HI] "r" (b)
 *     : "cc"
 *
 * Output observed for that version with
 * `gcc-trunk -O3 -march=znver1 -S`:
 *
 *   testcase:
 *       add %rsi, (%rdi)        # acc[0] += a
 *       adc %rdx, 8(%rdi)       # acc[1] += b + cf
 *   1:
 *       adc $0, 16(%rdi)        # acc[2] += cf
 *       lea 8(%rdi), %rdi       # ++acc
 *       jc 1b                   # loop until cf = 0
 *       ret
 *   foo:
 *       leaq 8(%rdi), %rcx      # rcx = acc+1
 *       movq %rdi, %rax         # rax = acc
 *       add %rsi, (%rax)        # acc[0] += a
 *       adc %rdx, 8(%rax)       # acc[1] += b + cf
 *   1:
 *       adc $0, 16(%rax)        # acc[2] += cf
 *       lea 8(%rax), %rax       # ++acc
 *       jc 1b
 *       # So far, so good
 *       movq %rcx, %rax         # rax = acc+1
 *       add %rsi, (%rax)        # acc[1] += a
 *       # Not sure why we switch to rdi instead of rax at this point,
 *       # but why not...
 *       adc %rdx, 16(%rdi)      # acc[2] += b + cf
 *   1:
 *       adc $0, 24(%rdi)        # acc[3] += cf
 *       # BROKEN: increments wrong reg, and by 2
 *       lea 16(%rdi), %rax
 *       jc 1b
 *       ret
 *
 *   I guess I'm using the wrong constraints?
 *
 *   Regards
 *
 * Answer: yes, the constraints were wrong; the generated code is a legal
 * reading of them.  An "m" operand is just "some valid address for this
 * object"; nothing ties that address to the register chosen for %[ACC].  In
 * the inlined second call the compiler addressed acc[0] through %rax but
 * acc[1]/acc[2] through %rdi, so the `lea` updated %rax while the loop kept
 * hitting the same %rdi-based address.  The loop also touches words beyond
 * acc[2] that were never declared.  The version below addresses memory only
 * through the single pointer register it advances, and declares a "memory"
 * clobber for the undeclared accesses.
 */

typedef unsigned long long u64;

/*
 * Add the 128-bit value (b:a) into the little-endian bignum at `acc` and
 * propagate the carry upwards.
 *
 *   acc[0] += a
 *   acc[1] += b + carry
 *   acc[2] += carry, acc[3] += carry, ... until no carry is produced
 *
 * acc[2] is always read and written, even when there is no carry.  The
 * propagation loop has no length bound: the caller must make sure the bignum
 * has enough headroom for the carry to die out inside it.
 */
void testcase(u64 *acc, u64 a, u64 b)
{
#if defined(__GNUC__) && defined(__x86_64__) && !defined(__ILP32__)
    /*
     * - __asm__ rather than asm: the latter is not a keyword in strict ISO
     *   modes such as -std=c11.
     * - volatile: the only output (acc) is dead afterwards, so without it the
     *   whole statement could be discarded.
     * - Explicit `q` suffixes: `adc $0, <mem>` has no register operand, so
     *   its operand size would otherwise be ambiguous.
     * - "+&r": acc is read and modified; earlyclobber keeps it from sharing
     *   a register with a or b.
     * - "memory": the words are accessed through the pointer, not through
     *   declared operands, and an unknown number of them may be written.
     * - lea does not modify flags, so the carry from adc survives until jc.
     */
    __asm__ __volatile__(
        "addq %[LO], (%[ACC])\n\t"
        "adcq %[HI], 8(%[ACC])\n"
        "1:\n\t"
        "adcq $0, 16(%[ACC])\n\t"
        "leaq 8(%[ACC]), %[ACC]\n\t"
        "jc 1b"
        : [ACC] "+&r" (acc)
        : [LO] "r" (a), [HI] "r" (b)
        : "cc", "memory");
#else
    /* Portable equivalent for targets without the x86-64 template. */
    u64 word = acc[0] + a;
    u64 carry = word < a;
    acc[0] = word;

    word = acc[1] + b;
    u64 carry_out = word < b;
    word += carry;
    carry_out |= word < carry;  /* at most one of the two adds can wrap */
    acc[1] = word;
    carry = carry_out;

    acc += 2;
    do {
        word = *acc + carry;
        carry = word < carry;
        *acc++ = word;
    } while (carry);
#endif
}

/*
 * Apply testcase() twice: once at acc[0] and once at acc[1], i.e. add (b:a)
 * at word offset 0 and again at word offset 1.
 */
void foo(u64 *acc, u64 a, u64 b)
{
    testcase(acc + 0, a, b);
    testcase(acc + 1, a, b);
}