The following code (extracted from OpenSSL)
#include <sys/types.h>
/* Word type for the multi-word arrays handled by bn_sub_words.  The inline
 * asm there accesses elements with movq/sbbq and an index scale of 8, so it
 * relies on this type being 64 bits wide. */
typedef unsigned long BN_ULONG;
/*
 * Word-by-word subtraction with borrow propagation, written as x86-64
 * GNU inline asm: for each index k the asm stores ap[k] - bp[k] - borrow
 * into rp[k], carrying the borrow in the CPU carry flag between iterations.
 *
 *   rp  destination array (written through by the asm)
 *   ap  minuend array
 *   bp  subtrahend array
 *   n   number of words to process; the function returns 0 without
 *       touching any array when n <= 0
 *
 * Returns the borrow out of the last word, as 0 or 1.
 *
 * NOTE(review): operand %0 (ret) is declared "=r" but is written by the very
 * first instruction, while the input operands %3..%5 are still read on every
 * loop iteration.  GCC's constraint-modifier documentation describes '&'
 * (early clobber) for outputs written before all inputs are consumed; without
 * it the compiler is allowed to give %0 the same register as an input.
 *
 * NOTE(review): the asm loads and stores through rp/ap/bp, yet there is no
 * "memory" clobber and no memory operands describing those accesses --
 * confirm the compiler cannot reorder or cache around this statement.
 *
 * NOTE(review): in 64-bit mode `loop` counts in the full RCX register, while
 * n is a 32-bit int bound with "+c" -- confirm the upper half of RCX is
 * guaranteed to be zero here.
 */
BN_ULONG bn_sub_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG
*bp,int n)
{ BN_ULONG ret;
size_t i=0; /* word index, scaled by 8 in the addressing modes below */
if (n <= 0) return 0;
asm (
" subq %0,%0 \n" /* clear borrow */
" jmp 1f \n" /* skip over any alignment padding */
".p2align 4 \n" /* align the loop head to a 16-byte boundary */
"1: movq (%4,%2,8),%0 \n" /* %0 = ap[i] */
" sbbq (%5,%2,8),%0 \n" /* %0 -= bp[i] + borrow; sets the new borrow in CF */
" movq %0,(%3,%2,8) \n" /* rp[i] = %0 */
" lea 1(%2),%2 \n" /* i++ via lea, which leaves CF untouched */
" loop 1b \n" /* decrement the count register, repeat while non-zero; flags unchanged */
" sbbq %0,%0 \n" /* %0 = 0 if no borrow, all ones if borrow */
: "=r"(ret),"+c"(n),"+r"(i) /* %0 = ret, %1 = n (c register, used implicitly by loop), %2 = i */
: "r"(rp),"r"(ap),"r"(bp) /* %3 = rp, %4 = ap, %5 = bp */
: "cc" /* the flags are modified */
);
return ret&1; /* reduce the 0 / all-ones mask to a 0 / 1 borrow */
}
produces this output when compiled with "-O1 -S":
bn_sub_words:
.LFB3:
.cfi_startproc
testl %ecx, %ecx
jle .L3
movl $0, %r8d
#APP
# 11 "t.c" 1
subq %rdi,%rdi
jmp 1f
.p2align 4
1: movq (%rsi,%r8,8),%rdi
sbbq (%rdx,%r8,8),%rdi
movq %rdi,(%rdi,%r8,8)
lea 1(%r8),%r8
loop 1b
sbbq %rdi,%rdi
# 0 "" 2
#NO_APP
movq %rdi, %rax
andl $1, %eax
ret
.L3:
movl $0, %eax
ret
.cfi_endproc
.LFE3:
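Note that %0 and %3 are assigned the same register (%rdi), so the store
to (%3,%2,8) uses the value just computed in %0 as its base address.
Changing "=r" to "+r" for operand 0 avoids this. The original register
assignment would only be correct if an output-only operand were required
to be written last, after all input operands have been read, but the
Extended Asm documentation does not state such a requirement.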
I saw this with gcc-4.7.2-2.fc17.x86_64 from Fedora, but GCC mainline as
of today shows the same behavior.
--
Florian Weimer / Red Hat Product Security Team