Hi,
gcc produces wrong code for the attached test case (snipped from the
Linux kernel source) above -O1.
$ arm-linux-gcc -O2 -c div64.S
$ arm-linux-gcc -O2 -save-temps dodiv.c div64.o -o dodiv
actual result:
$ ./dodiv
tbuf: <4>[ 52.305419]
expected result:
tbuf: <4>[ 0.305419]
When compiled with -fno-tree-ter, the result was OK; i.e. the
following build produces the expected output:
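$ arm-linux-gcc -O2 -fno-tree-ter -save-temps dodiv.c div64.o -o dodiv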
I'm using gcc-4.1.2.
The problem comes from bad argument passing to sprintf(), as seen in
dodiv.s:
...
        mov     r2, r5          <<<<<< (1)
        mov     ip, ip, lsr #6
        mov     r3, r5          <<<<<< (2)
        ldr     r1, .L10+8
        mov     r0, r4
        str     ip, [sp, #0]
        bl      sprintf
Here (1) loads the loglev_char argument (r2) and (2) loads the
(unsigned long)t argument (r3) of the sprintf() call in dodiv.c. Both
are loaded from r5, so the loglev_char value ('4' == 52) is also
passed for the first %5lu conversion; that is where the bogus 52 in
the output comes from.
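For reference, the expected values can be checked with plain C
arithmetic, no inline asm involved (a minimal host-side sketch I put
together, not part of the attached test case):

#include <stdio.h>

int main(void)
{
        unsigned long long t = 0x12345678;      /* 305419896 ns */
        unsigned long nanosec_rem = (unsigned long)(t % 1000000000);

        t /= 1000000000;                        /* 0 seconds */
        /* prints "tbuf: <4>[    0.305419] " */
        printf("tbuf: <%c>[%5lu.%06lu] \n",
               '4', (unsigned long)t, nanosec_rem / 1000);
        return 0;
}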
I'm not sure whether this is a gcc bug, since div64.S, taken from the
Linux kernel, notes in its comments that the calling convention is
totally non-standard for the sake of optimal code.
Any hint/advice, please?
Thanks in advance.
(Hiroki Kaminaga)
--
#include <stdio.h>
#include <alloca.h>
#define __xl "r0"
#define __xh "r1"
#define __asmeq(x, y) ".ifnc " x "," y " ; .err ; .endif\n\t"
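/*
 * Note: __asmeq("%0", "r1") expands to an assembler .ifnc/.err pair,
 * so assembly fails if gcc did not actually allocate operand %0 to
 * r1, i.e. it asserts the intended register assignment at build time.
 */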
#define do_div(n,base)                                          \
({                                                              \
        register unsigned int __base asm("r4") = base;          \
        register unsigned long long __n asm("r0") = n;          \
        register unsigned long long __res asm("r2");            \
        register unsigned int __rem asm(__xh);                  \
        asm(    __asmeq("%0", __xh)                             \
                __asmeq("%1", "r2")                             \
                __asmeq("%2", "r0")                             \
                __asmeq("%3", "r4")                             \
                "bl     __do_div64"                             \
                : "=r" (__rem), "=r" (__res)                    \
                : "r" (__n), "r" (__base)                       \
                : "ip", "lr", "cc");                            \
        n = __res;                                              \
        __rem;                                                  \
})
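/*
 * For comparison, a plain-C equivalent of do_div() with the same
 * interface (my own sketch, not taken from the kernel; the name
 * do_div_c is made up). Substituting it for do_div() below should
 * give the expected output independent of the __do_div64 calling
 * convention:
 */
#define do_div_c(n,base)                        \
({                                              \
        unsigned int __r = (n) % (base);        \
        (n) /= (base);                          \
        __r;                                    \
})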
#define default_message_loglevel 4
unsigned long long printk_clock(void)
{
        return (unsigned long long) 0x12345678;
}
int main(void)
{
        int loglev_char;
        char *p;
        char tbuf[50], *tp;
        unsigned tlen;
        unsigned long long t;
        unsigned long nanosec_rem;

        /* p is uninitialized alloca() memory; in the kernel it points
           into the log buffer.  The "<4>" in the output shows the else
           branch is taken here. */
        p = alloca(1024);
        if (p[0] == '<') {
                loglev_char = p[1];
        } else {
                loglev_char = default_message_loglevel + '0';   /* '4' == 52 */
        }

        t = printk_clock();                     /* 0x12345678 = 305419896 ns */
        nanosec_rem = do_div(t, 1000000000);    /* t = 0, rem = 305419896 */
        tlen = sprintf(tbuf,
                       "<%c>[%5lu.%06lu] ",
                       loglev_char,
                       (unsigned long)t,
                       nanosec_rem/1000);
        printf("tbuf: %s\n", tbuf);
        return 0;
}
/*
 * linux/arch/arm/lib/div64.S
 *
 * Optimized computation of 64-bit dividend / 32-bit divisor
 *
 * Author:      Nicolas Pitre
 * Created:     Oct 5, 2003
 * Copyright:   Monta Vista Software, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
//#include <linux/linkage.h>
#define ALIGN .align 4,0x90
#define ENTRY(name)     \
        .globl name ;   \
        ALIGN ;         \
        name:
#ifdef __ARMEB__
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif
/*
 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 *
 * Note: Calling convention is totally non-standard for optimal code.
 *       This is meant to be used by do_div() from include/asm/div64.h only.
 *
 * Input parameters:
 *      xh-xl   = dividend (clobbered)
 *      r4      = divisor (preserved)
 *
 * Output values:
 *      yh-yl   = result
 *      xh      = remainder
 *
 * Clobbered regs: xl, ip
 */
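@ Worked example of the convention above (my illustration, not from
@ the kernel source), little-endian case (xl=r0, xh=r1, yl=r2, yh=r3),
@ dividing 0x12345678 by 1000000000:
@       entry: r1:r0 = 0x00000000:0x12345678, r4 = 1000000000
@       exit:  r3:r2 = 0 (quotient), r1 = 0x12345678 (remainder 305419896)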
ENTRY(__do_div64)
        @ Test for easy paths first.
        subs    ip, r4, #1
        bls     9f                      @ divisor is 0 or 1
        tst     ip, r4
        beq     8f                      @ divisor is power of 2

        @ See if we need to handle upper 32-bit result.
        cmp     xh, r4
        mov     yh, #0
        blo     3f

        @ Align divisor with upper part of dividend.
        @ The aligned divisor is stored in yl preserving the original.
        @ The bit position is stored in ip.
#if __LINUX_ARM_ARCH__ >= 5
        clz     yl, r4
        clz     ip, xh
        sub     yl, yl, ip
        mov     ip, #1
        mov     ip, ip, lsl yl
        mov     yl, r4, lsl yl
#else
        mov     yl, r4
        mov     ip, #1
1:      cmp     yl, #0x80000000
        cmpcc   yl, xh
        movcc   yl, yl, lsl #1
        movcc   ip, ip, lsl #1
        bcc     1b
#endif
        @ The division loop for needed upper bit positions.
        @ Break out early if dividend reaches 0.
2:      cmp     xh, yl
        orrcs   yh, yh, ip
        subcss  xh, xh, yl
        movnes  ip, ip, lsr #1
        mov     yl, yl, lsr #1
        bne     2b

        @ See if we need to handle lower 32-bit result.
3:      cmp     xh, #0
        mov     yl, #0
        cmpeq   xl, r4
        movlo   xh, xl
        movlo   pc, lr

        @ The division loop for lower bit positions.
        @ Here we shift remainder bits leftwards rather than moving the
        @ divisor for comparisons, considering the carry-out bit as well.
        mov     ip, #0x80000000
4:      movs    xl, xl, lsl #1
        adcs    xh, xh, xh
        beq     6f
        cmpcc   xh, r4
5:      orrcs   yl, yl, ip
        subcs   xh, xh, r4
        movs    ip, ip, lsr #1
        bne     4b
        mov     pc, lr

        @ The top part of the remainder became zero. If carry is set
        @ (the 33rd bit) this is a false positive so resume the loop.
        @ Otherwise, if the lower part is also null then we are done.
6:      bcs     5b
        cmp     xl, #0
        moveq   pc, lr

        @ We still have remainder bits in the low part. Bring them up.
#if __LINUX_ARM_ARCH__ >= 5
        clz     xh, xl                  @ we know xh is zero here so...
        add     xh, xh, #1
        mov     xl, xl, lsl xh
        mov     ip, ip, lsr xh
#else
7:      movs    xl, xl, lsl #1
        mov     ip, ip, lsr #1
        bcc     7b
#endif

        @ Current remainder is now 1. It is worthless to compare with the
        @ divisor at this point since the divisor cannot be smaller than 3 here.
        @ If possible, branch for another shift in the division loop.
        @ If no bit position is left then we are done.
        movs    ip, ip, lsr #1
        mov     xh, #1
        bne     4b
        mov     pc, lr
8:      @ Division by a power of 2: determine what that divisor order is
        @ then simply shift values around
#if __LINUX_ARM_ARCH__ >= 5
        clz     ip, r4
        rsb     ip, ip, #31
#else
        mov     yl, r4
        cmp     r4, #(1 << 16)
        mov     ip, #0
        movhs   yl, yl, lsr #16
        movhs   ip, #16
        cmp     yl, #(1 << 8)
        movhs   yl, yl, lsr #8
        addhs   ip, ip, #8
        cmp     yl, #(1 << 4)
        movhs   yl, yl, lsr #4
        addhs   ip, ip, #4
        cmp     yl, #(1 << 2)
        addhi   ip, ip, #3
        addls   ip, ip, yl, lsr #1
#endif
        mov     yh, xh, lsr ip
        mov     yl, xl, lsr ip
        rsb     ip, ip, #32
        orr     yl, yl, xh, lsl ip
        mov     xh, xl, lsl ip
        mov     xh, xh, lsr ip
        mov     pc, lr

        @ eq -> division by 1: obvious enough...
9:      moveq   yl, xl
        moveq   yh, xh
        moveq   xh, #0
        moveq   pc, lr

        @ Division by 0:
        str     lr, [sp, #-4]!
        bl      __div0

        @ as wrong as it could be...
        mov     yl, #0
        mov     yh, #0
        mov     xh, #0
        ldr     pc, [sp], #4