Fast multipliers on ARM

Michael Robins via Gcc-help <gcc-help@xxxxxxxxxxx> · Fri, 2 Apr 2021 23:27:22 +0100

I am cross-compiling using "arm-none-eabi-gcc -mcpu=cortex-m0plus -O3" 
for a target architecture that performs a multiply in a single cycle, 
using gcc version 10.2.0 on a PC running Fedora Linux.

Is there an option to persuade the compiler to use the multiply 
instruction automatically instead of shifts and adds when multiplying by 
a constant?

In the example code below, gcc uses the trick of multiplying by a big 
number instead of dividing by a small one (12 in this case). For my 
target, the code from "-O3" is both longer and slower than that for "-Os".

foobar.c:

/* One element holds three ints, so subtracting two threeInts pointers
   makes the compiler divide the byte distance by sizeof(threeInts). */
typedef struct { int x[3]; } threeInts;

/* Returns p - q, i.e. the number of threeInts elements from q to p.
   Compiled with the optimisation level given on the command line. */
int foo(threeInts * p, threeInts * q)
{
    const int elementsApart = p - q;
    return elementsApart;
}
/* Same pointer subtraction as foo, but the pragmas around it switch this
   one function to -Os and then restore the previously active options. */
#pragma GCC push_options
#pragma GCC optimize("-Os")
int bar(threeInts * p, threeInts * q)
{
    const int elementsApart = p - q;
    return elementsApart;
}
#pragma GCC pop_options

foobar.s:

    .cpu cortex-m0plus
    .eabi_attribute 20, 1
    .eabi_attribute 21, 1
    .eabi_attribute 23, 3
    .eabi_attribute 24, 1
    .eabi_attribute 25, 1
    .eabi_attribute 26, 1
    .eabi_attribute 30, 2
    .eabi_attribute 34, 0
    .eabi_attribute 18, 4
    .file    "foobar.c"
    .text
    @ ---------------------------------------------------------------------
    @ foo: code generated at -O3 for `return p - q;` (see foobar.c above).
    @ In:  r0 = p, r1 = q (first and second argument)
    @ Out: r0 = p - q in units of threeInts
    @ Uses r3 as scratch; no stack use.
    @
    @ The byte difference is shifted right by 2 and the result d is then
    @ multiplied by 0xAAAAAAAB using shifts and adds only (no muls).
    @ Since 3 * 0xAAAAAAAB == 1 (mod 2^32), this gives d / 3 whenever d is
    @ an exact multiple of 3, i.e. (byte difference) / 12 overall.
    @ All products below are modulo 2^32.
    @ ---------------------------------------------------------------------
    .align    1
    .p2align 2,,3
    .global    foo
    .arch armv6s-m
    .syntax unified
    .code    16
    .thumb_func
    .fpu softvfp
    .type    foo, %function
foo:
    @ args = 0, pretend = 0, frame = 0
    @ frame_needed = 0, uses_anonymous_args = 0
    @ link register save eliminated.
    subs    r1, r0, r1          @ r1 = p - q, in bytes
    asrs    r1, r1, #2          @ r1 = d = byte difference >> 2 (arithmetic)
    lsls    r3, r1, #2          @ r3 = 4 * d
    adds    r3, r3, r1          @ r3 = 5 * d
    lsls    r0, r3, #4          @ r0 = 80 * d
    adds    r3, r3, r0          @ r3 = 85 * d         = 0x55 * d
    lsls    r0, r3, #8          @ r0 = 0x5500 * d
    adds    r3, r3, r0          @ r3 = 0x5555 * d
    lsls    r0, r3, #16         @ r0 = 0x55550000 * d
    adds    r0, r3, r0          @ r0 = 0x55555555 * d
    lsls    r0, r0, #1          @ r0 = 0xAAAAAAAA * d
    adds    r0, r0, r1          @ r0 = 0xAAAAAAAB * d  (the return value)
    @ sp needed
    bx    lr                    @ return to caller
    .size    foo, .-foo
    @ ---------------------------------------------------------------------
    @ bar: code generated at -Os for the same `return p - q;` (see foobar.c).
    @ In:  r0 = p, r1 = q (first and second argument)
    @ Out: r0 = p - q in units of threeInts
    @ No stack use.
    @
    @ Same arithmetic as foo, but the multiplication by 0xAAAAAAAB is done
    @ with a single muls against a constant loaded from the word at .L4.
    @ ---------------------------------------------------------------------
    .align    1
    .global    bar
    .syntax unified
    .code    16
    .thumb_func
    .fpu softvfp
    .type    bar, %function
bar:
    @ args = 0, pretend = 0, frame = 0
    @ frame_needed = 0, uses_anonymous_args = 0
    @ link register save eliminated.
    subs    r0, r0, r1          @ r0 = p - q, in bytes
    ldr    r1, .L4              @ r1 = -1431655765 (0xAAAAAAAB), q no longer needed
    asrs    r0, r0, #2          @ r0 = byte difference >> 2 (arithmetic)
    muls    r0, r1              @ r0 = r0 * r1, low 32 bits (the return value)
    @ sp needed
    bx    lr                    @ return to caller
.L5:
    .align    2
.L4:
    .word    -1431655765        @ 0xAAAAAAAB; 3 * 0xAAAAAAAB == 1 (mod 2^32)
    .size    bar, .-bar
    .ident    "GCC: (GNU) 10.2.0"

Kind regards

Mike Robins