On Sun, 28 Jan 2024, Vincent Mailhol wrote:
The compiler is not able to do constant folding on "asm volatile" code.

Evaluate whether or not the function argument is a constant expression
and if this is the case, return an equivalent builtin expression.

On linux 6.7 with an allyesconfig and GCC 13.2.1, it saves roughly 11 KB.

  $ size --format=GNU vmlinux.before vmlinux.after
        text       data        bss      total filename
    60457964   70953697    2288644  133700305 vmlinux.before
    60441196   70957057    2290724  133688977 vmlinux.after

Reference: commit fdb6649ab7c1 ("x86/asm/bitops: Use __builtin_ctzl() to evaluate constant expressions")
Link: https://git.kernel.org/torvalds/c/fdb6649ab7c1

Reviewed-by: Geert Uytterhoeven <geert@xxxxxxxxxxxxxx>
Signed-off-by: Vincent Mailhol <mailhol.vincent@xxxxxxxxxx>
---
 arch/m68k/include/asm/bitops.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h
index a8b23f897f24..02ec8a193b96 100644
--- a/arch/m68k/include/asm/bitops.h
+++ b/arch/m68k/include/asm/bitops.h
@@ -469,6 +469,9 @@ static __always_inline unsigned long ffz(unsigned long word)
 {
 	int res;
 
+	if (__builtin_constant_p(word))
+		return __builtin_ctzl(~word);
+
 	__asm__ __volatile__ ("bfffo %1{#0,#0},%0"
 			      : "=d" (res) : "d" (~word & -~word));
 	return res ^ 31;
If the builtin has the desired behaviour, why do we reimplement it in asm? Shouldn't we abandon one or the other to avoid having to prove (and maintain) their equivalence?
@@ -490,6 +493,9 @@ static __always_inline unsigned long ffz(unsigned long word)
 	!defined(CONFIG_M68000)
 static __always_inline unsigned long __ffs(unsigned long x)
 {
+	if (__builtin_constant_p(x))
+		return __builtin_ctzl(x);
+
 	__asm__ __volatile__ ("bitrev %0; ff1 %0"
 		: "=d" (x)
 		: "0" (x));
@@ -522,6 +528,9 @@ static __always_inline int ffs(int x)
 {
 	int cnt;
 
+	if (__builtin_constant_p(x))
+		return __builtin_ffs(x);
+
 	__asm__ ("bfffo %1{#0:#0},%0"
 		: "=d" (cnt)
 		: "dm" (x & -x));
@@ -540,6 +549,9 @@ static __always_inline int fls(unsigned int x)
 {
 	int cnt;
 
+	if (__builtin_constant_p(x))
+		return x ? BITS_PER_TYPE(x) - __builtin_clz(x) : 0;
+
 	__asm__ ("bfffo %1{#0,#0},%0"
 		: "=d" (cnt)
 		: "dm" (x));