Hi Xiao,

On Tue, Oct 31, 2023 at 7:37 AM Xiao Wang <xiao.w.wang@xxxxxxxxx> wrote:
> This patch leverages the alternative mechanism to dynamically optimize
> bitops (including __ffs, __fls, ffs, fls) with Zbb instructions. When
> Zbb ext is not supported by the runtime CPU, legacy implementation is
> used. If Zbb is supported, then the optimized variants will be selected
> via alternative patching.
>
> The legacy bitops support is taken from the generic C implementation as
> fallback.
>
> If the parameter is a build-time constant, we leverage compiler builtin to
> calculate the result directly, this approach is inspired by x86 bitops
> implementation.
>
> EFI stub runs before the kernel, so alternative mechanism should not be
> used there, this patch introduces a macro NO_ALTERNATIVE for this purpose.
>
> Signed-off-by: Xiao Wang <xiao.w.wang@xxxxxxxxx>
> Reviewed-by: Charlie Jenkins <charlie@xxxxxxxxxxxx>

Thanks for your patch, which is now commit 457926b253200bd9 ("riscv:
Optimize bitops with Zbb extension") in riscv/for-next.

> --- a/arch/riscv/include/asm/bitops.h
> +++ b/arch/riscv/include/asm/bitops.h
> @@ -15,13 +15,261 @@
>  #include <asm/barrier.h>
>  #include <asm/bitsperlong.h>
>
> +#if !defined(CONFIG_RISCV_ISA_ZBB) || defined(NO_ALTERNATIVE)
>  #include <asm-generic/bitops/__ffs.h>
> -#include <asm-generic/bitops/ffz.h>
> -#include <asm-generic/bitops/fls.h>
>  #include <asm-generic/bitops/__fls.h>
> +#include <asm-generic/bitops/ffs.h>
> +#include <asm-generic/bitops/fls.h>
> +
> +#else
> +#include <asm/alternative-macros.h>
> +#include <asm/hwcap.h>
> +
> +#if (BITS_PER_LONG == 64)
> +#define CTZW	"ctzw "
> +#define CLZW	"clzw "
> +#elif (BITS_PER_LONG == 32)
> +#define CTZW	"ctz "
> +#define CLZW	"clz "
> +#else
> +#error "Unexpected BITS_PER_LONG"
> +#endif
> +
> +static __always_inline unsigned long variable__ffs(unsigned long word)
> +{
> +	int num;
> +
> +	asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
> +				      RISCV_ISA_EXT_ZBB, 1)
> +			  : : : : legacy);
> +
> +	asm volatile (".option push\n"
> +		      ".option arch,+zbb\n"
> +		      "ctz %0, %1\n"
> +		      ".option pop\n"
> +		      : "=r" (word) : "r" (word) :);
> +
> +	return word;
> +
> +legacy:
> +	num = 0;
> +#if BITS_PER_LONG == 64
> +	if ((word & 0xffffffff) == 0) {
> +		num += 32;
> +		word >>= 32;
> +	}
> +#endif
> +	if ((word & 0xffff) == 0) {
> +		num += 16;
> +		word >>= 16;
> +	}
> +	if ((word & 0xff) == 0) {
> +		num += 8;
> +		word >>= 8;
> +	}
> +	if ((word & 0xf) == 0) {
> +		num += 4;
> +		word >>= 4;
> +	}
> +	if ((word & 0x3) == 0) {
> +		num += 2;
> +		word >>= 2;
> +	}
> +	if ((word & 0x1) == 0)
> +		num += 1;
> +	return num;
> +}

Surely we can do better than duplicating include/asm-generic/bitops/__ffs.h?

E.g. rename the generic implementation to generic___ffs():

-static __always_inline unsigned long __ffs(unsigned long word)
+static __always_inline unsigned long generic___ffs(unsigned long word)
 {
	...
 }

+#ifndef __ffs
+#define __ffs(x) generic___ffs(x)
+#endif

and explicitly call the generic one here?

Same comment for the other functions.

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@xxxxxxxxxxxxxx

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like
that.
                                -- Linus Torvalds
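
[Editor's illustration] A minimal sketch of what the suggested approach could
look like on the RISC-V side, assuming the generic helper in
include/asm-generic/bitops/__ffs.h has been renamed to generic___ffs() as
proposed above; the name and the shape of any eventual upstream change are
assumptions here, not taken from the quoted patch:

/*
 * Sketch only: the Zbb fast path stays as in the quoted patch, while the
 * legacy label falls back to the (assumed renamed) generic helper instead
 * of duplicating its bit-twiddling sequence.
 */
static __always_inline unsigned long variable__ffs(unsigned long word)
{
	asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
				      RISCV_ISA_EXT_ZBB, 1)
			  : : : : legacy);

	asm volatile (".option push\n"
		      ".option arch,+zbb\n"
		      "ctz %0, %1\n"
		      ".option pop\n"
		      : "=r" (word) : "r" (word) :);

	return word;

legacy:
	return generic___ffs(word);	/* assumed rename of the generic __ffs() */
}

The same pattern would presumably carry over to __fls, ffs and fls, with the
generic headers exposing correspondingly renamed helpers behind #ifndef
overrides so that architectures can still provide their own definitions.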