Hi, > -----Original Message----- > From: Anup Patel <anup@xxxxxxxxxxxxxx> > Sent: Tuesday, August 29, 2023 7:08 PM > To: Wang, Xiao W <xiao.w.wang@xxxxxxxxx> > Cc: paul.walmsley@xxxxxxxxxx; palmer@xxxxxxxxxxx; > aou@xxxxxxxxxxxxxxxxx; ardb@xxxxxxxxxx; Li, Haicheng > <haicheng.li@xxxxxxxxx>; linux-riscv@xxxxxxxxxxxxxxxxxxx; linux- > efi@xxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx > Subject: Re: [PATCH] RISC-V: Optimize bitops with Zbb extension > > On Sun, Aug 6, 2023 at 8:09 AM Xiao Wang <xiao.w.wang@xxxxxxxxx> wrote: > > > > This patch leverages the alternative mechanism to dynamically optimize > > bitops (including __ffs, __fls, ffs, fls) with Zbb instructions. When > > Zbb ext is not supported by the runtime CPU, legacy implementation is > > used. If Zbb is supported, then the optimized variants will be selected > > via alternative patching. > > > > The legacy bitops support is taken from the generic C implementation as > > fallback. > > > > If the parameter is a build-time constant, we leverage compiler builtin to > > calculate the result directly, this approach is inspired by x86 bitops > > implementation. > > > > EFI stub runs before the kernel, so alternative mechanism should not be > > used there, this patch introduces a macro EFI_NO_ALTERNATIVE for this > > purpose. > > I am getting the following compile error with this patch: > > GEN Makefile > UPD include/config/kernel.release > UPD include/generated/utsrelease.h > CC kernel/bounds.s > In file included from /home/anup/Work/riscv- > test/linux/include/linux/bitmap.h:9, > from > /home/anup/Work/riscv-test/linux/arch/riscv/include/asm/cpufeature.h:9, > from > /home/anup/Work/riscv-test/linux/arch/riscv/include/asm/hwcap.h:90, It looks there's a cyclic header including, which leads to this build error. I checked https://github.com/kvm-riscv/linux/tree/master and https://github.com/torvalds/linux/tree/master, but I don't see "asm/cpufeature.h" is included in asm/hwcap.h:90, maybe I miss something, could you help point me to the repo/branch I should work on? Thanks, Xiao > from > /home/anup/Work/riscv-test/linux/arch/riscv/include/asm/bitops.h:26, > from > /home/anup/Work/riscv-test/linux/include/linux/bitops.h:68, > from /home/anup/Work/riscv-test/linux/include/linux/log2.h:12, > from /home/anup/Work/riscv-test/linux/kernel/bounds.c:13: > /home/anup/Work/riscv-test/linux/include/linux/find.h: In function > 'find_next_bit': > /home/anup/Work/riscv-test/linux/include/linux/find.h:64:30: error: > implicit declaration of function '__ffs' > [-Werror=implicit-function-declaration] > 64 | return val ? __ffs(val) : size; > > Regards, > Anup > > > > > > Signed-off-by: Xiao Wang <xiao.w.wang@xxxxxxxxx> > > --- > > arch/riscv/include/asm/bitops.h | 266 +++++++++++++++++++++++++- > > drivers/firmware/efi/libstub/Makefile | 2 +- > > 2 files changed, 264 insertions(+), 4 deletions(-) > > > > diff --git a/arch/riscv/include/asm/bitops.h > b/arch/riscv/include/asm/bitops.h > > index 3540b690944b..f727f6489cd5 100644 > > --- a/arch/riscv/include/asm/bitops.h > > +++ b/arch/riscv/include/asm/bitops.h > > @@ -15,13 +15,273 @@ > > #include <asm/barrier.h> > > #include <asm/bitsperlong.h> > > > > +#if !defined(CONFIG_RISCV_ISA_ZBB) || defined(EFI_NO_ALTERNATIVE) > > #include <asm-generic/bitops/__ffs.h> > > -#include <asm-generic/bitops/ffz.h> > > -#include <asm-generic/bitops/fls.h> > > #include <asm-generic/bitops/__fls.h> > > +#include <asm-generic/bitops/ffs.h> > > +#include <asm-generic/bitops/fls.h> > > + > > +#else > > +#include <asm/alternative-macros.h> > > +#include <asm/hwcap.h> > > + > > +#if (BITS_PER_LONG == 64) > > +#define CTZW "ctzw " > > +#define CLZW "clzw " > > +#elif (BITS_PER_LONG == 32) > > +#define CTZW "ctz " > > +#define CLZW "clz " > > +#else > > +#error "Unexpected BITS_PER_LONG" > > +#endif > > + > > +static __always_inline unsigned long variable__ffs(unsigned long word) > > +{ > > + int num; > > + > > + asm_volatile_goto( > > + ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) > > + : : : : legacy); > > + > > + asm volatile ( > > + ".option push\n" > > + ".option arch,+zbb\n" > > + "ctz %0, %1\n" > > + ".option pop\n" > > + : "=r" (word) : "r" (word) :); > > + > > + return word; > > + > > +legacy: > > + num = 0; > > +#if BITS_PER_LONG == 64 > > + if ((word & 0xffffffff) == 0) { > > + num += 32; > > + word >>= 32; > > + } > > +#endif > > + if ((word & 0xffff) == 0) { > > + num += 16; > > + word >>= 16; > > + } > > + if ((word & 0xff) == 0) { > > + num += 8; > > + word >>= 8; > > + } > > + if ((word & 0xf) == 0) { > > + num += 4; > > + word >>= 4; > > + } > > + if ((word & 0x3) == 0) { > > + num += 2; > > + word >>= 2; > > + } > > + if ((word & 0x1) == 0) > > + num += 1; > > + return num; > > +} > > + > > +/** > > + * __ffs - find first set bit in a long word > > + * @word: The word to search > > + * > > + * Undefined if no set bit exists, so code should check against 0 first. > > + */ > > +#define __ffs(word) \ > > + (__builtin_constant_p(word) ? \ > > + (unsigned long)__builtin_ctzl(word) : \ > > + variable__ffs(word)) > > + [...]