On Tue, Sep 26, 2023 at 08:20:04AM +0300, Andy Shevchenko wrote: > These helpers are the optimized versions of the bitmap_remap() > where one of the bitmaps (source or destination) is of sequential bits. If so, can you add a test that makes sure that the new API is consistent with the old bitmap_remap? Also, please provide numbers showing how well they are optimized compared to bitmap_remap. > See more in the kernel documentation of the helpers. I grepped the whole kernel, not only the Documentation directory, and found nothing... > Signed-off-by: Andy Shevchenko <andriy.shevchenko@xxxxxxxxxxxxxxx> > --- > include/linux/bitmap.h | 9 ++++++ > lib/bitmap.c | 70 ++++++++++++++++++++++++++++++++++++++++++ > lib/test_bitmap.c | 23 ++++++++++++++ > 3 files changed, 102 insertions(+) > > diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h > index 1516ff979315..87013b9a7dd8 100644 > --- a/include/linux/bitmap.h > +++ b/include/linux/bitmap.h > @@ -60,6 +60,8 @@ struct device; > * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n > * bitmap_cut(dst, src, first, n, nbits) Cut n bits from first, copy rest > * bitmap_replace(dst, old, new, mask, nbits) *dst = (*old & ~(*mask)) | (*new & *mask) > + * bitmap_scatter(dst, src, mask, nbits) *dst = map(dense, sparse)(src) > + * bitmap_gather(dst, src, mask, nbits) *dst = map(sparse, dense)(src) > * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src) > * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) > * bitmap_onto(dst, orig, relmap, nbits) *dst = orig relative to relmap > @@ -208,6 +210,12 @@ int bitmap_parselist(const char *buf, unsigned long *maskp, > int nmaskbits); > int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, > unsigned long *dst, int nbits); > + > +unsigned int bitmap_scatter(unsigned long *dst, const unsigned long *src, > + const unsigned long *mask, unsigned int nbits); > +unsigned int bitmap_gather(unsigned long *dst, const unsigned long *src, > + const unsigned 
long *mask, unsigned int nbits); > + > void bitmap_remap(unsigned long *dst, const unsigned long *src, > const unsigned long *old, const unsigned long *new, unsigned int nbits); > int bitmap_bitremap(int oldbit, > @@ -216,6 +224,7 @@ void bitmap_onto(unsigned long *dst, const unsigned long *orig, > const unsigned long *relmap, unsigned int bits); > void bitmap_fold(unsigned long *dst, const unsigned long *orig, > unsigned int sz, unsigned int nbits); > + > int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); > void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); > int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); > diff --git a/lib/bitmap.c b/lib/bitmap.c > index 935e0f96e785..31cfc7846aae 100644 > --- a/lib/bitmap.c > +++ b/lib/bitmap.c > @@ -942,6 +942,76 @@ int bitmap_parse(const char *start, unsigned int buflen, > } > EXPORT_SYMBOL(bitmap_parse); > > +/** > + * bitmap_scatter - Scatter a bitmap according to the given mask > + * @dst: scattered bitmap > + * @src: gathered bitmap > + * @mask: bits to assign to in the scattered bitmap > + * @nbits: number of bits in each of these bitmaps > + * > + * Scatters bitmap with sequential bits according to the given @mask. > + * > + * Example: > + * If @src bitmap = 0x005a, with @mask = 0x1313, @dst will be 0x0302. > + * > + * Or in binary form > + * @src @mask @dst > + * 0000000001011010 0001001100010011 0000001100000010 > + * > + * (Bits 0, 1, 2, 3, 4, 5 are copied to the bits 0, 1, 4, 8, 9, 12) > + * > + * Returns: the weight of the @mask. Returning the weight of the mask is somewhat non-obvious... To me it would be more logical to return the weight of the destination, for example... But I see that in the following patch you're using the returned value. Maybe add a few words to justify that? 
> + */ > +unsigned int bitmap_scatter(unsigned long *dst, const unsigned long *src, > + const unsigned long *mask, unsigned int nbits) > +{ > + unsigned int bit; > + int n = 0; Is n signed on purpose? I think it should be consistent with the return value. > + > + bitmap_zero(dst, nbits); > + > + for_each_set_bit(bit, mask, nbits) > + __assign_bit(bit, dst, test_bit(n++, src)); > + > + return n; > +} > +EXPORT_SYMBOL(bitmap_scatter); > + > +/** > + * bitmap_gather - Gather a bitmap according to given mask > + * @dst: gathered bitmap > + * @src: scattered bitmap > + * @mask: bits to extract from in the scattered bitmap > + * @nbits: number of bits in each of these bitmaps > + * > + * Gathers bitmap with sparse bits according to the given @mask. > + * > + * Example: > + * If @src bitmap = 0x0302, with @mask = 0x1313, @dst will be 0x001a. Not sure about others, but to me the hex representation is quite useless; moreover, it's followed by the binary one. > + * Or in binary form > + * @src @mask @dst > + * 0000001100000010 0001001100010011 0000000000011010 > + * > + * (Bits 0, 1, 4, 8, 9, 12 are copied to the bits 0, 1, 2, 3, 4, 5) > + * > + * Returns: the weight of the @mask. > + */ It looks like those are designed to complement each other. Is that true? If so, can you make your example show that scatter -> gather -> scatter would restore the original bitmap? If I'm wrong, can you please underline that they are not complementary, and why? > +unsigned int bitmap_gather(unsigned long *dst, const unsigned long *src, > + const unsigned long *mask, unsigned int nbits) > +{ > + unsigned int bit; > + int n = 0; > + > + bitmap_zero(dst, nbits); > + > + for_each_set_bit(bit, mask, nbits) > + __assign_bit(n++, dst, test_bit(bit, src)); > + > + return n; > +} > +EXPORT_SYMBOL(bitmap_gather); I feel like they should reside in the header, because they are quite small functions indeed, and they would benefit from compile-time optimizations without bloating the kernel. 
Moreover, you are using them in patch #3 on 64-bit bitmaps, which would benefit from the small_const_nbits() optimization. > + > /** > * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap > * @buf: pointer to a bitmap > diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c > index 1f2dc7fef17f..f43a07679998 100644 > --- a/lib/test_bitmap.c > +++ b/lib/test_bitmap.c > @@ -50,6 +50,9 @@ static const unsigned long exp2[] __initconst = { > static const unsigned long exp2_to_exp3_mask[] __initconst = { > BITMAP_FROM_U64(0x008000020020212eULL), > }; > +static const unsigned long exp2_to_exp3_maskg[] __initconst = { > + BITMAP_FROM_U64(0x00000000000001ffULL), > +}; > /* exp3_0_1 = (exp2[0] & ~exp2_to_exp3_mask) | (exp2[1] & exp2_to_exp3_mask) */ > static const unsigned long exp3_0_1[] __initconst = { > BITMAP_FROM_U64(0x33b3333311313137ULL), > @@ -357,6 +360,25 @@ static void __init test_replace(void) > expect_eq_bitmap(bmap, exp3_1_0, nbits); > } > > +static void __init test_bitmap_sg(void) > +{ > + unsigned int nbits = 64; > + DECLARE_BITMAP(bmap, 1024); Can you make it 1000? That way we'll test the non-aligned case. > + unsigned int w; > + > + bitmap_zero(bmap, 1024); > + w = bitmap_gather(bmap, exp2_to_exp3_mask, exp2_to_exp3_mask, nbits); > + expect_eq_uint(bitmap_weight(exp2_to_exp3_mask, nbits), w); > + expect_eq_uint(bitmap_weight(bmap, 1024), w); > + expect_eq_bitmap(bmap, exp2_to_exp3_maskg, nbits); > + > + bitmap_zero(bmap, 1024); > + w = bitmap_scatter(bmap, exp2_to_exp3_maskg, exp2_to_exp3_mask, nbits); > + expect_eq_uint(bitmap_weight(exp2_to_exp3_maskg, nbits), w); > + expect_eq_uint(bitmap_weight(bmap, 1024), w); > + expect_eq_bitmap(bmap, exp2_to_exp3_mask, nbits); It would be interesting to compare bitmap scatter/gather performance against bitmap_remap. 
> +} > + > #define PARSE_TIME 0x1 > #define NO_LEN 0x2 > > @@ -1228,6 +1250,7 @@ static void __init selftest(void) > test_fill_set(); > test_copy(); > test_replace(); > + test_bitmap_sg(); > test_bitmap_arr32(); > test_bitmap_arr64(); > test_bitmap_parse(); > -- > 2.40.0.1.gaa8946217a0b