Re: [PATCH v1 2/5] lib/bitmap: Introduce bitmap_scatter() and bitmap_gather() helpers

Yury Norov <yury.norov@xxxxxxxxx> · Tue, 26 Sep 2023 17:25:13 -0700

On Tue, Sep 26, 2023 at 08:20:04AM +0300, Andy Shevchenko wrote:
> These helpers are the optimized versions of the bitmap_remap()
> where one of the bitmaps (source or destination) is of sequential bits.

If so, can you add a test that makes sure that the new API is consistent
with the old bitmap_remap()? And also provide numbers showing how well
they are optimized compared to bitmap_remap().

> See more in the kernel documentation of the helpers.

I grepped the whole kernel, not only the Documentation directory, and
found nothing...

> Signed-off-by: Andy Shevchenko <andriy.shevchenko@xxxxxxxxxxxxxxx>
> ---
>  include/linux/bitmap.h |  9 ++++++
>  lib/bitmap.c           | 70 ++++++++++++++++++++++++++++++++++++++++++
>  lib/test_bitmap.c      | 23 ++++++++++++++
>  3 files changed, 102 insertions(+)
> 
> diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
> index 1516ff979315..87013b9a7dd8 100644
> --- a/include/linux/bitmap.h
> +++ b/include/linux/bitmap.h
> @@ -60,6 +60,8 @@ struct device;
>   *  bitmap_shift_left(dst, src, n, nbits)       *dst = *src << n
>   *  bitmap_cut(dst, src, first, n, nbits)       Cut n bits from first, copy rest
>   *  bitmap_replace(dst, old, new, mask, nbits)  *dst = (*old & ~(*mask)) | (*new & *mask)
> + *  bitmap_scatter(dst, src, mask, nbits)	*dst = map(dense, sparse)(src)
> + *  bitmap_gather(dst, src, mask, nbits)	*dst = map(sparse, dense)(src)
>   *  bitmap_remap(dst, src, old, new, nbits)     *dst = map(old, new)(src)
>   *  bitmap_bitremap(oldbit, old, new, nbits)    newbit = map(old, new)(oldbit)
>   *  bitmap_onto(dst, orig, relmap, nbits)       *dst = orig relative to relmap
> @@ -208,6 +210,12 @@ int bitmap_parselist(const char *buf, unsigned long *maskp,
>  			int nmaskbits);
>  int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen,
>  			unsigned long *dst, int nbits);
> +
> +unsigned int bitmap_scatter(unsigned long *dst, const unsigned long *src,
> +		const unsigned long *mask, unsigned int nbits);
> +unsigned int bitmap_gather(unsigned long *dst, const unsigned long *src,
> +		const unsigned long *mask, unsigned int nbits);
> +
>  void bitmap_remap(unsigned long *dst, const unsigned long *src,
>  		const unsigned long *old, const unsigned long *new, unsigned int nbits);
>  int bitmap_bitremap(int oldbit,
> @@ -216,6 +224,7 @@ void bitmap_onto(unsigned long *dst, const unsigned long *orig,
>  		const unsigned long *relmap, unsigned int bits);
>  void bitmap_fold(unsigned long *dst, const unsigned long *orig,
>  		unsigned int sz, unsigned int nbits);
> +
>  int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order);
>  void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order);
>  int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order);
> diff --git a/lib/bitmap.c b/lib/bitmap.c
> index 935e0f96e785..31cfc7846aae 100644
> --- a/lib/bitmap.c
> +++ b/lib/bitmap.c
> @@ -942,6 +942,76 @@ int bitmap_parse(const char *start, unsigned int buflen,
>  }
>  EXPORT_SYMBOL(bitmap_parse);
>  
> +/**
> + * bitmap_scatter - Scatter a bitmap according to the given mask
> + * @dst: scattered bitmap
> + * @src: gathered bitmap
> + * @mask: bits to assign to in the scattered bitmap
> + * @nbits: number of bits in each of these bitmaps
> + *
> + * Scatters bitmap with sequential bits according to the given @mask.
> + *
> + * Example:
> + * If @src bitmap = 0x005a, with @mask = 0x1313, @dst will be 0x0302.
> + *
> + * Or in binary form
> + * @src			@mask			@dst
> + * 0000000001011010	0001001100010011	0000001100000010
> + *
> + * (Bits 0, 1, 2, 3, 4, 5 are copied to the bits 0, 1, 4, 8, 9, 12)
> + *
> + * Returns: the weight of the @mask.

Returning the weight of the mask is somewhat non-obvious... To me it
would be more logical to return the weight of the destination, for
example...

But I see that in the following patch you're using the returned value.
Maybe add a few words to justify that choice?

> + */
> +unsigned int bitmap_scatter(unsigned long *dst, const unsigned long *src,
> +			    const unsigned long *mask, unsigned int nbits)
> +{
> +	unsigned int bit;
> +	int n = 0;

Is n signed on purpose? I think its type should be consistent with the
return value.

> +
> +	bitmap_zero(dst, nbits);
> +
> +	for_each_set_bit(bit, mask, nbits)
> +		__assign_bit(bit, dst, test_bit(n++, src));
> +
> +	return n;
> +}
> +EXPORT_SYMBOL(bitmap_scatter);
> +
> +/**
> + * bitmap_gather - Gather a bitmap according to given mask
> + * @dst: gathered bitmap
> + * @src: scattered bitmap
> + * @mask: bits to extract from in the scattered bitmap
> + * @nbits: number of bits in each of these bitmaps
> + *
> + * Gathers bitmap with sparse bits according to the given @mask.
> + *
> + * Example:
> + * If @src bitmap = 0x0302, with @mask = 0x1313, @dst will be 0x001a.

Not sure about others, but to me the hex representation is quite useless;
moreover, it's followed by the binary one.

> + * Or in binary form
> + * @src			@mask			@dst
> + * 0000001100000010	0001001100010011	0000000000011010
> + *
> + * (Bits 0, 1, 4, 8, 9, 12 are copied to the bits 0, 1, 2, 3, 4, 5)
> + *
> + * Returns: the weight of the @mask.
> + */

It looks like those are designed to complement each other. Is that
true? If so, can you make your example show that
        scatter -> gather -> scatter
would restore the original bitmap?

If I'm wrong, can you please state explicitly that they are not
complementary, and why?

> +unsigned int bitmap_gather(unsigned long *dst, const unsigned long *src,
> +			   const unsigned long *mask, unsigned int nbits)
> +{
> +	unsigned int bit;
> +	int n = 0;
> +
> +	bitmap_zero(dst, nbits);
> +
> +	for_each_set_bit(bit, mask, nbits)
> +		__assign_bit(n++, dst, test_bit(bit, src));
> +
> +	return n;
> +}
> +EXPORT_SYMBOL(bitmap_gather);

I feel like they should reside in the header, because they are quite
small functions indeed, and they would benefit from compile-time
optimizations without bloating the kernel.

Moreover, you are using them in patch #3 on 64-bit bitmaps, which
would benefit from the small_const_nbits() optimization.

> +
>  /**
>   * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap
>   *	@buf: pointer to a bitmap
> diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
> index 1f2dc7fef17f..f43a07679998 100644
> --- a/lib/test_bitmap.c
> +++ b/lib/test_bitmap.c
> @@ -50,6 +50,9 @@ static const unsigned long exp2[] __initconst = {
>  static const unsigned long exp2_to_exp3_mask[] __initconst = {
>  	BITMAP_FROM_U64(0x008000020020212eULL),
>  };
> +static const unsigned long exp2_to_exp3_maskg[] __initconst = {
> +	BITMAP_FROM_U64(0x00000000000001ffULL),
> +};
>  /* exp3_0_1 = (exp2[0] & ~exp2_to_exp3_mask) | (exp2[1] & exp2_to_exp3_mask) */
>  static const unsigned long exp3_0_1[] __initconst = {
>  	BITMAP_FROM_U64(0x33b3333311313137ULL),
> @@ -357,6 +360,25 @@ static void __init test_replace(void)
>  	expect_eq_bitmap(bmap, exp3_1_0, nbits);
>  }
>  
> +static void __init test_bitmap_sg(void)
> +{
> +	unsigned int nbits = 64;
> +	DECLARE_BITMAP(bmap, 1024);

Can you make it 1000? That way we'll test the non-aligned case.

> +	unsigned int w;
> +
> +	bitmap_zero(bmap, 1024);
> +	w = bitmap_gather(bmap, exp2_to_exp3_mask, exp2_to_exp3_mask, nbits);
> +	expect_eq_uint(bitmap_weight(exp2_to_exp3_mask, nbits), w);
> +	expect_eq_uint(bitmap_weight(bmap, 1024), w);
> +	expect_eq_bitmap(bmap, exp2_to_exp3_maskg, nbits);
> +
> +	bitmap_zero(bmap, 1024);
> +	w = bitmap_scatter(bmap, exp2_to_exp3_maskg, exp2_to_exp3_mask, nbits);
> +	expect_eq_uint(bitmap_weight(exp2_to_exp3_maskg, nbits), w);
> +	expect_eq_uint(bitmap_weight(bmap, 1024), w);
> +	expect_eq_bitmap(bmap, exp2_to_exp3_mask, nbits);

It would be interesting to compare bitmap scatter/gather performance
against bitmap_remap().

> +}
> +
>  #define PARSE_TIME	0x1
>  #define NO_LEN		0x2
>  
> @@ -1228,6 +1250,7 @@ static void __init selftest(void)
>  	test_fill_set();
>  	test_copy();
>  	test_replace();
> +	test_bitmap_sg();
>  	test_bitmap_arr32();
>  	test_bitmap_arr64();
>  	test_bitmap_parse();
> -- 
> 2.40.0.1.gaa8946217a0b