Re: [PATCH] drivers/md/bcache: Clean up bch_get_congested()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2019/3/13 12:41 下午, George Spelvin wrote:
> There are a few nits in this function.  They could in theory all
> be separate patches, but that's probably taking small commits
> too far.
> 
> 1) I added a brief comment saying what it does.
> 
> 2) I like to declare pointer parameters "const" where possible
>    for documentation reasons.
> 
> 3) It uses bitmap_weight(&rand, BITS_PER_LONG) to compute the Hamming
> weight of a 32-bit random number (giving a random integer with
> mean 16 and variance 8).  Passing by reference in a 64-bit variable
> is silly; just use hweight32().
> 
> 4) Its helper function fract_exp_two is unnecessarily tangled.
> Gcc can optimize the multiply by (1 << x) to a shift, but it can
> be written in a much more straightforward way at the cost of one
> more bit of internal precision.  Some analysis reveals that this
> bit is always available.
> 
> This shrinks the object code for fract_exp_two(x, 6) from 23 bytes:
> 
> 0000000000000000 <foo1>:
>    0:   89 f9                   mov    %edi,%ecx
>    2:   c1 e9 06                shr    $0x6,%ecx
>    5:   b8 01 00 00 00          mov    $0x1,%eax
>    a:   d3 e0                   shl    %cl,%eax
>    c:   83 e7 3f                and    $0x3f,%edi
>    f:   d3 e7                   shl    %cl,%edi
>   11:   c1 ef 06                shr    $0x6,%edi
>   14:   01 f8                   add    %edi,%eax
>   16:   c3                      retq
> 
> To 19:
> 
> 0000000000000017 <foo2>:
>   17:   89 f8                   mov    %edi,%eax
>   19:   83 e0 3f                and    $0x3f,%eax
>   1c:   83 c0 40                add    $0x40,%eax
>   1f:   89 f9                   mov    %edi,%ecx
>   21:   c1 e9 06                shr    $0x6,%ecx
>   24:   d3 e0                   shl    %cl,%eax
>   26:   c1 e8 06                shr    $0x6,%eax
>   29:   c3                      retq
> 
> (Verified with 0 <= frac_bits <= 8, 0 <= x < 16<<frac_bits;
> both versions produce the same output.)
> 
> 5) And finally, the call to bch_get_congested() in check_should_bypass()
> is separated from the use of the value by multiple tests which
> could moot the need to compute it.  Move the computation down to
> where it's needed.  This also saves a local register to hold the
> computed value.
> 
> Signed-off-by: George Spelvin <lkml@xxxxxxx>

Hi George,

I will add it into my for-test directory. Thanks.

Coly Li

> ---
>  drivers/md/bcache/request.c | 15 ++++++++-------
>  drivers/md/bcache/request.h |  2 +-
>  drivers/md/bcache/util.h    | 26 +++++++++++++++++++-------
>  3 files changed, 28 insertions(+), 15 deletions(-)
> 
> This is quite a drive-by patch; I don't even use bcache or have an SSD to
> test it on.  But I happened to be looking at the code and was scratching
> my head trying to figure out what the heck bch_get_congested() was doing.
> This patch is the result of that analysis.  Feel free to take whatever
> part of it is useful to you.
> 
> diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
> index 15070412a32e..f1e465c32288 100644
> --- a/drivers/md/bcache/request.c
> +++ b/drivers/md/bcache/request.c
> @@ -329,12 +329,13 @@ void bch_data_insert(struct closure *cl)
>  	bch_data_insert_start(cl);
>  }
>  
> -/* Congested? */
> -
> -unsigned int bch_get_congested(struct cache_set *c)
> +/*
> + * Congested?  Return 0 (not congested) or the limit (in sectors)
> + * beyond which we should bypass the cache due to congestion.
> + */
> +unsigned int bch_get_congested(const struct cache_set *c)
>  {
>  	int i;
> -	long rand;
>  
>  	if (!c->congested_read_threshold_us &&
>  	    !c->congested_write_threshold_us)
> @@ -353,8 +354,7 @@ unsigned int bch_get_congested(struct cache_set *c)
>  	if (i > 0)
>  		i = fract_exp_two(i, 6);
>  
> -	rand = get_random_int();
> -	i -= bitmap_weight(&rand, BITS_PER_LONG);
> +	i -= hweight32(get_random_u32());
>  
>  	return i > 0 ? i : 1;
>  }
> @@ -376,7 +376,7 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
>  {
>  	struct cache_set *c = dc->disk.c;
>  	unsigned int mode = cache_mode(dc);
> -	unsigned int sectors, congested = bch_get_congested(c);
> +	unsigned int sectors, congested;
>  	struct task_struct *task = current;
>  	struct io *i;
>  
> @@ -411,6 +411,7 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
>  			goto rescale;
>  	}
>  
> +	congested = bch_get_congested(c);
>  	if (!congested && !dc->sequential_cutoff)
>  		goto rescale;
>  
> diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
> index 721bf336ed1a..c64dbd7a91aa 100644
> --- a/drivers/md/bcache/request.h
> +++ b/drivers/md/bcache/request.h
> @@ -33,7 +33,7 @@ struct data_insert_op {
>  	BKEY_PADDED(replace_key);
>  };
>  
> -unsigned int bch_get_congested(struct cache_set *c);
> +unsigned int bch_get_congested(const struct cache_set *c);
>  void bch_data_insert(struct closure *cl);
>  
>  void bch_cached_dev_request_init(struct cached_dev *dc);
> diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
> index 00aab6abcfe4..1fbced94e4cc 100644
> --- a/drivers/md/bcache/util.h
> +++ b/drivers/md/bcache/util.h
> @@ -560,17 +560,29 @@ static inline uint64_t bch_crc64_update(uint64_t crc,
>  	return crc;
>  }
>  
> -/* Does linear interpolation between powers of two */
> +/*
> + * A stepwise-linear pseudo-exponential.  This returns 1 << (x >>
> + * frac_bits), with the less-significant bits filled in by linear
> + * interpolation.
> + *
> + * This can also be interpreted as a floating-point number format,
> + * where the low frac_bits are the mantissa (with implicit leading
> + * 1 bit), and the more significant bits are the exponent.
> + * The return value is 1.mantissa * 2^exponent.
> + *
> + * The way this is used, fract_bits is 6 and the largest possible
> + * input is CONGESTED_MAX-1 = 1023 (exponent 16, mantissa 0x1.fc),
> + * so the maximum output is 0x1fc00.
> + */
>  static inline unsigned int fract_exp_two(unsigned int x,
>  					 unsigned int fract_bits)
>  {
> -	unsigned int fract = x & ~(~0 << fract_bits);
> +	unsigned int mantissa = 1 << fract_bits;	/* Implicit bit */
>  
> -	x >>= fract_bits;
> -	x   = 1 << x;
> -	x  += (x * fract) >> fract_bits;
> -
> -	return x;
> +	mantissa += x & (mantissa - 1);
> +	x >>= fract_bits;	/* The exponent */
> +	/* Largest intermediate value 0x7f0000 */
> +	return mantissa << x >> fract_bits;
>  }
>  
>  void bch_bio_map(struct bio *bio, void *base);
> 


-- 

Coly Li



[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Linux ARM Kernel]     [Linux Filesystem Development]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Security]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux