Re: [failures] mm-page-writeback-consolidate-wb_thresh-bumping-logic-into-__wb_calc_thresh.patch removed from -mm tree

Jan Kara <jack@xxxxxxx> · Thu, 16 Jan 2025 18:45:08 +0100

Hello Andrew!

On Mon 13-01-25 15:22:46, Andrew Morton wrote:
> 
> The quilt patch titled
>      Subject: mm/page-writeback: consolidate wb_thresh bumping logic into __wb_calc_thresh
> has been removed from the -mm tree.  Its filename was
>      mm-page-writeback-consolidate-wb_thresh-bumping-logic-into-__wb_calc_thresh.patch
> 
> This patch was dropped because it had testing failures

Can you please put this patch back? Guenter and I have been debugging the
problem, and in the end it seems to be a HW issue (in the emulation of a
big-endian arch) that was merely triggered by the timing / code changes
introduced by this patch [1]. Thanks!

								Honza

[1] https://lore.kernel.org/all/897b426d-7ca7-4bfe-8342-d2af910f8202@xxxxxxxxxxxx

> 
> ------------------------------------------------------
> From: Jim Zhao <jimzhao.ai@xxxxxxxxx>
> Subject: mm/page-writeback: consolidate wb_thresh bumping logic into __wb_calc_thresh
> Date: Thu, 21 Nov 2024 18:05:39 +0800
> 
> Address the feedback from 39ac99852fca ("mm/page-writeback: raise
> wb_thresh to prevent write blocking with strictlimit").  The wb_thresh
> bumping logic is scattered across wb_position_ratio, __wb_calc_thresh, and
> wb_update_dirty_ratelimit.  For consistency, consolidate all wb_thresh
> bumping logic into __wb_calc_thresh.
> 
> Link: https://lkml.kernel.org/r/20241121100539.605818-1-jimzhao.ai@xxxxxxxxx
> Signed-off-by: Jim Zhao <jimzhao.ai@xxxxxxxxx>
> Reviewed-by: Jan Kara <jack@xxxxxxx>
> Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
> Cc: Kemeng Shi <shikemeng@xxxxxxxxxxxxxxx>
> Cc: Guenter Roeck <linux@xxxxxxxxxxxx>
> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> ---
> 
>  mm/page-writeback.c |   53 ++++++++++++------------------------------
>  1 file changed, 16 insertions(+), 37 deletions(-)
> 
> --- a/mm/page-writeback.c~mm-page-writeback-consolidate-wb_thresh-bumping-logic-into-__wb_calc_thresh
> +++ a/mm/page-writeback.c
> @@ -942,26 +942,25 @@ static unsigned long __wb_calc_thresh(st
>  	wb_min_max_ratio(wb, &wb_min_ratio, &wb_max_ratio);
>  
>  	wb_thresh += (thresh * wb_min_ratio) / (100 * BDI_RATIO_SCALE);
> -	wb_max_thresh = thresh * wb_max_ratio / (100 * BDI_RATIO_SCALE);
> -	if (wb_thresh > wb_max_thresh)
> -		wb_thresh = wb_max_thresh;
>  
>  	/*
> -	 * With strictlimit flag, the wb_thresh is treated as
> -	 * a hard limit in balance_dirty_pages() and wb_position_ratio().
> -	 * It's possible that wb_thresh is close to zero, not because
> -	 * the device is slow, but because it has been inactive.
> -	 * To prevent occasional writes from being blocked, we raise wb_thresh.
> +	 * It's very possible that wb_thresh is close to 0 not because the
> +	 * device is slow, but that it has remained inactive for long time.
> +	 * Honour such devices a reasonable good (hopefully IO efficient)
> +	 * threshold, so that the occasional writes won't be blocked and active
> +	 * writes can rampup the threshold quickly.
>  	 */
> -	if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
> -		unsigned long limit = hard_dirty_limit(dom, dtc->thresh);
> -		u64 wb_scale_thresh = 0;
> -
> -		if (limit > dtc->dirty)
> -			wb_scale_thresh = (limit - dtc->dirty) / 100;
> -		wb_thresh = max(wb_thresh, min(wb_scale_thresh, wb_max_thresh / 4));
> +	if (thresh > dtc->dirty) {
> +		if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT))
> +			wb_thresh = max(wb_thresh, (thresh - dtc->dirty) / 100);
> +		else
> +			wb_thresh = max(wb_thresh, (thresh - dtc->dirty) / 8);
>  	}
>  
> +	wb_max_thresh = thresh * wb_max_ratio / (100 * BDI_RATIO_SCALE);
> +	if (wb_thresh > wb_max_thresh)
> +		wb_thresh = wb_max_thresh;
> +
>  	return wb_thresh;
>  }
>  
> @@ -969,6 +968,7 @@ unsigned long wb_calc_thresh(struct bdi_
>  {
>  	struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };
>  
> +	domain_dirty_avail(&gdtc, true);
>  	return __wb_calc_thresh(&gdtc, thresh);
>  }
>  
> @@ -1145,12 +1145,6 @@ static void wb_position_ratio(struct dir
>  	if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
>  		long long wb_pos_ratio;
>  
> -		if (dtc->wb_dirty < 8) {
> -			dtc->pos_ratio = min_t(long long, pos_ratio * 2,
> -					   2 << RATELIMIT_CALC_SHIFT);
> -			return;
> -		}
> -
>  		if (dtc->wb_dirty >= wb_thresh)
>  			return;
>  
> @@ -1222,14 +1216,6 @@ static void wb_position_ratio(struct dir
>  	if (unlikely(wb_thresh > dtc->thresh))
>  		wb_thresh = dtc->thresh;
>  	/*
> -	 * It's very possible that wb_thresh is close to 0 not because the
> -	 * device is slow, but that it has remained inactive for long time.
> -	 * Honour such devices a reasonable good (hopefully IO efficient)
> -	 * threshold, so that the occasional writes won't be blocked and active
> -	 * writes can rampup the threshold quickly.
> -	 */
> -	wb_thresh = max(wb_thresh, (limit - dtc->dirty) / 8);
> -	/*
>  	 * scale global setpoint to wb's:
>  	 *	wb_setpoint = setpoint * wb_thresh / thresh
>  	 */
> @@ -1484,17 +1470,10 @@ static void wb_update_dirty_ratelimit(st
>  	 * balanced_dirty_ratelimit = task_ratelimit * write_bw / dirty_rate).
>  	 * Hence, to calculate "step" properly, we have to use wb_dirty as
>  	 * "dirty" and wb_setpoint as "setpoint".
> -	 *
> -	 * We rampup dirty_ratelimit forcibly if wb_dirty is low because
> -	 * it's possible that wb_thresh is close to zero due to inactivity
> -	 * of backing device.
>  	 */
>  	if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
>  		dirty = dtc->wb_dirty;
> -		if (dtc->wb_dirty < 8)
> -			setpoint = dtc->wb_dirty + 1;
> -		else
> -			setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
> +		setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
>  	}
>  
>  	if (dirty < setpoint) {
> _
> 
> Patches currently in -mm which might be from jimzhao.ai@xxxxxxxxx are
> 
> 
-- 
Jan Kara <jack@xxxxxxxx>
SUSE Labs, CR