Re: [PATCH 2/3] bcache: avoid unnecessary soft lockup in kworker update_writeback_rate()

Jens Axboe <axboe@xxxxxxxxx> · Fri, 27 May 2022 09:49:22 -0600

On 5/27/22 9:28 AM, Coly Li wrote:
> diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
> index d138a2d73240..c51671abe74e 100644
> --- a/drivers/md/bcache/writeback.c
> +++ b/drivers/md/bcache/writeback.c
> @@ -214,6 +214,7 @@ static void update_writeback_rate(struct work_struct *work)
>  					     struct cached_dev,
>  					     writeback_rate_update);
>  	struct cache_set *c = dc->disk.c;
> +	bool contention = false;
>  
>  	/*
>  	 * should check BCACHE_DEV_RATE_DW_RUNNING before calling
> @@ -243,13 +244,41 @@ static void update_writeback_rate(struct work_struct *work)
>  		 * in maximum writeback rate number(s).
>  		 */
>  		if (!set_at_max_writeback_rate(c, dc)) {
> -			down_read(&dc->writeback_lock);
> -			__update_writeback_rate(dc);
> -			update_gc_after_writeback(c);
> -			up_read(&dc->writeback_lock);
> +			/*
> +			 * When contention happens on dc->writeback_lock with
> +			 * the writeback thread, this kwork may be blocked for
> +			 * very long time if there are too many dirty data to
> +			 * writeback, and kerne message will complain a (bogus)
> +			 * software lockup kernel message. To avoid potential
> +			 * starving, if down_read_trylock() fails, writeback
> +			 * rate updating will be skipped for dc->retry_max times
> +			 * at most while delay this worker a bit longer time.
> +			 * If dc->retry_max times are tried and the trylock
> +			 * still fails, then call down_read() to wait for
> +			 * dc->writeback_lock.
> +			 */
> +			if (!down_read_trylock((&dc->writeback_lock))) {
> +				contention = true;
> +				dc->retry_nr++;
> +				if (dc->retry_nr > dc->retry_max)
> +					down_read(&dc->writeback_lock);
> +			}
> +
> +			if (!contention || dc->retry_nr > dc->retry_max) {
> +				__update_writeback_rate(dc);
> +				update_gc_after_writeback(c);
> +				up_read(&dc->writeback_lock);
> +				dc->retry_nr = 0;
> +			}
>  		}
>  	}

This is really not very pretty. First of all, why bother with storing a
max retry value in there? Doesn't seem like it'd ever be different per
'dc' anyway. Secondly, something like the below would be a lot more
readable. Totally untested.

diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 9ee0005874cd..cbc01372c7a1 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -235,19 +235,27 @@ static void update_writeback_rate(struct work_struct *work)
 		return;
 	}
 
-	if (atomic_read(&dc->has_dirty) && dc->writeback_percent) {
+	if (atomic_read(&dc->has_dirty) && dc->writeback_percent &&
+	    !set_at_max_writeback_rate(c, dc)) {
 		/*
 		 * If the whole cache set is idle, set_at_max_writeback_rate()
 		 * will set writeback rate to a max number. Then it is
 		 * unncessary to update writeback rate for an idle cache set
 		 * in maximum writeback rate number(s).
 		 */
-		if (!set_at_max_writeback_rate(c, dc)) {
-			down_read(&dc->writeback_lock);
+		do {
+			if (!down_read_trylock(&dc->writeback_lock)) {
+				dc->rate_update_retry++;
+				if (dc->rate_update_retry < MY_MAX)
+					break;
+				down_read(&dc->writeback_lock);
+				dc->rate_update_retry = 0;
+			}
+
 			__update_writeback_rate(dc);
 			update_gc_after_writeback(c);
 			up_read(&dc->writeback_lock);
-		}
+		} while (0);
 	}
 
-- 
Jens Axboe