From: Eric Wheeler <bcache@xxxxxxxxxxxxxxxxxx>
Date: 2023-06-29 07:37:57
To: Mingzhe Zou <mingzhe.zou@xxxxxxxxxxxx>
Cc: colyli@xxxxxxx, linux-bcache@xxxxxxxxxxxxxxx, zoumingzhe@xxxxxx
Subject: Re: [PATCH v2] Separate bch_moving_gc() from bch_btree_gc()

>On Wed, 28 Jun 2023, Mingzhe Zou wrote:
>
>> From: Mingzhe Zou <zoumingzhe@xxxxxx>
>>
>> Moving gc uses cache->heap to defragment disk. Unlike btree gc,
>> moving gc only takes up part of the disk bandwidth.
>>
>> The number of heap is constant. However, the buckets released by
>> each moving gc is limited. So bch_moving_gc() needs to be called
>> multiple times.
>>
>> If bch_gc_thread() always calls bch_btree_gc(), it will block
>> the IO request. This patch allows bch_gc_thread() to only call
>> bch_moving_gc() when there are many fragments.
>>
>> Signed-off-by: Mingzhe Zou <mingzhe.zou@xxxxxxxxxxxx>
>
>
>Hi Mingzhe,
>
>Could this be used to free buckets down to a minimum bucket count?

Hi Eric,

I am trying to solve the fragmentation problem by moving gc, which
releases some buckets. It may be a minimum bucket count.

However, if the number of buckets can still support continued work in
writeback mode, moving gc may not work.

>
>See more below:
>
>
>
>> ---
>>  drivers/md/bcache/bcache.h   |  4 ++-
>>  drivers/md/bcache/btree.c    | 66 ++++++++++++++++++++++++++++++++++--
>>  drivers/md/bcache/movinggc.c |  2 ++
>>  3 files changed, 68 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
>> index 5a79bb3c272f..155deff0ce05 100644
>> --- a/drivers/md/bcache/bcache.h
>> +++ b/drivers/md/bcache/bcache.h
>> @@ -461,7 +461,8 @@ struct cache {
>>       * until a gc finishes - otherwise we could pointlessly burn a ton of
>>       * cpu
>>       */
>> -    unsigned int            invalidate_needs_gc;
>> +    unsigned int            invalidate_needs_gc:1;
>> +    unsigned int            only_moving_gc:1;
>>
>>      bool                    discard; /* Get rid of? */
>>
>> @@ -629,6 +630,7 @@ struct cache_set {
>>      struct gc_stat          gc_stats;
>>      size_t                  nbuckets;
>>      size_t                  avail_nbuckets;
>> +    size_t                  fragment_nbuckets;
>>
>>      struct task_struct      *gc_thread;
>>      /* Where in the btree gc currently is */
>> diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
>> index 68b9d7ca864e..6698a4480e05 100644
>> --- a/drivers/md/bcache/btree.c
>> +++ b/drivers/md/bcache/btree.c
>> @@ -88,6 +88,7 @@
>>   * Test module load/unload
>>   */
>>
>> +#define COPY_GC_PERCENT 5
>>  #define MAX_NEED_GC     64
>>  #define MAX_SAVE_PRIO   72
>>  #define MAX_GC_TIMES    100
>> @@ -1705,6 +1706,7 @@ static void btree_gc_start(struct cache_set *c)
>>
>>      mutex_lock(&c->bucket_lock);
>>
>> +    set_gc_sectors(c);
>>      c->gc_mark_valid = 0;
>>      c->gc_done = ZERO_KEY;
>>
>> @@ -1825,8 +1827,51 @@ static void bch_btree_gc(struct cache_set *c)
>>      memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat));
>>
>>      trace_bcache_gc_end(c);
>> +}
>> +
>> +extern unsigned int bch_cutoff_writeback;
>> +extern unsigned int bch_cutoff_writeback_sync;
>> +
>> +static bool moving_gc_should_run(struct cache_set *c)
>> +{
>> +    struct bucket *b;
>> +    struct cache *ca = c->cache;
>> +    size_t moving_gc_threshold = ca->sb.bucket_size >> 2, frag_percent;
>> +    unsigned long used_buckets = 0, frag_buckets = 0, move_buckets = 0;
>> +    unsigned long dirty_sectors = 0, frag_sectors, used_sectors;
>> +
>> +    if (c->gc_stats.in_use > bch_cutoff_writeback_sync)
>> +        return true;
>> +
>> +    mutex_lock(&c->bucket_lock);
>> +    for_each_bucket(b, ca) {
>> +        if (GC_MARK(b) != GC_MARK_DIRTY)
>> +            continue;
>> +
>> +        used_buckets++;
>> +
>> +        used_sectors = GC_SECTORS_USED(b);
>> +        dirty_sectors += used_sectors;
>> +
>> +        if (used_sectors < ca->sb.bucket_size)
>> +            frag_buckets++;
>>
>> -    bch_moving_gc(c);
>> +        if (used_sectors <= moving_gc_threshold)
>> +            move_buckets++;
>> +    }
>> +    mutex_unlock(&c->bucket_lock);
>> +
>> +    c->fragment_nbuckets = frag_buckets;
>> +    frag_sectors = used_buckets * ca->sb.bucket_size - dirty_sectors;
>> +    frag_percent = div_u64(frag_sectors * 100, ca->sb.bucket_size * c->nbuckets);
>> +
>> +    if (move_buckets > ca->heap.size)
>> +        return true;
>> +
>> +    if (frag_percent >= COPY_GC_PERCENT)
>> +        return true;
>
>For example, could we add this to `moving_gc_should_run`:
>
>+    if (used_buckets >= MAX_USED_BUCKETS)
>+        return true;
>
>to solve the issue in this thread:
>    https://www.spinics.net/lists/linux-bcache/msg11746.html
>
>where MAX_USED_BUCKETS is a placeholder for a sysfs tunable like
>`cache_max_used_percent` ?
>

I think we can reuse bch_cutoff_writeback and bch_cutoff_writeback_sync,
because moving gc is to solve the fragmentation problem in writeback
mode.
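
For example, roughly something like this in moving_gc_should_run()
(just an untested sketch to show the idea, not the final v3 logic):

    /* sketch: gate moving gc on the existing writeback cutoffs */
    if (c->gc_stats.in_use > bch_cutoff_writeback_sync)
        return true;

    /* below the writeback cutoff there is little point in moving gc */
    if (c->gc_stats.in_use < bch_cutoff_writeback)
        return false;

    /* in between, keep the fragmentation check */
    return frag_percent >= COPY_GC_PERCENT;

That would keep the trigger consistent with the writeback tunables
instead of adding another independent threshold.
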
I will send v3 soon.

mingzhe

>-Eric
>
>
>> +
>> +    return false;
>>  }
>>
>>  static bool gc_should_run(struct cache_set *c)
>> @@ -1839,6 +1884,19 @@ static bool gc_should_run(struct cache_set *c)
>>      if (atomic_read(&c->sectors_to_gc) < 0)
>>          return true;
>>
>> +    /*
>> +     * Moving gc uses cache->heap to defragment disk. Unlike btree gc,
>> +     * moving gc only takes up part of the disk bandwidth.
>> +     * The number of heap is constant. However, the buckets released by
>> +     * each moving gc is limited. So bch_moving_gc() needs to be called
>> +     * multiple times. If bch_gc_thread() always calls bch_btree_gc(),
>> +     * it will block the IO request.
>> +     */
>> +    if (c->copy_gc_enabled && moving_gc_should_run(c)) {
>> +        ca->only_moving_gc = 1;
>> +        return true;
>> +    }
>> +
>>      return false;
>>  }
>>
>> @@ -1856,8 +1914,10 @@ static int bch_gc_thread(void *arg)
>>              test_bit(CACHE_SET_IO_DISABLE, &c->flags))
>>              break;
>>
>> -        set_gc_sectors(c);
>> -        bch_btree_gc(c);
>> +        if (!c->cache->only_moving_gc)
>> +            bch_btree_gc(c);
>> +
>> +        bch_moving_gc(c);
>>      }
>>
>>      wait_for_kthread_stop();
>> diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
>> index 9f32901fdad1..04da088cefe8 100644
>> --- a/drivers/md/bcache/movinggc.c
>> +++ b/drivers/md/bcache/movinggc.c
>> @@ -200,6 +200,8 @@ void bch_moving_gc(struct cache_set *c)
>>      struct bucket *b;
>>      unsigned long sectors_to_move, reserve_sectors;
>>
>> +    c->cache->only_moving_gc = 0;
>> +
>>      if (!c->copy_gc_enabled)
>>          return;
>>
>> --
>> 2.17.1.windows.2
>>
>>