From: Eric Wheeler <bcache@xxxxxxxxxxxxxxxxxx>
Date: 2023-06-29 07:37:57
To: Mingzhe Zou <mingzhe.zou@xxxxxxxxxxxx>
Cc: colyli@xxxxxxx, linux-bcache@xxxxxxxxxxxxxxx, zoumingzhe@xxxxxx
Subject: Re: [PATCH v2] Separate bch_moving_gc() from bch_btree_gc()

>On Wed, 28 Jun 2023, Mingzhe Zou wrote:
>
>> From: Mingzhe Zou <zoumingzhe@xxxxxx>
>>
>> Moving gc uses cache->heap to defragment disk. Unlike btree gc,
>> moving gc only takes up part of the disk bandwidth.
>>
>> The number of heap is constant. However, the buckets released by
>> each moving gc is limited. So bch_moving_gc() needs to be called
>> multiple times.
>>
>> If bch_gc_thread() always calls bch_btree_gc(), it will block
>> the IO request. This patch allows bch_gc_thread() to only call
>> bch_moving_gc() when there are many fragments.
>>
>> Signed-off-by: Mingzhe Zou <mingzhe.zou@xxxxxxxxxxxx>
>
>
>Hi Mingzhe,
>
>Could this be used to free buckets down to a minimum bucket count?

Hi Eric,

I am trying to solve the fragmentation problem by moving gc, which
releases some buckets. It may be a minimum bucket count.

However, if the number of buckets can still support continued work in
writeback mode, moving gc may not work.

>
>See more below:
>
>
>
>> ---
>>  drivers/md/bcache/bcache.h   |  4 ++-
>>  drivers/md/bcache/btree.c    | 66 ++++++++++++++++++++++++++++++++++--
>>  drivers/md/bcache/movinggc.c |  2 ++
>>  3 files changed, 68 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
>> index 5a79bb3c272f..155deff0ce05 100644
>> --- a/drivers/md/bcache/bcache.h
>> +++ b/drivers/md/bcache/bcache.h
>> @@ -461,7 +461,8 @@ struct cache {
>>       * until a gc finishes - otherwise we could pointlessly burn a ton of
>>       * cpu
>>       */
>> -    unsigned int            invalidate_needs_gc;
>> +    unsigned int            invalidate_needs_gc:1;
>> +    unsigned int            only_moving_gc:1;
>>
>>      bool                    discard; /* Get rid of? */
>>
>> @@ -629,6 +630,7 @@ struct cache_set {
>>      struct gc_stat          gc_stats;
>>      size_t                  nbuckets;
>>      size_t                  avail_nbuckets;
>> +    size_t                  fragment_nbuckets;
>>
>>      struct task_struct      *gc_thread;
>>      /* Where in the btree gc currently is */
>> diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
>> index 68b9d7ca864e..6698a4480e05 100644
>> --- a/drivers/md/bcache/btree.c
>> +++ b/drivers/md/bcache/btree.c
>> @@ -88,6 +88,7 @@
>>   * Test module load/unload
>>   */
>>
>> +#define COPY_GC_PERCENT 5
>>  #define MAX_NEED_GC     64
>>  #define MAX_SAVE_PRIO   72
>>  #define MAX_GC_TIMES    100
>> @@ -1705,6 +1706,7 @@ static void btree_gc_start(struct cache_set *c)
>>
>>      mutex_lock(&c->bucket_lock);
>>
>> +    set_gc_sectors(c);
>>      c->gc_mark_valid = 0;
>>      c->gc_done = ZERO_KEY;
>>
>> @@ -1825,8 +1827,51 @@ static void bch_btree_gc(struct cache_set *c)
>>      memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat));
>>
>>      trace_bcache_gc_end(c);
>> +}
>> +
>> +extern unsigned int bch_cutoff_writeback;
>> +extern unsigned int bch_cutoff_writeback_sync;
>> +
>> +static bool moving_gc_should_run(struct cache_set *c)
>> +{
>> +    struct bucket *b;
>> +    struct cache *ca = c->cache;
>> +    size_t moving_gc_threshold = ca->sb.bucket_size >> 2, frag_percent;
>> +    unsigned long used_buckets = 0, frag_buckets = 0, move_buckets = 0;
>> +    unsigned long dirty_sectors = 0, frag_sectors, used_sectors;
>> +
>> +    if (c->gc_stats.in_use > bch_cutoff_writeback_sync)
>> +        return true;
>> +
>> +    mutex_lock(&c->bucket_lock);
>> +    for_each_bucket(b, ca) {
>> +        if (GC_MARK(b) != GC_MARK_DIRTY)
>> +            continue;
>> +
>> +        used_buckets++;
>> +
>> +        used_sectors = GC_SECTORS_USED(b);
>> +        dirty_sectors += used_sectors;
>> +
>> +        if (used_sectors < ca->sb.bucket_size)
>> +            frag_buckets++;
>>
>> -    bch_moving_gc(c);
>> +        if (used_sectors <= moving_gc_threshold)
>> +            move_buckets++;
>> +    }
>> +    mutex_unlock(&c->bucket_lock);
>> +
>> +    c->fragment_nbuckets = frag_buckets;
>> +    frag_sectors = used_buckets * ca->sb.bucket_size - dirty_sectors;
>> +    frag_percent = div_u64(frag_sectors * 100, ca->sb.bucket_size * c->nbuckets);
>> +
>> +    if (move_buckets > ca->heap.size)
>> +        return true;
>> +
>> +    if (frag_percent >= COPY_GC_PERCENT)
>> +        return true;
>
>For example, could we add this to `moving_gc_should_run`:
>
>+    if (used_buckets >= MAX_USED_BUCKETS)
>+        return true;
>
>to solve the issue in this thread:
>    https://www.spinics.net/lists/linux-bcache/msg11746.html
>
>where MAX_USED_BUCKETS is a placeholder for a sysfs tunable like
>`cache_max_used_percent` ?
>

I think we can reuse bch_cutoff_writeback and bch_cutoff_writeback_sync,
because moving gc is to solve the fragmentation problem in writeback
mode.
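
For example, roughly something like this in moving_gc_should_run()
(just an untested sketch to show the idea, not the final v3 logic):

    /* sketch: gate moving gc on the existing writeback cutoffs */
    if (c->gc_stats.in_use > bch_cutoff_writeback_sync)
        return true;

    /* below the writeback cutoff there is little point in moving gc */
    if (c->gc_stats.in_use < bch_cutoff_writeback)
        return false;

    /* in between, keep the fragmentation check */
    return frag_percent >= COPY_GC_PERCENT;

That would keep the trigger consistent with the writeback tunables
instead of adding another independent threshold.
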
I will send v3 soon.

mingzhe

>-Eric
>
>
>> +
>> +    return false;
>>  }
>>
>>  static bool gc_should_run(struct cache_set *c)
>> @@ -1839,6 +1884,19 @@ static bool gc_should_run(struct cache_set *c)
>>      if (atomic_read(&c->sectors_to_gc) < 0)
>>          return true;
>>
>> +    /*
>> +     * Moving gc uses cache->heap to defragment disk. Unlike btree gc,
>> +     * moving gc only takes up part of the disk bandwidth.
>> +     * The number of heap is constant. However, the buckets released by
>> +     * each moving gc is limited. So bch_moving_gc() needs to be called
>> +     * multiple times. If bch_gc_thread() always calls bch_btree_gc(),
>> +     * it will block the IO request.
>> +     */
>> +    if (c->copy_gc_enabled && moving_gc_should_run(c)) {
>> +        ca->only_moving_gc = 1;
>> +        return true;
>> +    }
>> +
>>      return false;
>>  }
>>
>> @@ -1856,8 +1914,10 @@ static int bch_gc_thread(void *arg)
>>              test_bit(CACHE_SET_IO_DISABLE, &c->flags))
>>              break;
>>
>> -        set_gc_sectors(c);
>> -        bch_btree_gc(c);
>> +        if (!c->cache->only_moving_gc)
>> +            bch_btree_gc(c);
>> +
>> +        bch_moving_gc(c);
>>      }
>>
>>      wait_for_kthread_stop();
>> diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
>> index 9f32901fdad1..04da088cefe8 100644
>> --- a/drivers/md/bcache/movinggc.c
>> +++ b/drivers/md/bcache/movinggc.c
>> @@ -200,6 +200,8 @@ void bch_moving_gc(struct cache_set *c)
>>      struct bucket *b;
>>      unsigned long sectors_to_move, reserve_sectors;
>>
>> +    c->cache->only_moving_gc = 0;
>> +
>>      if (!c->copy_gc_enabled)
>>          return;
>>
>> --
>> 2.17.1.windows.2
>>
>>