On Wed, 28 Jun 2023, Mingzhe Zou wrote:

> From: Mingzhe Zou <zoumingzhe@xxxxxx>
> 
> Moving gc uses cache->heap to defragment disk. Unlike btree gc,
> moving gc only takes up part of the disk bandwidth.
> 
> The number of heap is constant. However, the buckets released by
> each moving gc is limited. So bch_moving_gc() needs to be called
> multiple times.
> 
> If bch_gc_thread() always calls bch_btree_gc(), it will block
> the IO request. This patch allows bch_gc_thread() to only call
> bch_moving_gc() when there are many fragments.
> 
> Signed-off-by: Mingzhe Zou <mingzhe.zou@xxxxxxxxxxxx>

Hi Mingzhe,

Could this be used to free buckets down to a minimum bucket count?

See more below:

> ---
>  drivers/md/bcache/bcache.h   |  4 ++-
>  drivers/md/bcache/btree.c    | 66 ++++++++++++++++++++++++++++++++++--
>  drivers/md/bcache/movinggc.c |  2 ++
>  3 files changed, 68 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
> index 5a79bb3c272f..155deff0ce05 100644
> --- a/drivers/md/bcache/bcache.h
> +++ b/drivers/md/bcache/bcache.h
> @@ -461,7 +461,8 @@ struct cache {
>  	 * until a gc finishes - otherwise we could pointlessly burn a ton of
>  	 * cpu
>  	 */
> -	unsigned int		invalidate_needs_gc;
> +	unsigned int		invalidate_needs_gc:1;
> +	unsigned int		only_moving_gc:1;
>  
>  	bool			discard; /* Get rid of? */
>  
> @@ -629,6 +630,7 @@ struct cache_set {
>  	struct gc_stat		gc_stats;
>  	size_t			nbuckets;
>  	size_t			avail_nbuckets;
> +	size_t			fragment_nbuckets;
>  
>  	struct task_struct	*gc_thread;
>  	/* Where in the btree gc currently is */
> diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
> index 68b9d7ca864e..6698a4480e05 100644
> --- a/drivers/md/bcache/btree.c
> +++ b/drivers/md/bcache/btree.c
> @@ -88,6 +88,7 @@
>   * Test module load/unload
>   */
>  
> +#define COPY_GC_PERCENT	5
>  #define MAX_NEED_GC		64
>  #define MAX_SAVE_PRIO		72
>  #define MAX_GC_TIMES		100
> @@ -1705,6 +1706,7 @@ static void btree_gc_start(struct cache_set *c)
>  
>  	mutex_lock(&c->bucket_lock);
>  
> +	set_gc_sectors(c);
>  	c->gc_mark_valid = 0;
>  	c->gc_done = ZERO_KEY;
>  
> @@ -1825,8 +1827,51 @@ static void bch_btree_gc(struct cache_set *c)
>  	memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat));
>  
>  	trace_bcache_gc_end(c);
> +}
> +
> +extern unsigned int bch_cutoff_writeback;
> +extern unsigned int bch_cutoff_writeback_sync;
> +
> +static bool moving_gc_should_run(struct cache_set *c)
> +{
> +	struct bucket *b;
> +	struct cache *ca = c->cache;
> +	size_t moving_gc_threshold = ca->sb.bucket_size >> 2, frag_percent;
> +	unsigned long used_buckets = 0, frag_buckets = 0, move_buckets = 0;
> +	unsigned long dirty_sectors = 0, frag_sectors, used_sectors;
> +
> +	if (c->gc_stats.in_use > bch_cutoff_writeback_sync)
> +		return true;
> +
> +	mutex_lock(&c->bucket_lock);
> +	for_each_bucket(b, ca) {
> +		if (GC_MARK(b) != GC_MARK_DIRTY)
> +			continue;
> +
> +		used_buckets++;
> +
> +		used_sectors = GC_SECTORS_USED(b);
> +		dirty_sectors += used_sectors;
> +
> +		if (used_sectors < ca->sb.bucket_size)
> +			frag_buckets++;
>  
> -	bch_moving_gc(c);
> +		if (used_sectors <= moving_gc_threshold)
> +			move_buckets++;
> +	}
> +	mutex_unlock(&c->bucket_lock);
> +
> +	c->fragment_nbuckets = frag_buckets;
> +	frag_sectors = used_buckets * ca->sb.bucket_size - dirty_sectors;
> +	frag_percent = div_u64(frag_sectors * 100, ca->sb.bucket_size * c->nbuckets);
> +
> +	if (move_buckets > ca->heap.size)
> +		return true;
> +
> +	if (frag_percent >= COPY_GC_PERCENT)
> +		return true;

For example, could we add this to `moving_gc_should_run`:

+	if (used_buckets >= MAX_USED_BUCKETS)
+		return true;

to solve the issue in this thread:

  https://www.spinics.net/lists/linux-bcache/msg11746.html

where MAX_USED_BUCKETS is a placeholder for a sysfs tunable like
`cache_max_used_percent`?
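In percentage terms, and using the counters your function already
computes, I am imagining something roughly like the sketch below
(untested; `cache_max_used_percent` is an invented field name that I
have parked on the cache_set purely for illustration, and the sysfs
wiring is sketched at the bottom of this mail):

	/*
	 * used_buckets was counted in the loop above (dirty buckets only);
	 * trigger gc once it crosses a user-set ceiling expressed as a
	 * percentage of all buckets.
	 */
	if (used_buckets * 100 >= c->nbuckets * c->cache_max_used_percent)
		return true;

A percentage rather than a raw bucket count would keep the knob
meaningful across different cache sizes.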
-Eric

> +
> +	return false;
>  }
>  
>  static bool gc_should_run(struct cache_set *c)
> @@ -1839,6 +1884,19 @@ static bool gc_should_run(struct cache_set *c)
>  	if (atomic_read(&c->sectors_to_gc) < 0)
>  		return true;
>  
> +	/*
> +	 * Moving gc uses cache->heap to defragment disk. Unlike btree gc,
> +	 * moving gc only takes up part of the disk bandwidth.
> +	 * The number of heap is constant. However, the buckets released by
> +	 * each moving gc is limited. So bch_moving_gc() needs to be called
> +	 * multiple times. If bch_gc_thread() always calls bch_btree_gc(),
> +	 * it will block the IO request.
> +	 */
> +	if (c->copy_gc_enabled && moving_gc_should_run(c)) {
> +		ca->only_moving_gc = 1;
> +		return true;
> +	}
> +
>  	return false;
>  }
>  
> @@ -1856,8 +1914,10 @@ static int bch_gc_thread(void *arg)
>  		    test_bit(CACHE_SET_IO_DISABLE, &c->flags))
>  			break;
>  
> -		set_gc_sectors(c);
> -		bch_btree_gc(c);
> +		if (!c->cache->only_moving_gc)
> +			bch_btree_gc(c);
> +
> +		bch_moving_gc(c);
>  	}
>  
>  	wait_for_kthread_stop();
> diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
> index 9f32901fdad1..04da088cefe8 100644
> --- a/drivers/md/bcache/movinggc.c
> +++ b/drivers/md/bcache/movinggc.c
> @@ -200,6 +200,8 @@ void bch_moving_gc(struct cache_set *c)
>  	struct bucket *b;
>  	unsigned long sectors_to_move, reserve_sectors;
>  
> +	c->cache->only_moving_gc = 0;
> +
>  	if (!c->copy_gc_enabled)
>  		return;
>  
> -- 
> 2.17.1.windows.2
> 
> 
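For what it's worth, the sysfs wiring for such a knob should only be a
few lines in sysfs.c, roughly along the lines below. This is untested
and from memory; the attribute name is invented, and the exact
SHOW()/STORE() handlers and attribute list should be double-checked
against the current sysfs.c:

	/* drivers/md/bcache/sysfs.c -- rough sketch only */
	rw_attribute(cache_max_used_percent);

	/* in the cache_set SHOW() handler */
	sysfs_print(cache_max_used_percent, c->cache_max_used_percent);

	/* in the cache_set STORE() handler, clamped to a sane range */
	sysfs_strtoul_clamp(cache_max_used_percent,
			    c->cache_max_used_percent, 0, 100);

	/* plus &sysfs_cache_max_used_percent in the cache_set attribute array */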