>From d75d1b51aeedcc9349571be084c5f51d8f7e7612 Mon Sep 17 00:00:00 2001 From: Tang Junhui <tang.junhui.linux@xxxxxxxxx> Date: Thu, 24 Jan 2019 16:01:35 +0800 Subject: [PATCH] bcache: fix a race between register and allocator thread when register a new cache device, allocator get stucked, the call trace is bellow: Sangfor:EDS/eds-fe5192de /sf # cat /proc/3642/task/*/stack [<ffffffffa02b01c8>] bch_bucket_alloc+0x178/0x2b0 [bcache] [<ffffffffa02c737d>] bch_prio_write+0x18d/0x320 [bcache] [<ffffffffa02b0042>] bch_allocator_thread+0x432/0x440 [bcache] [<ffffffff810a1430>] kthread+0xc0/0xd0 [<ffffffff8173def7>] ret_from_fork+0x77/0xb0 [<ffffffffffffffff>] 0xffffffffffffffff There is a race between register and allocator thread: register thread allocator thread run_cache_set =>bch_initial_gc_finish alloc prio buckets =>bch_cache_allocator_start start allocator thread bch_allocator_thread =>bch_prio_write =>invalidate_buckets write prio using prio buckets fill bucket into ca->free_inc =>SET_CACHE_SYNC set cache sync mode =>bch_prio_write since cache sync mode is set, write prio to bucket, and the prio bucket has been used in register thread, no bucket can used. so the allocator thread stuck here. In this patch, register thread wait for the allocator thread to finish work, then write prio to buckets and set cache to sync mode. Signed-off-by: Tang Junhui <tang.junhui.linux@xxxxxxxxx> --- drivers/md/bcache/super.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index fa4058e..818d2f1 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1852,13 +1852,28 @@ static void run_cache_set(struct cache_set *c) bch_initial_gc_finish(c); err = "error starting allocator thread"; - for_each_cache(ca, c, i) - if (bch_cache_allocator_start(ca)) - goto err; - mutex_lock(&c->bucket_lock); - for_each_cache(ca, c, i) + for_each_cache(ca, c, i) { + DEFINE_WAIT(wait); + + if (bch_cache_allocator_start(ca)) { + mutex_unlock(&c->bucket_lock); + goto err; + } + + for (;;) { + prepare_to_wait(&c->bucket_wait, &wait, + TASK_UNINTERRUPTIBLE); + if (!fifo_empty(&ca->free_inc)) + break; + mutex_unlock(&c->bucket_lock); + schedule(); + mutex_lock(&c->bucket_lock); + } + + finish_wait(&c->bucket_wait, &wait); bch_prio_write(ca); + } mutex_unlock(&c->bucket_lock); err = "cannot allocate new UUID bucket"; -- 1.8.3.1