From: Eric Wheeler <bcache@xxxxxxxxxxxxxxxxxx> Date: 2023-09-07 04:23:50 To: Mingzhe Zou <mingzhe.zou@xxxxxxxxxxxx> Cc: colyli@xxxxxxx,bcache@xxxxxxxxxxxxxxxxxx,linux-bcache@xxxxxxxxxxxxxxx,zoumingzhe@xxxxxx Subject: Re: [PATCH] bcache: fixup lock c->root error

>On Wed, 6 Sep 2023, Mingzhe Zou wrote: > >> We had a problem with io hung because it was waiting for c->root to >> release the lock. >> >> crash> cache_set.root -l cache_set.list ffffa03fde4c0050 >> root = 0xffff802ef454c800 >> crash> btree -o 0xffff802ef454c800 | grep rw_semaphore >> [ffff802ef454c858] struct rw_semaphore lock; >> crash> struct rw_semaphore ffff802ef454c858 >> struct rw_semaphore { >> count = { >> counter = -4294967297 >> }, >> wait_list = { >> next = 0xffff00006786fc28, >> prev = 0xffff00005d0efac8 >> }, >> wait_lock = { >> raw_lock = { >> { >> val = { >> counter = 0 >> }, >> { >> locked = 0 '\000', >> pending = 0 '\000' >> }, >> { >> locked_pending = 0, >> tail = 0 >> } >> } >> } >> }, >> osq = { >> tail = { >> counter = 0 >> } >> }, >> owner = 0xffffa03fdc586603 >> } >> >> The "counter = -4294967297" means that lock count is -1 and a write lock >> is being attempted. Then, we found that there is a btree with a counter >> of 1 in btree_cache_freeable. 
>> >> crash> cache_set -l cache_set.list ffffa03fde4c0050 -o|grep btree_cache >> [ffffa03fde4c1140] struct list_head btree_cache; >> [ffffa03fde4c1150] struct list_head btree_cache_freeable; >> [ffffa03fde4c1160] struct list_head btree_cache_freed; >> [ffffa03fde4c1170] unsigned int btree_cache_used; >> [ffffa03fde4c1178] wait_queue_head_t btree_cache_wait; >> [ffffa03fde4c1190] struct task_struct *btree_cache_alloc_lock; >> crash> list -H ffffa03fde4c1140|wc -l >> 973 >> crash> list -H ffffa03fde4c1150|wc -l >> 1123 >> crash> cache_set.btree_cache_used -l cache_set.list ffffa03fde4c0050 >> btree_cache_used = 2097 >> crash> list -s btree -l btree.list -H ffffa03fde4c1140|grep -E -A2 "^ lock = {" > btree_cache.txt >> crash> list -s btree -l btree.list -H ffffa03fde4c1150|grep -E -A2 "^ lock = {" > btree_cache_freeable.txt >> [root@node-3 127.0.0.1-2023-08-04-16:40:28]# pwd >> /var/crash/127.0.0.1-2023-08-04-16:40:28 >> [root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache.txt|grep counter|grep -v "counter = 0" >> [root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache_freeable.txt|grep counter|grep -v "counter = 0" >> counter = 1 >> >> We found that this is a bug in bch_sectors_dirty_init() when locking c->root: >> (1). Thread X has locked c->root(A) write. >> (2). Thread Y failed to lock c->root(A), waiting for the lock(c->root A). >> (3). Thread X bch_btree_set_root() changes c->root from A to B. >> (4). Thread X releases the lock(c->root A). >> (5). Thread Y successfully locks c->root(A). >> (6). Thread Y releases the lock(c->root B). 
>> >> down_write locked ---(1)----------------------┐ >> | | >> | down_read waiting ---(2)----┐ | >> | | ┌-------------┐ ┌-------------┐ >> bch_btree_set_root ===(3)========>> | c->root A | | c->root B | >> | | └-------------┘ └-------------┘ >> up_write ---(4)---------------------┘ | | >> | | | >> down_read locked ---(5)-----------┘ | >> | | >> up_read ---(6)-----------------------------┘ >> >> Since c->root may change, the correct steps to lock c->root should be >> the same as bch_root_usage(), compare after locking. >> >> static unsigned int bch_root_usage(struct cache_set *c) >> { >> unsigned int bytes = 0; >> struct bkey *k; >> struct btree *b; >> struct btree_iter iter; >> >> goto lock_root; >> >> do { >> rw_unlock(false, b); >> lock_root: >> b = c->root; >> rw_lock(false, b, b->level); >> } while (b != c->root); >> >> for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad) >> bytes += bkey_bytes(k); >> >> rw_unlock(false, b); >> >> return (bytes * 100) / btree_bytes(c); >> } >> >> Fixes: b144e45fc576 ("bcache: make bch_sectors_dirty_init() to be multithreaded") > > > >Good work. b144e45fc576 is from v5.6. > >Please CC stable@xxxxxxxxxxxxxxx > >-- >Eric Wheeler > > Hi, Eric I sent V2. 
Thanks mingzhe > > > >> Signed-off-by: Mingzhe Zou <mingzhe.zou@xxxxxxxxxxxx> >> --- >> drivers/md/bcache/writeback.c | 14 +++++++++++--- >> 1 file changed, 11 insertions(+), 3 deletions(-) >> >> diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c >> index 24c049067f61..bac916ba08c8 100644 >> --- a/drivers/md/bcache/writeback.c >> +++ b/drivers/md/bcache/writeback.c >> @@ -977,14 +977,22 @@ static int bch_btre_dirty_init_thread_nr(void) >> void bch_sectors_dirty_init(struct bcache_device *d) >> { >> int i; >> + struct btree *b = NULL; >> struct bkey *k = NULL; >> struct btree_iter iter; >> struct sectors_dirty_init op; >> struct cache_set *c = d->c; >> struct bch_dirty_init_state state; >> >> +retry_lock: >> + b = c->root; >> + rw_lock(0, b, b->level); >> + if (b != c->root) { >> + rw_unlock(0, b); >> + goto retry_lock; >> + } >> + >> /* Just count root keys if no leaf node */ >> - rw_lock(0, c->root, c->root->level); >> if (c->root->level == 0) { >> bch_btree_op_init(&op.op, -1); >> op.inode = d->id; >> @@ -994,7 +1002,7 @@ void bch_sectors_dirty_init(struct bcache_device *d) >> k, &iter, bch_ptr_invalid) >> sectors_dirty_init_fn(&op.op, c->root, k); >> >> - rw_unlock(0, c->root); >> + rw_unlock(0, b); >> return; >> } >> >> @@ -1030,7 +1038,7 @@ void bch_sectors_dirty_init(struct bcache_device *d) >> out: >> /* Must wait for all threads to stop. */ >> wait_event(state.wait, atomic_read(&state.started) == 0); >> - rw_unlock(0, c->root); >> + rw_unlock(0, b); >> } >> >> void bch_cached_dev_writeback_init(struct cached_dev *dc) >> -- >> 2.25.1 >> >>