Re: [PATCH v2] bcache: fixup lock c->root error

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Sep 08, 2023 at 11:41:08AM +0800, Mingzhe Zou wrote:
> We had a problem with io hung because it was waiting for c->root to
> release the lock.
> 
> crash> cache_set.root -l cache_set.list ffffa03fde4c0050
>   root = 0xffff802ef454c800
> crash> btree -o 0xffff802ef454c800 | grep rw_semaphore
>   [ffff802ef454c858] struct rw_semaphore lock;
> crash> struct rw_semaphore ffff802ef454c858
> struct rw_semaphore {
>   count = {
>     counter = -4294967297
>   },
>   wait_list = {
>     next = 0xffff00006786fc28,
>     prev = 0xffff00005d0efac8
>   },
>   wait_lock = {
>     raw_lock = {
>       {
>         val = {
>           counter = 0
>         },
>         {
>           locked = 0 '\000',
>           pending = 0 '\000'
>         },
>         {
>           locked_pending = 0,
>           tail = 0
>         }
>       }
>     }
>   },
>   osq = {
>     tail = {
>       counter = 0
>     }
>   },
>   owner = 0xffffa03fdc586603
> }
> 
> The "counter = -4294967297" means that lock count is -1 and a write lock
> is being attempted. Then, we found that there is a btree with a counter
> of 1 in btree_cache_freeable.
> 
> crash> cache_set -l cache_set.list ffffa03fde4c0050 -o|grep btree_cache
>   [ffffa03fde4c1140] struct list_head btree_cache;
>   [ffffa03fde4c1150] struct list_head btree_cache_freeable;
>   [ffffa03fde4c1160] struct list_head btree_cache_freed;
>   [ffffa03fde4c1170] unsigned int btree_cache_used;
>   [ffffa03fde4c1178] wait_queue_head_t btree_cache_wait;
>   [ffffa03fde4c1190] struct task_struct *btree_cache_alloc_lock;
> crash> list -H ffffa03fde4c1140|wc -l
> 973
> crash> list -H ffffa03fde4c1150|wc -l
> 1123
> crash> cache_set.btree_cache_used -l cache_set.list ffffa03fde4c0050
>   btree_cache_used = 2097
> crash> list -s btree -l btree.list -H ffffa03fde4c1140|grep -E -A2 "^  lock = {" > btree_cache.txt
> crash> list -s btree -l btree.list -H ffffa03fde4c1150|grep -E -A2 "^  lock = {" > btree_cache_freeable.txt
> [root@node-3 127.0.0.1-2023-08-04-16:40:28]# pwd
> /var/crash/127.0.0.1-2023-08-04-16:40:28
> [root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache.txt|grep counter|grep -v "counter = 0"
> [root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache_freeable.txt|grep counter|grep -v "counter = 0"
>       counter = 1
> 
> We found that this is a bug in bch_sectors_dirty_init() when locking c->root:
>     (1). Thread X has locked c->root(A) write.
>     (2). Thread Y failed to lock c->root(A), waiting for the lock(c->root A).
>     (3). Thread X bch_btree_set_root() changes c->root from A to B.
>     (4). Thread X releases the lock(c->root A).
>     (5). Thread Y successfully locks c->root(A).
>     (6). Thread Y releases the lock(c->root B).
> 
>         down_write locked ---(1)----------------------┐
>                 |                                     |
>                 |   down_read waiting ---(2)----┐     |
>                 |           |               ┌-------------┐ ┌-------------┐
>         bch_btree_set_root ===(3)========>> | c->root   A | | c->root   B |
>                 |           |               └-------------┘ └-------------┘
>             up_write ---(4)---------------------┘     |            |
>                             |                         |            |
>                     down_read locked ---(5)-----------┘            |
>                             |                                      |
>                         up_read ---(6)-----------------------------┘
> 
> Since c->root may change, the correct steps to lock c->root should be
> the same as bch_root_usage(), compare after locking.
> 
> static unsigned int bch_root_usage(struct cache_set *c)
> {
>         unsigned int bytes = 0;
>         struct bkey *k;
>         struct btree *b;
>         struct btree_iter iter;
> 
>         goto lock_root;
> 
>         do {
>                 rw_unlock(false, b);
> lock_root:
>                 b = c->root;
>                 rw_lock(false, b, b->level);
>         } while (b != c->root);
> 
>         for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
>                 bytes += bkey_bytes(k);
> 
>         rw_unlock(false, b);
> 
>         return (bytes * 100) / btree_bytes(c);
> }
> 
> Fixes: b144e45fc576 ("bcache: make bch_sectors_dirty_init() to be multithreaded")
> Signed-off-by: Mingzhe Zou <mingzhe.zou@xxxxxxxxxxxx>
> Cc: stable@xxxxxxxxxxxxxxx

Nice catch, added into my for-next queue.

Thanks for fixing up.

Coly Li


> ---
>  drivers/md/bcache/writeback.c | 14 +++++++++++---
>  1 file changed, 11 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
> index 24c049067f61..bac916ba08c8 100644
> --- a/drivers/md/bcache/writeback.c
> +++ b/drivers/md/bcache/writeback.c
> @@ -977,14 +977,22 @@ static int bch_btre_dirty_init_thread_nr(void)
>  void bch_sectors_dirty_init(struct bcache_device *d)
>  {
>  	int i;
> +	struct btree *b = NULL;
>  	struct bkey *k = NULL;
>  	struct btree_iter iter;
>  	struct sectors_dirty_init op;
>  	struct cache_set *c = d->c;
>  	struct bch_dirty_init_state state;
>  
> +retry_lock:
> +	b = c->root;
> +	rw_lock(0, b, b->level);
> +	if (b != c->root) {
> +		rw_unlock(0, b);
> +		goto retry_lock;
> +	}
> +
>  	/* Just count root keys if no leaf node */
> -	rw_lock(0, c->root, c->root->level);
>  	if (c->root->level == 0) {
>  		bch_btree_op_init(&op.op, -1);
>  		op.inode = d->id;
> @@ -994,7 +1002,7 @@ void bch_sectors_dirty_init(struct bcache_device *d)
>  				    k, &iter, bch_ptr_invalid)
>  			sectors_dirty_init_fn(&op.op, c->root, k);
>  
> -		rw_unlock(0, c->root);
> +		rw_unlock(0, b);
>  		return;
>  	}
>  
> @@ -1030,7 +1038,7 @@ void bch_sectors_dirty_init(struct bcache_device *d)
>  out:
>  	/* Must wait for all threads to stop. */
>  	wait_event(state.wait, atomic_read(&state.started) == 0);
> -	rw_unlock(0, c->root);
> +	rw_unlock(0, b);
>  }
>  
>  void bch_cached_dev_writeback_init(struct cached_dev *dc)
> -- 
> 2.17.1.windows.2
> 

-- 
Coly Li



[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux