On Sun, Nov 24, 2024 at 12:46 PM Sasha Levin <sashal@xxxxxxxxxx> wrote: > > From: Robbie Ko <robbieko@xxxxxxxxxxxx> > > [ Upstream commit 99785998ed1cea142e20f4904ced26537a37bf74 ] Why is this being picked for stable? It's not a bug fix or anything critical. It's just a performance optimization, and it's not even one where we know (AFAIK) of any workload where it would give very significant gains to justify backporting to stable. Thanks. > > When crawling btree, if an eb cache miss occurs, we change to use the eb > read lock and release all previous locks (including the parent lock) to > reduce lock contention. > > If an eb cache miss occurs in a leaf and needs to execute IO, before this > change we released locks only from level 2 and up and we read a leaf's > content from disk while holding a lock on its parent (level 1), causing > the unnecessary lock contention on the parent, after this change we > release locks from level 1 and up, but we lock level 0, and read leaf's > content from disk. > > Because we have prepared the check parameters and the read lock of eb we > hold, we can ensure that no race will occur during the check and cause > unexpected errors. > > Reviewed-by: Filipe Manana <fdmanana@xxxxxxxx> > Signed-off-by: Robbie Ko <robbieko@xxxxxxxxxxxx> > Signed-off-by: David Sterba <dsterba@xxxxxxxx> > Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx> > --- > fs/btrfs/ctree.c | 101 ++++++++++++++++++++++++++++++++--------------- > 1 file changed, 70 insertions(+), 31 deletions(-) > > diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c > index 0cc919d15b144..dd92acd66624f 100644 > --- a/fs/btrfs/ctree.c > +++ b/fs/btrfs/ctree.c > @@ -1515,12 +1515,14 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, > struct btrfs_tree_parent_check check = { 0 }; > u64 blocknr; > u64 gen; > - struct extent_buffer *tmp; > - int ret; > + struct extent_buffer *tmp = NULL; > + int ret = 0; > int parent_level; > - bool unlock_up; > + int err; > + bool read_tmp = false; > + bool tmp_locked = false; > + bool path_released = false; > > - unlock_up = ((level + 1 < BTRFS_MAX_LEVEL) && p->locks[level + 1]); > blocknr = btrfs_node_blockptr(*eb_ret, slot); > gen = btrfs_node_ptr_generation(*eb_ret, slot); > parent_level = btrfs_header_level(*eb_ret); > @@ -1551,68 +1553,105 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, > */ > if (btrfs_verify_level_key(tmp, > parent_level - 1, &check.first_key, gen)) { > - free_extent_buffer(tmp); > - return -EUCLEAN; > + ret = -EUCLEAN; > + goto out; > } > *eb_ret = tmp; > - return 0; > + tmp = NULL; > + ret = 0; > + goto out; > } > > if (p->nowait) { > - free_extent_buffer(tmp); > - return -EAGAIN; > + ret = -EAGAIN; > + goto out; > } > > - if (unlock_up) > + if (!p->skip_locking) { > btrfs_unlock_up_safe(p, level + 1); > - > - /* now we're allowed to do a blocking uptodate check */ > - ret = btrfs_read_extent_buffer(tmp, &check); > - if (ret) { > - free_extent_buffer(tmp); > + tmp_locked = true; > + btrfs_tree_read_lock(tmp); > btrfs_release_path(p); > - return ret; > + ret = -EAGAIN; > + path_released = true; > } > > - if (unlock_up) > - ret = -EAGAIN; > + /* Now we're allowed to do a blocking uptodate check. */ > + err = btrfs_read_extent_buffer(tmp, &check); > + if (err) { > + ret = err; > + goto out; > + } > > + if (ret == 0) { > + ASSERT(!tmp_locked); > + *eb_ret = tmp; > + tmp = NULL; > + } > goto out; > } else if (p->nowait) { > - return -EAGAIN; > + ret = -EAGAIN; > + goto out; > } > > - if (unlock_up) { > + if (!p->skip_locking) { > btrfs_unlock_up_safe(p, level + 1); > ret = -EAGAIN; > - } else { > - ret = 0; > } > > if (p->reada != READA_NONE) > reada_for_search(fs_info, p, level, slot, key->objectid); > > - tmp = read_tree_block(fs_info, blocknr, &check); > + tmp = btrfs_find_create_tree_block(fs_info, blocknr, check.owner_root, check.level); > if (IS_ERR(tmp)) { > + ret = PTR_ERR(tmp); > + tmp = NULL; > + goto out; > + } > + read_tmp = true; > + > + if (!p->skip_locking) { > + ASSERT(ret == -EAGAIN); > + tmp_locked = true; > + btrfs_tree_read_lock(tmp); > btrfs_release_path(p); > - return PTR_ERR(tmp); > + path_released = true; > + } > + > + /* Now we're allowed to do a blocking uptodate check. */ > + err = btrfs_read_extent_buffer(tmp, &check); > + if (err) { > + ret = err; > + goto out; > } > + > /* > * If the read above didn't mark this buffer up to date, > * it will never end up being up to date. Set ret to EIO now > * and give up so that our caller doesn't loop forever > * on our EAGAINs. > */ > - if (!extent_buffer_uptodate(tmp)) > + if (!extent_buffer_uptodate(tmp)) { > ret = -EIO; > + goto out; > + } > > -out: > if (ret == 0) { > + ASSERT(!tmp_locked); > *eb_ret = tmp; > - } else { > - free_extent_buffer(tmp); > - btrfs_release_path(p); > + tmp = NULL; > + } > +out: > + if (tmp) { > + if (tmp_locked) > + btrfs_tree_read_unlock(tmp); > + if (read_tmp && ret && ret != -EAGAIN) > + free_extent_buffer_stale(tmp); > + else > + free_extent_buffer(tmp); > } > + if (ret && !path_released) > + btrfs_release_path(p); > > return ret; > } > @@ -2198,7 +2237,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, > } > > err = read_block_for_search(root, p, &b, level, slot, key); > - if (err == -EAGAIN) > + if (err == -EAGAIN && !p->nowait) > goto again; > if (err) { > ret = err; > @@ -2325,7 +2364,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key, > } > > err = read_block_for_search(root, p, &b, level, slot, key); > - if (err == -EAGAIN) > + if (err == -EAGAIN && !p->nowait) > goto again; > if (err) { > ret = err; > -- > 2.43.0 > >