Handling of the done_offset to cow_file_range is a bit confusing, as it is not updated at all when the function succeeds, and the -EAGAIN status is used both for the case where we need to wait for a zone finish and the one where the allocation was partially successful. Change the calling convention so that done_offset is always updated, and 0 is returned if some allocation was successful (partial allocation can still only happen for zoned devices), and waiting for a zone finish is done internally in cow_file_range instead of the caller. Also write a big fat comment explaining the logic. Signed-off-by: Christoph Hellwig <hch@xxxxxx> --- fs/btrfs/inode.c | 58 ++++++++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 556f63e8496ff8..2a4b62398ee7a3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1364,7 +1364,8 @@ static noinline int cow_file_range(struct btrfs_inode *inode, * compressed extent. */ unlock_page(locked_page); - return 1; + ret = 1; + goto done; } else if (ret < 0) { goto out_unlock; } @@ -1395,6 +1396,31 @@ static noinline int cow_file_range(struct btrfs_inode *inode, ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size, min_alloc_size, 0, alloc_hint, &ins, 1, 1); + if (ret == -EAGAIN) { + /* + * btrfs_reserve_extent only returns -EAGAIN for zoned + * file systems, which is an indication that there are + * no active zones to allocate from at the moment. + * + * If this is the first loop iteration, wait for at + * least one zone to finish before retrying the + * allocation. Otherwise ask the caller to write out + * the already allocated blocks before coming back to + * us, or return -ENOSPC if it can't handle retries. 
+ */ + ASSERT(btrfs_is_zoned(fs_info)); + if (start == orig_start) { + wait_on_bit_io(&inode->root->fs_info->flags, + BTRFS_FS_NEED_ZONE_FINISH, + TASK_UNINTERRUPTIBLE); + continue; + } + if (done_offset) { + *done_offset = start - 1; + return 0; + } + ret = -ENOSPC; + } if (ret < 0) goto out_unlock; cur_alloc_size = ins.offset; @@ -1478,6 +1504,9 @@ static noinline int cow_file_range(struct btrfs_inode *inode, if (ret) goto out_unlock; } +done: + if (done_offset) + *done_offset = end; return ret; out_drop_extent_cache: @@ -1486,21 +1515,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode, btrfs_dec_block_group_reservations(fs_info, ins.objectid); btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1); out_unlock: - /* - * If done_offset is non-NULL and ret == -EAGAIN, we expect the - * caller to write out the successfully allocated region and retry. - */ - if (done_offset && ret == -EAGAIN) { - if (orig_start < start) - *done_offset = start - 1; - else - *done_offset = start; - return ret; - } else if (ret == -EAGAIN) { - /* Convert to -ENOSPC since the caller cannot retry. */ - ret = -ENOSPC; - } - /* * Now, we have three regions to clean up: * @@ -1711,19 +1725,9 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode, while (start <= end) { ret = cow_file_range(inode, locked_page, start, end, &done_offset, CFR_KEEP_LOCKED); - if (ret && ret != -EAGAIN) + if (ret) return ret; - if (ret == 0) - done_offset = end; - - if (done_offset == start) { - wait_on_bit_io(&inode->root->fs_info->flags, - BTRFS_FS_NEED_ZONE_FINISH, - TASK_UNINTERRUPTIBLE); - continue; - } - if (!locked_page_done) { __set_page_dirty_nobuffers(locked_page); account_page_redirty(locked_page); -- 2.39.2