On 2022/3/22 23:55, Christoph Hellwig wrote:
When a bio is split in btrfs_submit_direct, dip->file_offset contains the file offset for the first bio. But this means the start value used in btrfs_check_read_dio_bio is incorrect for subsequent bios. Add a file_offset field to struct btrfs_bio to pass along the correct offset. Given that check_data_csum only uses start of an error message this means problems with this miscalculation will only show up when I/O fails or checksums mismatch. > Signed-off-by: Christoph Hellwig <hch@xxxxxx>
Personally speaking, I really hate to add DIO specific value into btrfs_bio. Hopes we can later turn that btrfs_bio::file_offset into some union for other usages. Despite the extra memory usage, it looks good. Reviewed-by: Qu Wenruo <wqu@xxxxxxxx> Thanks, Qu
--- fs/btrfs/extent_io.c | 1 + fs/btrfs/inode.c | 13 +++++-------- fs/btrfs/volumes.h | 3 +++ 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e9fa0f6d605ee..7ca4e9b80f023 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2662,6 +2662,7 @@ int btrfs_repair_one_sector(struct inode *inode, repair_bio = btrfs_bio_alloc(1); repair_bbio = btrfs_bio(repair_bio); + repair_bbio->file_offset = start; repair_bio->bi_opf = REQ_OP_READ; repair_bio->bi_end_io = failed_bio->bi_end_io; repair_bio->bi_iter.bi_sector = failrec->logical >> 9; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3ef8b63bb1b5c..93f00e9150ed0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7773,8 +7773,6 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip, const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM); struct bio_vec bvec; struct bvec_iter iter; - const u64 orig_file_offset = dip->file_offset; - u64 start = orig_file_offset; u32 bio_offset = 0; blk_status_t err = BLK_STS_OK; @@ -7784,6 +7782,8 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip, nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len); pgoff = bvec.bv_offset; for (i = 0; i < nr_sectors; i++) { + u64 start = bbio->file_offset + bio_offset; + ASSERT(pgoff < PAGE_SIZE); if (uptodate && (!csum || !check_data_csum(inode, bbio, @@ -7796,17 +7796,13 @@ static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip, } else { int ret; - ASSERT((start - orig_file_offset) < UINT_MAX); - ret = btrfs_repair_one_sector(inode, - &bbio->bio, - start - orig_file_offset, - bvec.bv_page, pgoff, + ret = btrfs_repair_one_sector(inode, &bbio->bio, + bio_offset, bvec.bv_page, pgoff, start, bbio->mirror_num, submit_dio_repair_bio); if (ret) err = errno_to_blk_status(ret); } - start += sectorsize; ASSERT(bio_offset + sectorsize > bio_offset); bio_offset += sectorsize; pgoff += sectorsize; @@ -8009,6 +8005,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter, bio = btrfs_bio_clone_partial(dio_bio, clone_offset, clone_len); bio->bi_private = dip; bio->bi_end_io = btrfs_end_dio_bio; + btrfs_bio(bio)->file_offset = file_offset; if (bio_op(bio) == REQ_OP_ZONE_APPEND) { status = extract_ordered_extent(BTRFS_I(inode), bio, diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 005c9e2a491a1..c22148bebc2f5 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -323,6 +323,9 @@ struct btrfs_fs_devices { struct btrfs_bio { unsigned int mirror_num; + /* for direct I/O */ + u64 file_offset; + /* @device is for stripe IO submission. */ struct btrfs_device *device; u8 *csum;