Add bio.atomic_write_unit, which is the minimum size into which we can split a bio. Any bio needs to be split in a multiple of this size and also aligned to this size. In __bio_iov_iter_get_pages(), use atomic_write_unit to trim a bio to be a multiple of atomic_write_unit. In bio_split_rw(), we need to consider splitting as follows: - For a regular split which does not cross an atomic write boundary, same as in __bio_iov_iter_get_pages(), trim to be a multiple of atomic_write_unit - We also need to check for when a bio straddles an atomic write boundary. In this case, split to be start/end-aligned with the boundary. We need to ignore lim->max_sectors since it may be less than bio->atomic_write_unit, which we cannot tolerate. Signed-off-by: John Garry <john.g.garry@xxxxxxxxxx> --- block/bio.c | 7 +++- block/blk-merge.c | 84 ++++++++++++++++++++++++++++++++++----- include/linux/blk_types.h | 2 + 3 files changed, 81 insertions(+), 12 deletions(-) diff --git a/block/bio.c b/block/bio.c index fd11614bba4d..fc2f29e1c14c 100644 --- a/block/bio.c +++ b/block/bio.c @@ -247,6 +247,7 @@ void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, unsigned short max_vecs, blk_opf_t opf) { bio->bi_next = NULL; + bio->atomic_write_unit = 0; bio->bi_bdev = bdev; bio->bi_opf = opf; bio->bi_flags = 0; @@ -815,6 +816,7 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp) bio->bi_ioprio = bio_src->bi_ioprio; bio->bi_iter = bio_src->bi_iter; + bio->atomic_write_unit = bio_src->atomic_write_unit; if (bio->bi_bdev) { if (bio->bi_bdev == bio_src->bi_bdev && bio_flagged(bio_src, BIO_REMAPPED)) @@ -1273,7 +1275,10 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) nr_pages = DIV_ROUND_UP(offset + size, PAGE_SIZE); - trim = size & (bdev_logical_block_size(bio->bi_bdev) - 1); + if (bio->atomic_write_unit) + trim = size & (bio->atomic_write_unit - 1); + else + trim = size & (bdev_logical_block_size(bio->bi_bdev) - 1); 
iov_iter_revert(iter, trim); size -= trim; diff --git a/block/blk-merge.c b/block/blk-merge.c index 6460abdb2426..95ab6b644955 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -171,7 +171,17 @@ static inline unsigned get_max_io_size(struct bio *bio, { unsigned pbs = lim->physical_block_size >> SECTOR_SHIFT; unsigned lbs = lim->logical_block_size >> SECTOR_SHIFT; - unsigned max_sectors = lim->max_sectors, start, end; + unsigned max_sectors, start, end; + + /* + * We ignore lim->max_sectors for atomic writes simply because + * it may be less than bio->atomic_write_unit, which we cannot + * tolerate. + */ + if (bio->bi_opf & REQ_ATOMIC) + max_sectors = lim->atomic_write_max_bytes >> SECTOR_SHIFT; + else + max_sectors = lim->max_sectors; if (lim->chunk_sectors) { max_sectors = min(max_sectors, @@ -256,6 +266,22 @@ static bool bvec_split_segs(const struct queue_limits *lim, return len > 0 || bv->bv_len > max_len; } +static bool bio_straddles_boundary(struct bio *bio, unsigned int bytes, + unsigned int boundary) +{ + loff_t start = bio->bi_iter.bi_sector << SECTOR_SHIFT; + loff_t end = start + bytes; + loff_t start_mod = start % boundary; + loff_t end_mod = end % boundary; + + if (end - start > boundary) + return true; + if ((start_mod > end_mod) && (start_mod && end_mod)) + return true; + + return false; +} + /** * bio_split_rw - split a bio in two bios * @bio: [in] bio to be split @@ -276,10 +302,15 @@ static bool bvec_split_segs(const struct queue_limits *lim, * responsible for ensuring that @bs is only destroyed after processing of the * split bio has finished. 
*/ + + struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim, unsigned *segs, struct bio_set *bs, unsigned max_bytes) { + unsigned int atomic_write_boundary = lim->atomic_write_boundary; + bool atomic_write = bio->bi_opf & REQ_ATOMIC; struct bio_vec bv, bvprv, *bvprvp = NULL; + bool straddles_boundary = false; struct bvec_iter iter; unsigned nsegs = 0, bytes = 0; @@ -291,14 +322,31 @@ struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim, if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv.bv_offset)) goto split; + if (atomic_write && atomic_write_boundary) { + straddles_boundary = bio_straddles_boundary(bio, + bytes + bv.bv_len, atomic_write_boundary); + } if (nsegs < lim->max_segments && bytes + bv.bv_len <= max_bytes && - bv.bv_offset + bv.bv_len <= PAGE_SIZE) { + bv.bv_offset + bv.bv_len <= PAGE_SIZE && + !straddles_boundary) { nsegs++; bytes += bv.bv_len; } else { - if (bvec_split_segs(lim, &bv, &nsegs, &bytes, - lim->max_segments, max_bytes)) + bool split_the_segs = + bvec_split_segs(lim, &bv, &nsegs, &bytes, + lim->max_segments, max_bytes); + + /* + * We may not actually straddle the boundary as we may + * have added less bytes than anticipated + */ + if (straddles_boundary) { + straddles_boundary = bio_straddles_boundary(bio, + bytes, atomic_write_boundary); + } + + if (split_the_segs || straddles_boundary) goto split; } @@ -321,12 +369,25 @@ struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim, *segs = nsegs; - /* - * Individual bvecs might not be logical block aligned. Round down the - * split size so that each bio is properly block size aligned, even if - * we do not use the full hardware limits. 
- */ - bytes = ALIGN_DOWN(bytes, lim->logical_block_size); + if (straddles_boundary) { + loff_t new_end = (bio->bi_iter.bi_sector << SECTOR_SHIFT) + bytes; + unsigned int trim = new_end & (atomic_write_boundary - 1); + bytes -= trim; + new_end = (bio->bi_iter.bi_sector << SECTOR_SHIFT) + bytes; + BUG_ON(new_end % atomic_write_boundary); + } else if (bio->atomic_write_unit) { + unsigned int atomic_write_unit = bio->atomic_write_unit; + unsigned int trim = bytes % atomic_write_unit; + + bytes -= trim; + } else { + /* + * Individual bvecs might not be logical block aligned. Round down the + * split size so that each bio is properly block size aligned, even if + * we do not use the full hardware limits. + */ + bytes = ALIGN_DOWN(bytes, lim->logical_block_size); + } /* * Bio splitting may cause subtle trouble such as hang when doing sync @@ -355,7 +416,8 @@ struct bio *__bio_split_to_limits(struct bio *bio, const struct queue_limits *lim, unsigned int *nr_segs) { - struct bio_set *bs = &bio->bi_bdev->bd_disk->bio_split; + struct block_device *bi_bdev = bio->bi_bdev; + struct bio_set *bs = &bi_bdev->bd_disk->bio_split; struct bio *split; switch (bio_op(bio)) { diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 347b52e00322..daa44eac9f14 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -303,6 +303,8 @@ struct bio { struct bio_set *bi_pool; + unsigned int atomic_write_unit; + /* * We can inline a number of vecs at the end of the bio, to avoid * double allocations for a small number of bio_vecs. This member -- 2.31.1