From: Keith Busch <kbusch@xxxxxxxxxx>

Enable direct io to read partial sectors if the block device supports bit
buckets.

Signed-off-by: Keith Busch <kbusch@xxxxxxxxxx>
---
 block/fops.c | 69 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 56 insertions(+), 13 deletions(-)

diff --git a/block/fops.c b/block/fops.c
index f37af5924cef..5eee8cef7ce0 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -46,9 +46,10 @@ static unsigned int dio_bio_write_op(struct kiocb *iocb)
 
 static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 		struct iov_iter *iter, unsigned int nr_pages,
-		struct block_device *bdev, loff_t pos)
+		struct block_device *bdev, loff_t pos, u16 skip, u16 trunc)
 {
 	struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
+	u16 bucket_bytes = skip + trunc;
 	bool should_dirty = false;
 	struct bio bio;
 	ssize_t ret;
@@ -72,10 +73,19 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 	bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
 	bio.bi_ioprio = iocb->ki_ioprio;
 
+	if (bucket_bytes) {
+		bio_set_flag(&bio, BIO_BIT_BUCKET);
+		if (skip)
+			blk_add_bb_page(&bio, skip);
+	}
+
 	ret = bio_iov_iter_get_pages(&bio, iter);
 	if (unlikely(ret))
 		goto out;
-	ret = bio.bi_iter.bi_size;
+
+	if (trunc)
+		blk_add_bb_page(&bio, trunc);
+	ret = bio.bi_iter.bi_size - bucket_bytes;
 
 	if (iov_iter_rw(iter) == WRITE)
 		task_io_account_write(ret);
@@ -157,13 +167,15 @@ static void blkdev_bio_end_io(struct bio *bio)
 }
 
 static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
-		unsigned int nr_pages, struct block_device *bdev, loff_t pos)
+		unsigned int nr_pages, struct block_device *bdev, loff_t pos,
+		u16 skip, u16 trunc)
 {
 	struct blk_plug plug;
 	struct blkdev_dio *dio;
 	struct bio *bio;
 	bool is_read = (iov_iter_rw(iter) == READ), is_sync;
 	unsigned int opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
+	u16 bucket_bytes = skip + trunc;
 	int ret = 0;
 
 	if (iocb->ki_flags & IOCB_ALLOC_CACHE)
@@ -199,6 +211,14 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 		bio->bi_end_io = blkdev_bio_end_io;
 		bio->bi_ioprio = iocb->ki_ioprio;
 
+		if (bucket_bytes) {
+			bio_set_flag(bio, BIO_BIT_BUCKET);
+			if (skip) {
+				blk_add_bb_page(bio, skip);
+				skip = 0;
+			}
+		}
+
 		ret = bio_iov_iter_get_pages(bio, iter);
 		if (unlikely(ret)) {
 			bio->bi_status = BLK_STS_IOERR;
@@ -206,6 +226,11 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 			break;
 		}
 
+		if (trunc && !iov_iter_count(iter)) {
+			blk_add_bb_page(bio, trunc);
+			trunc = 0;
+		}
+
 		if (is_read) {
 			if (dio->flags & DIO_SHOULD_DIRTY)
 				bio_set_pages_dirty(bio);
@@ -218,7 +243,8 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 		dio->size += bio->bi_iter.bi_size;
 		pos += bio->bi_iter.bi_size;
 
-		nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);
+		nr_pages = bio_iov_vecs_to_alloc_partial(iter, BIO_MAX_VECS, 0,
+							 trunc);
 		if (!nr_pages) {
 			submit_bio(bio);
 			break;
@@ -244,7 +270,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	if (!ret)
 		ret = blk_status_to_errno(dio->bio.bi_status);
 	if (likely(!ret))
-		ret = dio->size;
+		ret = dio->size - bucket_bytes;
 
 	bio_put(&dio->bio);
 	return ret;
@@ -277,10 +303,11 @@ static void blkdev_bio_end_io_async(struct bio *bio)
 static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 					struct iov_iter *iter,
 					unsigned int nr_pages,
-					struct block_device *bdev, loff_t pos)
+					struct block_device *bdev, loff_t pos, u16 skip, u16 trunc)
 {
 	bool is_read = iov_iter_rw(iter) == READ;
 	unsigned int opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
+	u16 bucket_bytes = skip + trunc;
 	struct blkdev_dio *dio;
 	struct bio *bio;
 	int ret = 0;
@@ -296,6 +323,12 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 	bio->bi_end_io = blkdev_bio_end_io_async;
 	bio->bi_ioprio = iocb->ki_ioprio;
 
+	if (bucket_bytes) {
+		bio_set_flag(bio, BIO_BIT_BUCKET);
+		if (skip)
+			blk_add_bb_page(bio, skip);
+	}
+
 	if (iov_iter_is_bvec(iter)) {
 		/*
 		 * Users don't rely on the iterator being in any particular
@@ -311,7 +344,11 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 			return ret;
 		}
 	}
-	dio->size = bio->bi_iter.bi_size;
+
+	if (trunc)
+		blk_add_bb_page(bio, trunc);
+
+	dio->size = bio->bi_iter.bi_size - bucket_bytes;
 
 	if (is_read) {
 		if (iter_is_iovec(iter)) {
@@ -338,23 +375,29 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 {
 	struct block_device *bdev = iocb->ki_filp->private_data;
 	loff_t pos = iocb->ki_pos;
+	u16 skip = 0, trunc = 0;
 	unsigned int nr_pages;
 
 	if (!iov_iter_count(iter))
 		return 0;
-	if (blkdev_dio_unaligned(bdev, pos, iter))
-		return -EINVAL;
+	if (blkdev_dio_unaligned(bdev, pos, iter)) {
+		if (!blkdev_bit_bucket(bdev, pos, iov_iter_count(iter), iter,
+				       &skip, &trunc))
+			return -EINVAL;
+		nr_pages = bio_iov_vecs_to_alloc_partial(iter, BIO_MAX_VECS + 1,
+							 skip, trunc);
+	} else
+		nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
 
-	nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
 	if (likely(nr_pages <= BIO_MAX_VECS)) {
 		if (is_sync_kiocb(iocb))
 			return __blkdev_direct_IO_simple(iocb, iter, nr_pages,
-							 bdev, pos);
+							 bdev, pos, skip, trunc);
 		return __blkdev_direct_IO_async(iocb, iter, nr_pages, bdev,
-						pos);
+						pos, skip, trunc);
 	}
 	return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages), bdev,
-				  pos);
+				  pos, skip, trunc);
 }
 
 static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
-- 
2.30.2