From: Keith Busch <kbusch@xxxxxxxxxx>

Use the address alignment requirements from the hardware for direct io
instead of requiring addresses be aligned to the block size. User space
can discover the alignment requirements from the dma_alignment queue
attribute.

User space can specify any hardware compatible DMA offset for each
segment, but every segment length is still required to be a multiple of
the block size.

Signed-off-by: Keith Busch <kbusch@xxxxxxxxxx>
---
 block/bio.c  | 12 ++++++++++++
 block/fops.c | 14 +++++++++++---
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 55d2a9c4e312..c492881959d1 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1219,7 +1219,19 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
 	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
 
+	/*
+	 * Each segment in the iov is required to be a block size multiple.
+	 * However, we may not be able to get the entire segment if it spans
+	 * more pages than bi_max_vecs allows, so we have to ALIGN_DOWN the
+	 * result to ensure the bio's total size is correct. The remainder of
+	 * the iov data will be picked up in the next bio iteration.
+	 *
+	 * If the result is ever 0, that indicates the iov fails the segment
+	 * size requirement and is an error.
+	 */
 	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
+	if (size > 0)
+		size = ALIGN_DOWN(size, bdev_logical_block_size(bio->bi_bdev));
 	if (unlikely(size <= 0))
 		return size ? size : -EFAULT;
 
diff --git a/block/fops.c b/block/fops.c
index bd6c2e13a4e3..6ecbccc552b9 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -45,10 +45,10 @@ static unsigned int dio_bio_write_op(struct kiocb *iocb)
 static int blkdev_dio_aligned(struct block_device *bdev, loff_t pos,
 			      struct iov_iter *iter)
 {
-	if ((pos | iov_iter_alignment(iter)) &
-	    (bdev_logical_block_size(bdev) - 1))
+	if ((pos | iov_iter_count(iter)) & (bdev_logical_block_size(bdev) - 1))
+		return -EINVAL;
+	if (iov_iter_alignment(iter) & bdev_dma_alignment(bdev))
 		return -EINVAL;
-
 	return 0;
 }
 
@@ -88,6 +88,10 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 	bio.bi_ioprio = iocb->ki_ioprio;
 
 	ret = bio_iov_iter_get_pages(&bio, iter);
+
+	/* check if iov is not aligned */
+	if (unlikely(!ret && iov_iter_count(iter)))
+		ret = -EINVAL;
 	if (unlikely(ret))
 		goto out;
 	ret = bio.bi_iter.bi_size;
@@ -333,6 +337,10 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 		bio_iov_bvec_set(bio, iter);
 	} else {
 		ret = bio_iov_iter_get_pages(bio, iter);
+
+		/* check if iov is not aligned */
+		if (unlikely(!ret && iov_iter_count(iter)))
+			ret = -EINVAL;
 		if (unlikely(ret)) {
 			bio_put(bio);
 			return ret;
-- 
2.30.2