[PATCHv3 6/6] block: relax direct io memory alignment

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Keith Busch <kbusch@xxxxxxxxxx>

Use the address alignment requirements from the hardware for direct I/O
instead of requiring addresses to be aligned to the logical block size.
User space can discover the alignment requirements from the
dma_alignment queue attribute.

User space can specify any hardware-compatible DMA offset for each
segment, but every segment length is still required to be a multiple of
the logical block size.

Signed-off-by: Keith Busch <kbusch@xxxxxxxxxx>
---
v2->v3:

  Removed iomap support for now

  Added alignment helper function instead of duplicating it (Christoph)

  Added comment explaining ALIGN_DOWN

  Added check for iov alignment in _async case

 block/bio.c  | 13 +++++++++++++
 block/fops.c | 41 ++++++++++++++++++++++++++++++-----------
 2 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 55d2a9c4e312..c8ea14ad87f6 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1205,6 +1205,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 {
 	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
 	unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
+	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
 	struct page **pages = (struct page **)bv;
 	ssize_t size, left;
@@ -1219,7 +1220,19 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
 	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
 
+	/*
+	 * Each segment in the iov is required to be a block size multiple.
+	 * However, we may not be able to get the entire segment if it spans
+	 * more pages than bi_max_vecs allows, so we have to ALIGN_DOWN the
+	 * result to ensure the bio's total size is correct. The remainder of
+	 * the iov data will be picked up in the next bio iteration.
+	 *
+	 * If the result is ever 0, that indicates the iov fails the segment
+	 * size requirement and is an error.
+	 */
 	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
+	if (size > 0)
+		size = ALIGN_DOWN(size, queue_logical_block_size(q));
 	if (unlikely(size <= 0))
 		return size ? size : -EFAULT;
 
diff --git a/block/fops.c b/block/fops.c
index b9b83030e0df..218e4a8b92aa 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -42,6 +42,16 @@ static unsigned int dio_bio_write_op(struct kiocb *iocb)
 	return op;
 }
 
+static int blkdev_dio_aligned(struct block_device *bdev, loff_t pos,
+			      struct iov_iter *iter)
+{
+	if ((pos | iov_iter_count(iter)) & (bdev_logical_block_size(bdev) - 1))
+		return -EINVAL;
+	if (iov_iter_alignment(iter) & bdev_dma_alignment(bdev))
+		return -EINVAL;
+	return 0;
+}
+
 #define DIO_INLINE_BIO_VECS 4
 
 static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
@@ -54,9 +64,9 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 	struct bio bio;
 	ssize_t ret;
 
-	if ((pos | iov_iter_alignment(iter)) &
-	    (bdev_logical_block_size(bdev) - 1))
-		return -EINVAL;
+	ret = blkdev_dio_aligned(bdev, pos, iter);
+	if (ret)
+		return ret;
 
 	if (nr_pages <= DIO_INLINE_BIO_VECS)
 		vecs = inline_vecs;
@@ -80,6 +90,11 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 	ret = bio_iov_iter_get_pages(&bio, iter);
 	if (unlikely(ret))
 		goto out;
+	/* check if iov is not aligned */
+	if (unlikely(iov_iter_count(iter))) {
+		ret = -EINVAL;
+		goto out;
+	}
 	ret = bio.bi_iter.bi_size;
 
 	if (iov_iter_rw(iter) == WRITE)
@@ -171,11 +186,11 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	bool is_read = (iov_iter_rw(iter) == READ), is_sync;
 	unsigned int opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
 	loff_t pos = iocb->ki_pos;
-	int ret = 0;
+	int ret;
 
-	if ((pos | iov_iter_alignment(iter)) &
-	    (bdev_logical_block_size(bdev) - 1))
-		return -EINVAL;
+	ret = blkdev_dio_aligned(bdev, pos, iter);
+	if (ret)
+		return ret;
 
 	if (iocb->ki_flags & IOCB_ALLOC_CACHE)
 		opf |= REQ_ALLOC_CACHE;
@@ -296,11 +311,11 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 	struct blkdev_dio *dio;
 	struct bio *bio;
 	loff_t pos = iocb->ki_pos;
-	int ret = 0;
+	int ret;
 
-	if ((pos | iov_iter_alignment(iter)) &
-	    (bdev_logical_block_size(bdev) - 1))
-		return -EINVAL;
+	ret = blkdev_dio_aligned(bdev, pos, iter);
+	if (ret)
+		return ret;
 
 	if (iocb->ki_flags & IOCB_ALLOC_CACHE)
 		opf |= REQ_ALLOC_CACHE;
@@ -323,6 +338,10 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 		bio_iov_bvec_set(bio, iter);
 	} else {
 		ret = bio_iov_iter_get_pages(bio, iter);
+
+		/* check if iov is not aligned */
+		if (unlikely(iov_iter_count(iter)))
+			ret = -EINVAL;
 		if (unlikely(ret)) {
 			bio_put(bio);
 			return ret;
-- 
2.30.2





[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux