On 2022/05/26 10:06, Keith Busch wrote:
> From: Keith Busch <kbusch@xxxxxxxxxx>
>
> Use the address alignment requirements from the hardware for direct io
> instead of requiring addresses be aligned to the block size. User space
> can discover the alignment requirements from the dma_alignment queue
> attribute.
>
> User space can specify any hardware compatible DMA offset for each
> segment, but every segment length is still required to be a multiple of
> the block size.
>
> Signed-off-by: Keith Busch <kbusch@xxxxxxxxxx>
> ---
>  block/bio.c  | 12 ++++++++++++
>  block/fops.c | 14 +++++++++++---
>  2 files changed, 23 insertions(+), 3 deletions(-)
>
> diff --git a/block/bio.c b/block/bio.c
> index 55d2a9c4e312..c492881959d1 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -1219,7 +1219,19 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
>  	BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
>  	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
>
> +	/*
> +	 * Each segment in the iov is required to be a block size multiple.
> +	 * However, we may not be able to get the entire segment if it spans
> +	 * more pages than bi_max_vecs allows, so we have to ALIGN_DOWN the
> +	 * result to ensure the bio's total size is correct. The remainder of
> +	 * the iov data will be picked up in the next bio iteration.
> +	 *
> +	 * If the result is ever 0, that indicates the iov fails the segment
> +	 * size requirement and is an error.
> +	 */
>  	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
> +	if (size > 0)
> +		size = ALIGN_DOWN(size, bdev_logical_block_size(bio->bi_bdev));
>  	if (unlikely(size <= 0))
>  		return size ? size : -EFAULT;
>
> diff --git a/block/fops.c b/block/fops.c
> index bd6c2e13a4e3..6ecbccc552b9 100644
> --- a/block/fops.c
> +++ b/block/fops.c
> @@ -45,10 +45,10 @@ static unsigned int dio_bio_write_op(struct kiocb *iocb)
>  static int blkdev_dio_aligned(struct block_device *bdev, loff_t pos,
>  			      struct iov_iter *iter)
>  {
> -	if ((pos | iov_iter_alignment(iter)) &
> -	    (bdev_logical_block_size(bdev) - 1))
> +	if ((pos | iov_iter_count(iter)) & (bdev_logical_block_size(bdev) - 1))
> +		return -EINVAL;
> +	if (iov_iter_alignment(iter) & bdev_dma_alignment(bdev))
>  		return -EINVAL;
> -
>  	return 0;
>  }
>
> @@ -88,6 +88,10 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
>  	bio.bi_ioprio = iocb->ki_ioprio;
>
>  	ret = bio_iov_iter_get_pages(&bio, iter);
> +
> +	/* check if iov is not aligned */
> +	if (unlikely(!ret && iov_iter_count(iter)))
> +		ret = -EINVAL;
>  	if (unlikely(ret))
>  		goto out;
>  	ret = bio.bi_iter.bi_size;
> @@ -333,6 +337,10 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
>  		bio_iov_bvec_set(bio, iter);
>  	} else {
>  		ret = bio_iov_iter_get_pages(bio, iter);
> +
> +		/* check if iov is not aligned */
> +		if (unlikely(!ret && iov_iter_count(iter)))
> +			ret = -EINVAL;
>  		if (unlikely(ret)) {
>  			bio_put(bio);
>  			return ret;

Looks OK to me.

Reviewed-by: Damien Le Moal <damien.lemoal@xxxxxxxxxxxxxxxxxx>

--
Damien Le Moal
Western Digital Research