From: Keith Busch <kbusch@xxxxxxxxxx> This prepares for sources other than the inode to provide a write hint. The block layer will use it for direct IO if the requested hint is within the block device's allowed hints. The hint field in the kiocb structure fits in an existing 2-byte hole, so its size is not changed. Signed-off-by: Keith Busch <kbusch@xxxxxxxxxx> --- block/fops.c | 31 ++++++++++++++++++++++++++++--- include/linux/fs.h | 1 + 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/block/fops.c b/block/fops.c index 2d01c90076813..bb3855ee044f0 100644 --- a/block/fops.c +++ b/block/fops.c @@ -71,7 +71,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb)); } bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT; - bio.bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint; + bio.bi_write_hint = iocb->ki_write_hint; bio.bi_ioprio = iocb->ki_ioprio; if (iocb->ki_flags & IOCB_ATOMIC) bio.bi_opf |= REQ_ATOMIC; @@ -200,7 +200,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, for (;;) { bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT; - bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint; + bio->bi_write_hint = iocb->ki_write_hint; bio->bi_private = dio; bio->bi_end_io = blkdev_bio_end_io; bio->bi_ioprio = iocb->ki_ioprio; @@ -316,7 +316,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, dio->flags = 0; dio->iocb = iocb; bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT; - bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint; + bio->bi_write_hint = iocb->ki_write_hint; bio->bi_end_io = blkdev_bio_end_io_async; bio->bi_ioprio = iocb->ki_ioprio; @@ -362,6 +362,23 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, return -EIOCBQUEUED; } +static int blkdev_write_hint(struct kiocb *iocb, struct block_device *bdev) +{ + u16 hint = iocb->ki_write_hint; + + if (!hint) + return file_inode(iocb->ki_filp)->i_write_hint; + + if (hint > bdev_max_write_hints(bdev)) + return -EINVAL; + + if (bdev_is_partition(bdev) && + !test_bit(hint - 1, bdev->write_hint_mask)) + return -EINVAL; + + return hint; +} + static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host); @@ -373,6 +390,14 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (blkdev_dio_invalid(bdev, iocb, iter)) return -EINVAL; + if (iov_iter_rw(iter) == WRITE) { + int hint = blkdev_write_hint(iocb, bdev); + + if (hint < 0) + return hint; + iocb->ki_write_hint = hint; + } + nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1); if (likely(nr_pages <= BIO_MAX_VECS)) { if (is_sync_kiocb(iocb)) diff --git a/include/linux/fs.h b/include/linux/fs.h index 4b5cad44a1268..1a00accf412e5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -370,6 +370,7 @@ struct kiocb { void *private; int ki_flags; u16 ki_ioprio; /* See linux/ioprio.h */ + u16 ki_write_hint; union { /* * Only used for async buffered reads, where it denotes the -- 2.43.5