Add flag IOMAP_ATOMIC_WRITE to indicate to the FS that an atomic write bio is being created and all the rules there need to be followed. It is the task of the FS iomap iter callbacks to ensure that the mapping created adheres to those rules, like size is power-of-2, is at a naturally-aligned offset, etc. In iomap_dio_bio_iter(), ensure that for a non-dsync iocb that the mapping is not dirty nor unmapped. A write should only produce a single bio, so error when it doesn't. Signed-off-by: John Garry <john.g.garry@xxxxxxxxxx> --- fs/iomap/direct-io.c | 26 ++++++++++++++++++++++++-- fs/iomap/trace.h | 3 ++- include/linux/iomap.h | 1 + 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index bcd3f8cf5ea4..6ef25e26f1a1 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -275,10 +275,11 @@ static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio, static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, struct iomap_dio *dio) { + bool atomic_write = iter->flags & IOMAP_ATOMIC_WRITE; const struct iomap *iomap = &iter->iomap; struct inode *inode = iter->inode; unsigned int fs_block_size = i_blocksize(inode), pad; - loff_t length = iomap_length(iter); + const loff_t length = iomap_length(iter); loff_t pos = iter->pos; blk_opf_t bio_opf; struct bio *bio; @@ -292,6 +293,13 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, !bdev_iter_is_aligned(iomap->bdev, dio->submit.iter)) return -EINVAL; + if (atomic_write && !iocb_is_dsync(dio->iocb)) { + if (iomap->flags & IOMAP_F_DIRTY) + return -EIO; + if (iomap->type != IOMAP_MAPPED) + return -EIO; + } + if (iomap->type == IOMAP_UNWRITTEN) { dio->flags |= IOMAP_DIO_UNWRITTEN; need_zeroout = true; @@ -381,6 +389,9 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, GFP_KERNEL); bio->bi_iter.bi_sector = iomap_sector(iomap, pos); bio->bi_ioprio = dio->iocb->ki_ioprio; + if (atomic_write) + bio->bi_opf |= REQ_ATOMIC; + bio->bi_private = dio; bio->bi_end_io = iomap_dio_bio_end_io; @@ -397,6 +408,12 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, } n = bio->bi_iter.bi_size; + if (atomic_write && n != length) { + /* This bio should have covered the complete length */ + ret = -EINVAL; + bio_put(bio); + goto out; + } if (dio->flags & IOMAP_DIO_WRITE) { task_io_account_write(n); } else { @@ -554,6 +571,8 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, struct blk_plug plug; struct iomap_dio *dio; loff_t ret = 0; + bool is_read = iov_iter_rw(iter) == READ; + bool atomic_write = (iocb->ki_flags & IOCB_ATOMIC) && !is_read; trace_iomap_dio_rw_begin(iocb, iter, dio_flags, done_before); @@ -579,7 +598,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (iocb->ki_flags & IOCB_NOWAIT) iomi.flags |= IOMAP_NOWAIT; - if (iov_iter_rw(iter) == READ) { + if (is_read) { /* reads can always complete inline */ dio->flags |= IOMAP_DIO_INLINE_COMP; @@ -605,6 +624,9 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (iocb->ki_flags & IOCB_DIO_CALLER_COMP) dio->flags |= IOMAP_DIO_CALLER_COMP; + if (atomic_write) + iomi.flags |= IOMAP_ATOMIC_WRITE; + if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) { ret = -EAGAIN; if (iomi.pos >= dio->i_size || diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h index c16fd55f5595..f9932733c180 100644 --- a/fs/iomap/trace.h +++ b/fs/iomap/trace.h @@ -98,7 +98,8 @@ DEFINE_RANGE_EVENT(iomap_dio_rw_queued); { IOMAP_REPORT, "REPORT" }, \ { IOMAP_FAULT, "FAULT" }, \ { IOMAP_DIRECT, "DIRECT" }, \ - { IOMAP_NOWAIT, "NOWAIT" } + { IOMAP_NOWAIT, "NOWAIT" }, \ + { IOMAP_ATOMIC_WRITE, "ATOMIC" } #define IOMAP_F_FLAGS_STRINGS \ { IOMAP_F_NEW, "NEW" }, \ diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 96dd0acbba44..5138cede54fc 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -178,6 +178,7 @@ struct iomap_folio_ops { #else #define IOMAP_DAX 0 #endif /* CONFIG_FS_DAX */ +#define IOMAP_ATOMIC_WRITE (1 << 9) struct iomap_ops { /* -- 2.31.1