Flag IOMAP_ATOMIC_SW is not really required. The idea of having this flag is that the FS ->iomap_begin callback could check if this flag is set to decide whether to do a SW (FS-based) atomic write. But the FS can set which ->iomap_begin callback it wants when deciding to do a FS-based atomic write. Furthermore, it was thought that IOMAP_ATOMIC_HW is not a proper name, as the block driver can use SW-methods to emulate an atomic write. So change back to IOMAP_ATOMIC. The ->iomap_begin callback needs though to indicate to iomap core that REQ_ATOMIC needs to be set, so add IOMAP_F_ATOMIC_BIO for that. These changes were suggested by Christoph Hellwig and Dave Chinner. Signed-off-by: John Garry <john.g.garry@xxxxxxxxxx> --- .../filesystems/iomap/operations.rst | 35 ++++++++++--------- fs/ext4/inode.c | 6 +++- fs/iomap/direct-io.c | 8 ++--- fs/iomap/trace.h | 2 +- fs/xfs/xfs_iomap.c | 4 +++ include/linux/iomap.h | 12 +++---- 6 files changed, 37 insertions(+), 30 deletions(-) diff --git a/Documentation/filesystems/iomap/operations.rst b/Documentation/filesystems/iomap/operations.rst index b08a79d11d9f..3b628e370d88 100644 --- a/Documentation/filesystems/iomap/operations.rst +++ b/Documentation/filesystems/iomap/operations.rst @@ -514,29 +514,32 @@ IOMAP_WRITE`` with any combination of the following enhancements: if the mapping is unwritten and the filesystem cannot handle zeroing the unaligned regions without exposing stale contents. - * ``IOMAP_ATOMIC_HW``: This write is being issued with torn-write - protection based on HW-offload support. - Only a single bio can be created for the write, and the write must - not be split into multiple I/O requests, i.e. flag REQ_ATOMIC must be - set. + * ``IOMAP_ATOMIC``: This write is being issued with torn-write + protection. + Torn-write protection may be provided based on HW-offload or by a + software mechanism provided by the filesystem. + + For HW-offload based support, only a single bio can be created for the + write, and the write must not be split into multiple I/O requests, i.e. + flag REQ_ATOMIC must be set. The file range to write must be aligned to satisfy the requirements of both the filesystem and the underlying block device's atomic commit capabilities. If filesystem metadata updates are required (e.g. unwritten extent - conversion or copy on write), all updates for the entire file range + conversion or copy-on-write), all updates for the entire file range must be committed atomically as well. - Only one space mapping is allowed per untorn write. - Untorn writes may be longer than a single file block. In all cases, + Untorn-writes may be longer than a single file block. In all cases, the mapping start disk block must have at least the same alignment as the write offset. - - * ``IOMAP_ATOMIC_SW``: This write is being issued with torn-write - protection via a software mechanism provided by the filesystem. - All the disk block alignment and single bio restrictions which apply - to IOMAP_ATOMIC_HW do not apply here. - SW-based untorn writes would typically be used as a fallback when - HW-based untorn writes may not be issued, e.g. the range of the write - covers multiple extents, meaning that it is not possible to issue + The filesystems must set IOMAP_F_ATOMIC_BIO to inform iomap core of an + untorn-write based on HW-offload. + + For untorn-writes based on a software mechanism provided by the + filesystem, all the disk block alignment and single bio restrictions + which apply for HW-offload based untorn-writes do not apply. + The mechanism would typically be used as a fallback for when + HW-offload based untorn-writes may not be issued, e.g. the range of the + write covers multiple extents, meaning that it is not possible to issue a single bio. All filesystem metadata updates for the entire file range must be committed atomically as well. diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ba2f1e3db7c7..d04d8a7f12e7 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3290,6 +3290,10 @@ static void ext4_set_iomap(struct inode *inode, struct iomap *iomap, if (map->m_flags & EXT4_MAP_NEW) iomap->flags |= IOMAP_F_NEW; + /* HW-offload atomics are always used */ + if (flags & IOMAP_ATOMIC) + iomap->flags |= IOMAP_F_ATOMIC_BIO; + if (flags & IOMAP_DAX) iomap->dax_dev = EXT4_SB(inode->i_sb)->s_daxdev; else @@ -3467,7 +3471,7 @@ static inline bool ext4_want_directio_fallback(unsigned flags, ssize_t written) return false; /* atomic writes are all-or-nothing */ - if (flags & IOMAP_ATOMIC_HW) + if (flags & IOMAP_ATOMIC) return false; /* can only try again if we wrote nothing */ diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index b9f59ca43c15..6ac7a1534f7c 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -349,7 +349,7 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio) if (dio->flags & IOMAP_DIO_WRITE) { bio_opf |= REQ_OP_WRITE; - if (iter->flags & IOMAP_ATOMIC_HW) { + if (iomap->flags & IOMAP_F_ATOMIC_BIO) { /* * Ensure that the mapping covers the full write * length, otherwise it won't be submitted as a single @@ -677,10 +677,8 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, iomi.flags |= IOMAP_OVERWRITE_ONLY; } - if (dio_flags & IOMAP_DIO_ATOMIC_SW) - iomi.flags |= IOMAP_ATOMIC_SW; - else if (iocb->ki_flags & IOCB_ATOMIC) - iomi.flags |= IOMAP_ATOMIC_HW; + if (iocb->ki_flags & IOCB_ATOMIC) + iomi.flags |= IOMAP_ATOMIC; /* for data sync or sync, we need sync completion processing */ if (iocb_is_dsync(iocb)) { diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h index 69af89044ebd..9eab2c8ac3c5 100644 --- a/fs/iomap/trace.h +++ b/fs/iomap/trace.h @@ -99,7 +99,7 @@ DEFINE_RANGE_EVENT(iomap_dio_rw_queued); { IOMAP_FAULT, "FAULT" }, \ { IOMAP_DIRECT, "DIRECT" }, \ { IOMAP_NOWAIT, "NOWAIT" }, \ - { IOMAP_ATOMIC_HW, "ATOMIC_HW" } + { IOMAP_ATOMIC, "ATOMIC" } #define IOMAP_F_FLAGS_STRINGS \ { IOMAP_F_NEW, "NEW" }, \ diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 5dd0922fe2d1..ee40dc509413 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -828,6 +828,10 @@ xfs_direct_write_iomap_begin( if (offset + length > i_size_read(inode)) iomap_flags |= IOMAP_F_DIRTY; + /* HW-offload atomics are always used in this path */ + if (flags & IOMAP_ATOMIC) + iomap_flags |= IOMAP_F_ATOMIC_BIO; + /* * COW writes may allocate delalloc space or convert unwritten COW * extents, so we need to make sure to take the lock exclusively here. diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 9cd93530013c..02fe001feebb 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -60,6 +60,9 @@ struct vm_fault; * IOMAP_F_ANON_WRITE indicates that (write) I/O does not have a target block * assigned to it yet and the file system will do that in the bio submission * handler, splitting the I/O as needed. + * + * IOMAP_F_ATOMIC_BIO indicates that (write) I/O will be issued as an atomic + * bio, i.e. set REQ_ATOMIC. */ #define IOMAP_F_NEW (1U << 0) #define IOMAP_F_DIRTY (1U << 1) @@ -73,6 +76,7 @@ struct vm_fault; #define IOMAP_F_XATTR (1U << 5) #define IOMAP_F_BOUNDARY (1U << 6) #define IOMAP_F_ANON_WRITE (1U << 7) +#define IOMAP_F_ATOMIC_BIO (1U << 8) /* * Flags set by the core iomap code during operations: @@ -189,9 +193,8 @@ struct iomap_folio_ops { #else #define IOMAP_DAX 0 #endif /* CONFIG_FS_DAX */ -#define IOMAP_ATOMIC_HW (1 << 9) /* HW-based torn-write protection */ +#define IOMAP_ATOMIC (1 << 9) /* torn-write protection */ #define IOMAP_DONTCACHE (1 << 10) -#define IOMAP_ATOMIC_SW (1 << 11)/* SW-based torn-write protection */ struct iomap_ops { /* @@ -503,11 +506,6 @@ struct iomap_dio_ops { */ #define IOMAP_DIO_PARTIAL (1 << 2) -/* - * Use software-based torn-write protection. - */ -#define IOMAP_DIO_ATOMIC_SW (1 << 3) - ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, unsigned int dio_flags, void *private, size_t done_before); -- 2.31.1