On Thu, Feb 27, 2025 at 06:08:09PM +0000, John Garry wrote: > In cases of an atomic write occurs for misaligned or discontiguous disk > blocks, we will use a CoW-based method to issue the atomic write. > > So, for that case, return -EAGAIN to request that the write be issued in > CoW atomic write mode. The dio write path should detect this, similar to > how misaligned regular DIO writes are handled. > > For normal HW-based mode, when the range which we are atomic writing to > covers a shared data extent, try to allocate a new CoW fork. However, if > we find that what we allocated does not meet atomic write requirements > in terms of length and alignment, then fallback on the CoW-based mode > for the atomic write. > > Signed-off-by: John Garry <john.g.garry@xxxxxxxxxx> Looks reasonable, Reviewed-by: "Darrick J. Wong" <djwong@xxxxxxxxxx> --D > --- > fs/xfs/xfs_iomap.c | 143 ++++++++++++++++++++++++++++++++++++++++++++- > fs/xfs/xfs_iomap.h | 1 + > 2 files changed, 142 insertions(+), 2 deletions(-) > > diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c > index edfc038bf728..573108cbdc5c 100644 > --- a/fs/xfs/xfs_iomap.c > +++ b/fs/xfs/xfs_iomap.c > @@ -795,6 +795,23 @@ imap_spans_range( > return true; > } > > +static bool > +xfs_bmap_valid_for_atomic_write( > + struct xfs_bmbt_irec *imap, > + xfs_fileoff_t offset_fsb, > + xfs_fileoff_t end_fsb) > +{ > + /* Misaligned start block wrt size */ > + if (!IS_ALIGNED(imap->br_startblock, imap->br_blockcount)) > + return false; > + > + /* Discontiguous or mixed extents */ > + if (!imap_spans_range(imap, offset_fsb, end_fsb)) > + return false; > + > + return true; > +} > + > static int > xfs_direct_write_iomap_begin( > struct inode *inode, > @@ -809,10 +826,13 @@ xfs_direct_write_iomap_begin( > struct xfs_bmbt_irec imap, cmap; > xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); > xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length); > + xfs_fileoff_t orig_end_fsb = end_fsb; > + bool atomic_hw = flags & IOMAP_ATOMIC_HW; > int nimaps = 1, error = 0; > unsigned int reflink_flags = 0; > bool shared = false; > u16 iomap_flags = 0; > + bool needs_alloc; > unsigned int lockmode; > u64 seq; > > @@ -871,13 +891,37 @@ xfs_direct_write_iomap_begin( > &lockmode, reflink_flags); > if (error) > goto out_unlock; > - if (shared) > + if (shared) { > + if (atomic_hw && > + !xfs_bmap_valid_for_atomic_write(&cmap, > + offset_fsb, end_fsb)) { > + error = -EAGAIN; > + goto out_unlock; > + } > goto out_found_cow; > + } > end_fsb = imap.br_startoff + imap.br_blockcount; > length = XFS_FSB_TO_B(mp, end_fsb) - offset; > } > > - if (imap_needs_alloc(inode, flags, &imap, nimaps)) > + needs_alloc = imap_needs_alloc(inode, flags, &imap, nimaps); > + > + if (atomic_hw) { > + error = -EAGAIN; > + /* > + * Use CoW method for when we need to alloc > 1 block, > + * otherwise we might allocate less than what we need here and > + * have multiple mappings. > + */ > + if (needs_alloc && orig_end_fsb - offset_fsb > 1) > + goto out_unlock; > + > + if (!xfs_bmap_valid_for_atomic_write(&imap, offset_fsb, > + orig_end_fsb)) > + goto out_unlock; > + } > + > + if (needs_alloc) > goto allocate_blocks; > > /* > @@ -965,6 +1009,101 @@ const struct iomap_ops xfs_direct_write_iomap_ops = { > .iomap_begin = xfs_direct_write_iomap_begin, > }; > > +static int > +xfs_atomic_write_sw_iomap_begin( > + struct inode *inode, > + loff_t offset, > + loff_t length, > + unsigned flags, > + struct iomap *iomap, > + struct iomap *srcmap) > +{ > + struct xfs_inode *ip = XFS_I(inode); > + struct xfs_mount *mp = ip->i_mount; > + struct xfs_bmbt_irec imap, cmap; > + xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); > + xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length); > + int nimaps = 1, error; > + unsigned int reflink_flags; > + bool shared = false; > + u16 iomap_flags = 0; > + unsigned int lockmode = XFS_ILOCK_EXCL; > + u64 seq; > + > + if (xfs_is_shutdown(mp)) > + return -EIO; > + > + reflink_flags = XFS_REFLINK_CONVERT | XFS_REFLINK_ATOMIC_SW; > + > + /* > + * Set IOMAP_F_DIRTY similar to xfs_atomic_write_iomap_begin() > + */ > + if (offset + length > i_size_read(inode)) > + iomap_flags |= IOMAP_F_DIRTY; > + > + error = xfs_ilock_for_iomap(ip, flags, &lockmode); > + if (error) > + return error; > + > + error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, > + &nimaps, 0); > + if (error) > + goto out_unlock; > + > + error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared, > + &lockmode, reflink_flags); > + /* > + * Don't check @shared. For atomic writes, we should error when > + * we don't get a COW mapping > + */ > + if (error) > + goto out_unlock; > + > + end_fsb = imap.br_startoff + imap.br_blockcount; > + > + length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount); > + trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap); > + if (imap.br_startblock != HOLESTARTBLOCK) { > + seq = xfs_iomap_inode_sequence(ip, 0); > + error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq); > + if (error) > + goto out_unlock; > + } > + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); > + xfs_iunlock(ip, lockmode); > + return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq); > + > +out_unlock: > + if (lockmode) > + xfs_iunlock(ip, lockmode); > + return error; > +} > + > +static int > +xfs_atomic_write_iomap_begin( > + struct inode *inode, > + loff_t offset, > + loff_t length, > + unsigned flags, > + struct iomap *iomap, > + struct iomap *srcmap) > +{ > + ASSERT(flags & IOMAP_WRITE); > + ASSERT(flags & IOMAP_DIRECT); > + > + if (flags & IOMAP_ATOMIC_SW) > + return xfs_atomic_write_sw_iomap_begin(inode, offset, length, > + flags, iomap, srcmap); > + > + ASSERT(flags & IOMAP_ATOMIC_HW); > + return xfs_direct_write_iomap_begin(inode, offset, length, flags, > + iomap, srcmap); > +} > + > +const struct iomap_ops xfs_atomic_write_iomap_ops = { > + .iomap_begin = xfs_atomic_write_iomap_begin, > +}; > + > static int > xfs_dax_write_iomap_end( > struct inode *inode, > diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h > index 8347268af727..b7fbbc909943 100644 > --- a/fs/xfs/xfs_iomap.h > +++ b/fs/xfs/xfs_iomap.h > @@ -53,5 +53,6 @@ extern const struct iomap_ops xfs_read_iomap_ops; > extern const struct iomap_ops xfs_seek_iomap_ops; > extern const struct iomap_ops xfs_xattr_iomap_ops; > extern const struct iomap_ops xfs_dax_write_iomap_ops; > +extern const struct iomap_ops xfs_atomic_write_iomap_ops; > > #endif /* __XFS_IOMAP_H__*/ > -- > 2.31.1 >