Re: [PATCH v3 08/12] xfs: Iomap SW-based atomic write support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Feb 27, 2025 at 06:08:09PM +0000, John Garry wrote:
> In cases of an atomic write occurs for misaligned or discontiguous disk
> blocks, we will use a CoW-based method to issue the atomic write.
> 
> So, for that case, return -EAGAIN to request that the write be issued in
> CoW atomic write mode. The dio write path should detect this, similar to
> how misaligned regular DIO writes are handled.
> 
> For normal HW-based mode, when the range which we are atomic writing to
> covers a shared data extent, try to allocate a new CoW fork. However, if
> we find that what we allocated does not meet atomic write requirements
> in terms of length and alignment, then fallback on the CoW-based mode
> for the atomic write.
> 
> Signed-off-by: John Garry <john.g.garry@xxxxxxxxxx>

Looks reasonable,
Reviewed-by: "Darrick J. Wong" <djwong@xxxxxxxxxx>

--D

> ---
>  fs/xfs/xfs_iomap.c | 143 ++++++++++++++++++++++++++++++++++++++++++++-
>  fs/xfs/xfs_iomap.h |   1 +
>  2 files changed, 142 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index edfc038bf728..573108cbdc5c 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -795,6 +795,23 @@ imap_spans_range(
>  	return true;
>  }
>  
> +static bool
> +xfs_bmap_valid_for_atomic_write(
> +	struct xfs_bmbt_irec	*imap,
> +	xfs_fileoff_t		offset_fsb,
> +	xfs_fileoff_t		end_fsb)
> +{
> +	/* Misaligned start block wrt size */
> +	if (!IS_ALIGNED(imap->br_startblock, imap->br_blockcount))
> +		return false;
> +
> +	/* Discontiguous or mixed extents */
> +	if (!imap_spans_range(imap, offset_fsb, end_fsb))
> +		return false;
> +
> +	return true;
> +}
> +
>  static int
>  xfs_direct_write_iomap_begin(
>  	struct inode		*inode,
> @@ -809,10 +826,13 @@ xfs_direct_write_iomap_begin(
>  	struct xfs_bmbt_irec	imap, cmap;
>  	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
>  	xfs_fileoff_t		end_fsb = xfs_iomap_end_fsb(mp, offset, length);
> +	xfs_fileoff_t		orig_end_fsb = end_fsb;
> +	bool			atomic_hw = flags & IOMAP_ATOMIC_HW;
>  	int			nimaps = 1, error = 0;
>  	unsigned int		reflink_flags = 0;
>  	bool			shared = false;
>  	u16			iomap_flags = 0;
> +	bool			needs_alloc;
>  	unsigned int		lockmode;
>  	u64			seq;
>  
> @@ -871,13 +891,37 @@ xfs_direct_write_iomap_begin(
>  				&lockmode, reflink_flags);
>  		if (error)
>  			goto out_unlock;
> -		if (shared)
> +		if (shared) {
> +			if (atomic_hw &&
> +			    !xfs_bmap_valid_for_atomic_write(&cmap,
> +					offset_fsb, end_fsb)) {
> +				error = -EAGAIN;
> +				goto out_unlock;
> +			}
>  			goto out_found_cow;
> +		}
>  		end_fsb = imap.br_startoff + imap.br_blockcount;
>  		length = XFS_FSB_TO_B(mp, end_fsb) - offset;
>  	}
>  
> -	if (imap_needs_alloc(inode, flags, &imap, nimaps))
> +	needs_alloc = imap_needs_alloc(inode, flags, &imap, nimaps);
> +
> +	if (atomic_hw) {
> +		error = -EAGAIN;
> +		/*
> +		 * Use CoW method for when we need to alloc > 1 block,
> +		 * otherwise we might allocate less than what we need here and
> +		 * have multiple mappings.
> +		*/
> +		if (needs_alloc && orig_end_fsb - offset_fsb > 1)
> +			goto out_unlock;
> +
> +		if (!xfs_bmap_valid_for_atomic_write(&imap, offset_fsb,
> +				orig_end_fsb))
> +			goto out_unlock;
> +	}
> +
> +	if (needs_alloc)
>  		goto allocate_blocks;
>  
>  	/*
> @@ -965,6 +1009,101 @@ const struct iomap_ops xfs_direct_write_iomap_ops = {
>  	.iomap_begin		= xfs_direct_write_iomap_begin,
>  };
>  
> +static int
> +xfs_atomic_write_sw_iomap_begin(
> +	struct inode		*inode,
> +	loff_t			offset,
> +	loff_t			length,
> +	unsigned		flags,
> +	struct iomap		*iomap,
> +	struct iomap		*srcmap)
> +{
> +	struct xfs_inode	*ip = XFS_I(inode);
> +	struct xfs_mount	*mp = ip->i_mount;
> +	struct xfs_bmbt_irec	imap, cmap;
> +	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
> +	xfs_fileoff_t		end_fsb = xfs_iomap_end_fsb(mp, offset, length);
> +	int			nimaps = 1, error;
> +	unsigned int		reflink_flags;
> +	bool			shared = false;
> +	u16			iomap_flags = 0;
> +	unsigned int		lockmode = XFS_ILOCK_EXCL;
> +	u64			seq;
> +
> +	if (xfs_is_shutdown(mp))
> +		return -EIO;
> +
> +	reflink_flags = XFS_REFLINK_CONVERT | XFS_REFLINK_ATOMIC_SW;
> +
> +	/*
> +	 * Set IOMAP_F_DIRTY similar to xfs_atomic_write_iomap_begin()
> +	 */
> +	if (offset + length > i_size_read(inode))
> +		iomap_flags |= IOMAP_F_DIRTY;
> +
> +	error = xfs_ilock_for_iomap(ip, flags, &lockmode);
> +	if (error)
> +		return error;
> +
> +	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
> +			&nimaps, 0);
> +	if (error)
> +		goto out_unlock;
> +
> +	error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared,
> +			&lockmode, reflink_flags);
> +	/*
> +	 * Don't check @shared. For atomic writes, we should error when
> +	 * we don't get a COW mapping
> +	 */
> +	if (error)
> +		goto out_unlock;
> +
> +	end_fsb = imap.br_startoff + imap.br_blockcount;
> +
> +	length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
> +	trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
> +	if (imap.br_startblock != HOLESTARTBLOCK) {
> +		seq = xfs_iomap_inode_sequence(ip, 0);
> +		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq);
> +		if (error)
> +			goto out_unlock;
> +	}
> +	seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
> +	xfs_iunlock(ip, lockmode);
> +	return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq);
> +
> +out_unlock:
> +	if (lockmode)
> +		xfs_iunlock(ip, lockmode);
> +	return error;
> +}
> +
> +static int
> +xfs_atomic_write_iomap_begin(
> +	struct inode		*inode,
> +	loff_t			offset,
> +	loff_t			length,
> +	unsigned		flags,
> +	struct iomap		*iomap,
> +	struct iomap		*srcmap)
> +{
> +	ASSERT(flags & IOMAP_WRITE);
> +	ASSERT(flags & IOMAP_DIRECT);
> +
> +	if (flags & IOMAP_ATOMIC_SW)
> +		return xfs_atomic_write_sw_iomap_begin(inode, offset, length,
> +				flags, iomap, srcmap);
> +
> +	ASSERT(flags & IOMAP_ATOMIC_HW);
> +	return xfs_direct_write_iomap_begin(inode, offset, length, flags,
> +			iomap, srcmap);
> +}
> +
> +const struct iomap_ops xfs_atomic_write_iomap_ops = {
> +	.iomap_begin		= xfs_atomic_write_iomap_begin,
> +};
> +
>  static int
>  xfs_dax_write_iomap_end(
>  	struct inode		*inode,
> diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
> index 8347268af727..b7fbbc909943 100644
> --- a/fs/xfs/xfs_iomap.h
> +++ b/fs/xfs/xfs_iomap.h
> @@ -53,5 +53,6 @@ extern const struct iomap_ops xfs_read_iomap_ops;
>  extern const struct iomap_ops xfs_seek_iomap_ops;
>  extern const struct iomap_ops xfs_xattr_iomap_ops;
>  extern const struct iomap_ops xfs_dax_write_iomap_ops;
> +extern const struct iomap_ops xfs_atomic_write_iomap_ops;
>  
>  #endif /* __XFS_IOMAP_H__*/
> -- 
> 2.31.1
> 




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux