In fsdax mode, WRITE and ZERO on a shared extent need CoW performed. After that, new allocated extents needs to be remapped to the file. So, add a CoW identification in ->iomap_begin(), and implement ->iomap_end() to do the remapping work. Signed-off-by: Shiyang Ruan <ruansy.fnst@xxxxxxxxxxx> Reviewed-by: Darrick J. Wong <djwong@xxxxxxxxxx> Reviewed-by: Christoph Hellwig <hch@xxxxxx> --- fs/xfs/xfs_file.c | 33 ++++++++++++++++++++++++++++----- fs/xfs/xfs_iomap.c | 30 +++++++++++++++++++++++++++++- fs/xfs/xfs_iomap.h | 1 + 3 files changed, 58 insertions(+), 6 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index af954a5b71f8..fe9f92586acf 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -25,6 +25,7 @@ #include "xfs_iomap.h" #include "xfs_reflink.h" +#include <linux/dax.h> #include <linux/falloc.h> #include <linux/backing-dev.h> #include <linux/mman.h> @@ -669,7 +670,7 @@ xfs_file_dax_write( pos = iocb->ki_pos; trace_xfs_file_dax_write(iocb, from); - ret = dax_iomap_rw(iocb, from, &xfs_direct_write_iomap_ops); + ret = dax_iomap_rw(iocb, from, &xfs_dax_write_iomap_ops); if (ret > 0 && iocb->ki_pos > i_size_read(inode)) { i_size_write(inode, iocb->ki_pos); error = xfs_setfilesize(ip, pos, ret); @@ -1254,6 +1255,31 @@ xfs_file_llseek( return vfs_setpos(file, offset, inode->i_sb->s_maxbytes); } +#ifdef CONFIG_FS_DAX +int +xfs_dax_fault( + struct vm_fault *vmf, + enum page_entry_size pe_size, + bool write_fault, + pfn_t *pfn) +{ + return dax_iomap_fault(vmf, pe_size, pfn, NULL, + (write_fault && !vmf->cow_page) ? + &xfs_dax_write_iomap_ops : + &xfs_read_iomap_ops); +} +#else +int +xfs_dax_fault( + struct vm_fault *vmf, + enum page_entry_size pe_size, + bool write_fault, + pfn_t *pfn) +{ + return 0; +} +#endif + /* * Locking for serialisation of IO during page faults. This results in a lock * ordering of: @@ -1285,10 +1311,7 @@ __xfs_filemap_fault( pfn_t pfn; xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); - ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, - (write_fault && !vmf->cow_page) ? - &xfs_direct_write_iomap_ops : - &xfs_read_iomap_ops); + ret = xfs_dax_fault(vmf, pe_size, write_fault, &pfn); if (ret & VM_FAULT_NEEDDSYNC) ret = dax_finish_sync_fault(vmf, pe_size, pfn); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 5a393259a3a3..4c07f5e718fb 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -773,7 +773,8 @@ xfs_direct_write_iomap_begin( /* may drop and re-acquire the ilock */ error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared, - &lockmode, flags & IOMAP_DIRECT); + &lockmode, + (flags & IOMAP_DIRECT) || IS_DAX(inode)); if (error) goto out_unlock; if (shared) @@ -867,6 +868,33 @@ const struct iomap_ops xfs_direct_write_iomap_ops = { .iomap_begin = xfs_direct_write_iomap_begin, }; +static int +xfs_dax_write_iomap_end( + struct inode *inode, + loff_t pos, + loff_t length, + ssize_t written, + unsigned flags, + struct iomap *iomap) +{ + struct xfs_inode *ip = XFS_I(inode); + + if (!xfs_is_cow_inode(ip)) + return 0; + + if (!written) { + xfs_reflink_cancel_cow_range(ip, pos, length, true); + return 0; + } + + return xfs_reflink_end_cow(ip, pos, written); +} + +const struct iomap_ops xfs_dax_write_iomap_ops = { + .iomap_begin = xfs_direct_write_iomap_begin, + .iomap_end = xfs_dax_write_iomap_end, +}; + static int xfs_buffered_write_iomap_begin( struct inode *inode, diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index e88dc162c785..c782e8c0479c 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -51,5 +51,6 @@ extern const struct iomap_ops xfs_direct_write_iomap_ops; extern const struct iomap_ops xfs_read_iomap_ops; extern const struct iomap_ops xfs_seek_iomap_ops; extern const struct iomap_ops xfs_xattr_iomap_ops; +extern const struct iomap_ops xfs_dax_write_iomap_ops; #endif /* __XFS_IOMAP_H__*/ -- 2.35.1