Hi Darrick, How about the V2? thanks, wengang > On Jun 24, 2022, at 12:10 PM, Wengang Wang <wen.gang.wang@xxxxxxxxxx> wrote: > > During a reflink operation, the IOLOCK and MMAPLOCK of the source file > are held in exclusive mode for the duration. This prevents reads on the > source file, which could last a very long time if the source file has > millions of extents. > > As the source of the copy, besides some necessary modification (say dirty page > flushing), it plays a read-only role. Locking the source file exclusively > throughout the full reflink copy is unreasonable. > > This patch downgrades exclusive locks on the source file to shared mode after > page cache flushing and before cloning the extents. To avoid source file > changes after the lock downgrade, direct write paths take IOLOCK_EXCL on > seeing reflink copy happening to the files. > > Signed-off-by: Wengang Wang <wen.gang.wang@xxxxxxxxxx> > --- > V2 changes: > Commit message > Make direct write paths take IOLOCK_EXCL when reflink copy is happening > Tiny changes > --- > fs/xfs/xfs_file.c | 33 ++++++++++++++++++++++++++++++--- > fs/xfs/xfs_inode.c | 31 +++++++++++++++++++++++++++++++ > fs/xfs/xfs_inode.h | 11 +++++++++++ > 3 files changed, 72 insertions(+), 3 deletions(-) > > diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c > index 5a171c0b244b..6ca7118ee274 100644 > --- a/fs/xfs/xfs_file.c > +++ b/fs/xfs/xfs_file.c > @@ -514,8 +514,10 @@ xfs_file_dio_write_aligned( > struct iov_iter *from) > { > unsigned int iolock = XFS_IOLOCK_SHARED; > + int remapping; > ssize_t ret; > > +relock: > ret = xfs_ilock_iocb(iocb, iolock); > if (ret) > return ret; > @@ -523,14 +525,25 @@ xfs_file_dio_write_aligned( > if (ret) > goto out_unlock; > > + remapping = xfs_iflags_test(ip, XFS_IREMAPPING); > + > /* > * We don't need to hold the IOLOCK exclusively across the IO, so demote > * the iolock back to shared if we had to take the exclusive lock in > * xfs_file_write_checks() for other reasons. 
> + * But take IOLOCK_EXCL when reflink copy is going on > */ > if (iolock == XFS_IOLOCK_EXCL) { > - xfs_ilock_demote(ip, XFS_IOLOCK_EXCL); > - iolock = XFS_IOLOCK_SHARED; > + if (!remapping) { > + xfs_ilock_demote(ip, XFS_IOLOCK_EXCL); > + iolock = XFS_IOLOCK_SHARED; > + } > + } else { /* iolock == XFS_IOLOCK_SHARED */ > + if (remapping) { > + xfs_iunlock(ip, iolock); > + iolock = XFS_IOLOCK_EXCL; > + goto relock; > + } > } > trace_xfs_file_direct_write(iocb, from); > ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops, > @@ -1125,6 +1138,19 @@ xfs_file_remap_range( > if (ret || len == 0) > return ret; > > + /* > + * Set the XFS_IREMAPPING flag on the source file before we downgrade > + * the locks, so that all direct writes know they have to take > + * IOLOCK_EXCL. > + */ > + xfs_iflags_set(src, XFS_IREMAPPING); > + > + /* > + * From now on, we read only from src, so downgrade locks to allow > + * read operations to proceed. > + */ > + xfs_ilock_io_mmap_downgrade_src(src, dest); > + > trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); > > ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len, > @@ -1152,7 +1178,8 @@ xfs_file_remap_range( > if (xfs_file_sync_writes(file_in) || xfs_file_sync_writes(file_out)) > xfs_log_force_inode(dest); > out_unlock: > - xfs_iunlock2_io_mmap(src, dest); > + xfs_iflags_clear(src, XFS_IREMAPPING); > + xfs_iunlock2_io_mmap_src_shared(src, dest); > if (ret) > trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); > return remapped > 0 ? remapped : ret; > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c > index 52d6f2c7d58b..1cbd4a594f28 100644 > --- a/fs/xfs/xfs_inode.c > +++ b/fs/xfs/xfs_inode.c > @@ -3786,6 +3786,16 @@ xfs_ilock2_io_mmap( > return 0; > } > > +/* Downgrade the locks on src file if src and dest are not the same one. 
*/ > +void > +xfs_ilock_io_mmap_downgrade_src( > + struct xfs_inode *src, > + struct xfs_inode *dest) > +{ > + if (src != dest) > + xfs_ilock_demote(src, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL); > +} > + > /* Unlock both inodes to allow IO and mmap activity. */ > void > xfs_iunlock2_io_mmap( > @@ -3798,3 +3808,24 @@ xfs_iunlock2_io_mmap( > if (ip1 != ip2) > inode_unlock(VFS_I(ip1)); > } > + > +/* > + * Unlock the exclusive locks on dest file. > + * Also unlock the shared locks on src if src and dest are not the same one > + */ > +void > +xfs_iunlock2_io_mmap_src_shared( > + struct xfs_inode *src, > + struct xfs_inode *dest) > +{ > + struct inode *src_inode = VFS_I(src); > + struct inode *dest_inode = VFS_I(dest); > + > + inode_unlock(dest_inode); > + filemap_invalidate_unlock(dest_inode->i_mapping); > + if (src == dest) > + return; > + > + inode_unlock_shared(src_inode); > + filemap_invalidate_unlock_shared(src_inode->i_mapping); > +} > diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h > index 7be6f8e705ab..c07d4b42cf9d 100644 > --- a/fs/xfs/xfs_inode.h > +++ b/fs/xfs/xfs_inode.h > @@ -262,6 +262,13 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip) > */ > #define XFS_INACTIVATING (1 << 13) > > +/* > + * A flag indicating reflink copy / remapping is happening to the file as > + * source. When set, all direct IOs should take IOLOCK_EXCL to avoid > + * interfering with the remapping. > + */ > +#define XFS_IREMAPPING (1 << 14) > + > /* All inode state flags related to inode reclaim. 
*/ > #define XFS_ALL_IRECLAIM_FLAGS (XFS_IRECLAIMABLE | \ > XFS_IRECLAIM | \ > @@ -512,5 +519,9 @@ void xfs_end_io(struct work_struct *work); > > int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); > void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); > +void xfs_ilock_io_mmap_downgrade_src(struct xfs_inode *src, > + struct xfs_inode *dest); > +void xfs_iunlock2_io_mmap_src_shared(struct xfs_inode *src, > + struct xfs_inode *dest); > > #endif /* __XFS_INODE_H__ */ > -- > 2.21.0 (Apple Git-122.2) >