Hi, Could anyone please review this patch? thanks, wengang > On Jun 17, 2022, at 6:16 PM, Wengang Wang <wen.gang.wang@xxxxxxxxxx> wrote: > > inode io lock and mmap lock are exclusively locked durning reflink copy, > read operations are blocked during that time. In case the reflink copy > needs a long time to finish, read operations could be blocked for that long > too. > > The real case is that reflinks take serveral minutes or even longer with > huge source files. Those source files are hundreds of GB long and badly > fragmented, so the reflink copy needs to process more than one million > extents. > > As the source of copy, besides some neccessary modification happens (say > dirty page flushing), it plays readonly role. Locking source file > exclusively through out the reflink copy is unnecessary. > > This patch downgrade exclusive locks on source file to shared modes after > page cache flushing and before cloing the extents. > > Signed-off-by: Wengang Wang <wen.gang.wang@xxxxxxxxxx> > --- > fs/xfs/xfs_file.c | 8 +++++++- > fs/xfs/xfs_inode.c | 36 ++++++++++++++++++++++++++++++++++++ > fs/xfs/xfs_inode.h | 4 ++++ > 3 files changed, 47 insertions(+), 1 deletion(-) > > diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c > index 5a171c0b244b..99bbb188deb4 100644 > --- a/fs/xfs/xfs_file.c > +++ b/fs/xfs/xfs_file.c > @@ -1125,6 +1125,12 @@ xfs_file_remap_range( > if (ret || len == 0) > return ret; > > + /* > + * From now on, we read only from src, so downgrade locks on src > + * to allow read operations in parallel. > + */ > + xfs_ilock_io_mmap_downgrade_src(src, dest); > + > trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); > > ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len, > @@ -1152,7 +1158,7 @@ xfs_file_remap_range( > if (xfs_file_sync_writes(file_in) || xfs_file_sync_writes(file_out)) > xfs_log_force_inode(dest); > out_unlock: > - xfs_iunlock2_io_mmap(src, dest); > + xfs_iunlock2_io_mmap_src_shared(src, dest); > if (ret) > trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); > return remapped > 0 ? remapped : ret; > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c > index 52d6f2c7d58b..721abefbb1fa 100644 > --- a/fs/xfs/xfs_inode.c > +++ b/fs/xfs/xfs_inode.c > @@ -3786,6 +3786,22 @@ xfs_ilock2_io_mmap( > return 0; > } > > +/* > + * Downgrade the locks on src file if src and dest are not the same one. > + */ > +void > +xfs_ilock_io_mmap_downgrade_src( > + struct xfs_inode *src, > + struct xfs_inode *dest) > +{ > + if (src != dest) { > + struct inode *inode = VFS_I(src); > + > + downgrade_write(&inode->i_mapping->invalidate_lock); > + downgrade_write(&inode->i_rwsem); > + } > +} > + > /* Unlock both inodes to allow IO and mmap activity. */ > void > xfs_iunlock2_io_mmap( > @@ -3798,3 +3814,23 @@ xfs_iunlock2_io_mmap( > if (ip1 != ip2) > inode_unlock(VFS_I(ip1)); > } > + > +/* Unlock the exclusive locks on dest file. > + * Also unlock the shared locks on src if src and dest are not the same one > + */ > +void > +xfs_iunlock2_io_mmap_src_shared( > + struct xfs_inode *src, > + struct xfs_inode *dest) > +{ > + struct inode *src_inode = VFS_I(src); > + struct inode *dest_inode = VFS_I(dest); > + > + inode_unlock(dest_inode); > + up_write(&dest_inode->i_mapping->invalidate_lock); > + if (src == dest) > + return; > + > + inode_unlock_shared(src_inode); > + up_read(&src_inode->i_mapping->invalidate_lock); > +} > diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h > index 7be6f8e705ab..02b44e1a7e4e 100644 > --- a/fs/xfs/xfs_inode.h > +++ b/fs/xfs/xfs_inode.h > @@ -512,5 +512,9 @@ void xfs_end_io(struct work_struct *work); > > int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); > void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); > +void xfs_ilock_io_mmap_downgrade_src(struct xfs_inode *src, > + struct xfs_inode *dest); > +void xfs_iunlock2_io_mmap_src_shared(struct xfs_inode *src, > + struct xfs_inode *dest); > > #endif /* __XFS_INODE_H__ */ > -- > 2.21.0 (Apple Git-122.2) >