On Wed, Jan 22, 2025 at 06:43:21AM +0100, Christoph Hellwig wrote: > The XFS_IOC_EXCHANGE_RANGE ioctl with the XFS_EXCHANGE_RANGE_TO_EOF flag > operates on a range bounded by the end of the file. This means the > actual amount of blocks exchanged is derived from the inode size, which > is only stable with the IOLOCK (i_rwsem) held. Do that, it currently > calls remap_verify_area from inside the sb write protection which nests > outside the IOLOCK. But this makes fsnotify_file_area_perm which is > called from remap_verify_area unhappy when the kernel is built with > lockdep and the recently added CONFIG_FANOTIFY_ACCESS_PERMISSIONS > option. > > Fix this by always calling remap_verify_area before taking the write > protection, and passing a 0 size to remap_verify_area similar to > the FICLONE/FICLONERANGE ioctls when they are asked to clone until > the file end. > > (Note: the size argument gets passed to fsnotify_file_area_perm, but > then isn't actually used there). > > Fixes: 9a64d9b3109d ("xfs: introduce new file range exchange ioctl") > Signed-off-by: Christoph Hellwig <hch@xxxxxx> I shudder to think of what happens if security_file_permission tries to take i_rwsem in the old _TO_EOF case -- that sounds like a livelock vector. How about: Cc: <stable@xxxxxxxxxxxxxxx> # v6.10 Reviewed-by: "Darrick J. Wong" <djwong@xxxxxxxxxx> --D > --- > fs/xfs/xfs_exchrange.c | 71 ++++++++++++++++-------------------------- > 1 file changed, 27 insertions(+), 44 deletions(-) > > diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c > index f340a2015c4c..0b41bdfecdfb 100644 > --- a/fs/xfs/xfs_exchrange.c > +++ b/fs/xfs/xfs_exchrange.c > @@ -329,22 +329,6 @@ xfs_exchrange_mappings( > * successfully but before locks are dropped. > */ > > -/* Verify that we have security clearance to perform this operation. */ > -static int > -xfs_exchange_range_verify_area( > - struct xfs_exchrange *fxr) > -{ > - int ret; > - > - ret = remap_verify_area(fxr->file1, fxr->file1_offset, fxr->length, > - true); > - if (ret) > - return ret; > - > - return remap_verify_area(fxr->file2, fxr->file2_offset, fxr->length, > - true); > -} > - > /* > * Performs necessary checks before doing a range exchange, having stabilized > * mutable inode attributes via i_rwsem. > @@ -355,11 +339,13 @@ xfs_exchange_range_checks( > unsigned int alloc_unit) > { > struct inode *inode1 = file_inode(fxr->file1); > + loff_t size1 = i_size_read(inode1); > struct inode *inode2 = file_inode(fxr->file2); > + loff_t size2 = i_size_read(inode2); > uint64_t allocmask = alloc_unit - 1; > int64_t test_len; > uint64_t blen; > - loff_t size1, size2, tmp; > + loff_t tmp; > int error; > > /* Don't touch certain kinds of inodes */ > @@ -368,24 +354,25 @@ xfs_exchange_range_checks( > if (IS_SWAPFILE(inode1) || IS_SWAPFILE(inode2)) > return -ETXTBSY; > > - size1 = i_size_read(inode1); > - size2 = i_size_read(inode2); > - > /* Ranges cannot start after EOF. */ > if (fxr->file1_offset > size1 || fxr->file2_offset > size2) > return -EINVAL; > > - /* > - * If the caller said to exchange to EOF, we set the length of the > - * request large enough to cover everything to the end of both files. > - */ > if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) { > + /* > + * If the caller said to exchange to EOF, we set the length of > + * the request large enough to cover everything to the end of > + * both files. > + */ > fxr->length = max_t(int64_t, size1 - fxr->file1_offset, > size2 - fxr->file2_offset); > - > - error = xfs_exchange_range_verify_area(fxr); > - if (error) > - return error; > + } else { > + /* > + * Otherwise we require both ranges to end within EOF. > + */ > + if (fxr->file1_offset + fxr->length > size1 || > + fxr->file2_offset + fxr->length > size2) > + return -EINVAL; > } > > /* > @@ -401,15 +388,6 @@ xfs_exchange_range_checks( > check_add_overflow(fxr->file2_offset, fxr->length, &tmp)) > return -EINVAL; > > - /* > - * We require both ranges to end within EOF, unless we're exchanging > - * to EOF. > - */ > - if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) && > - (fxr->file1_offset + fxr->length > size1 || > - fxr->file2_offset + fxr->length > size2)) > - return -EINVAL; > - > /* > * Make sure we don't hit any file size limits. If we hit any size > * limits such that test_length was adjusted, we abort the whole > @@ -747,6 +725,7 @@ xfs_exchange_range( > { > struct inode *inode1 = file_inode(fxr->file1); > struct inode *inode2 = file_inode(fxr->file2); > + loff_t check_len = fxr->length; > int ret; > > BUILD_BUG_ON(XFS_EXCHANGE_RANGE_ALL_FLAGS & > @@ -779,14 +758,18 @@ xfs_exchange_range( > return -EBADF; > > /* > - * If we're not exchanging to EOF, we can check the areas before > - * stabilizing both files' i_size. > + * If we're exchanging to EOF we can't calculate the length until taking > + * the iolock. Pass a 0 length to remap_verify_area similar to the > + * FICLONE and FICLONERANGE ioctls that support cloning to EOF as well. > */ > - if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)) { > - ret = xfs_exchange_range_verify_area(fxr); > - if (ret) > - return ret; > - } > + if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) > + check_len = 0; > + ret = remap_verify_area(fxr->file1, fxr->file1_offset, check_len, true); > + if (ret) > + return ret; > + ret = remap_verify_area(fxr->file2, fxr->file2_offset, check_len, true); > + if (ret) > + return ret; > > /* Update cmtime if the fd/inode don't forbid it. */ > if (!(fxr->file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1)) > -- > 2.45.2 > >