On Wed, Nov 08, 2023 at 01:45:26PM -0800, Darrick J. Wong wrote: > From: Darrick J. Wong <djwong@xxxxxxxxxx> > > I'm abandoning (for now) efforts to bring atomic file content exchanges > to the VFS. The goal here is to reduce friction in getting online fsck > merged, so Dave and I want to take this back to being a private XFS > ioctl so we can explore with it for a while before committing it to the > stable KABI. > > Shift all the existing FIEXCHANGE usage to XFS_IOC_EXCHANGE_RANGE, and > try to pick it up from xfs_fs_staging.h if the system xfslibs-dev > package has such an animal. > > Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx> > --- Hi Darrick, Could you please rebase this patchset onto latest fstests for-next branch? I can't merge it onto for-next or master branch, always hit: $ git am -s ./20231108_djwong_fstests_fiexchange_is_now_an_xfs_ioctl.mbx Applying: misc: privatize the FIEXCHANGE ioctl for now error: patch failed: configure.ac:70 error: configure.ac: patch does not apply error: patch failed: include/builddefs.in:72 error: include/builddefs.in: patch does not apply error: patch failed: m4/package_libcdev.m4:155 error: m4/package_libcdev.m4: patch does not apply error: patch failed: src/Makefile:98 error: src/Makefile: patch does not apply Patch failed at 0001 misc: privatize the FIEXCHANGE ioctl for now Can you rebase this patchset onto latest for-next branch, better to after: [PATCH 1/1] generic: test reads racing with slow reflink operations I've merged it. And of course, you please keep the RVB from Christoph Hellwig :) Thanks, Zorro > configure.ac | 2 +- > doc/group-names.txt | 2 +- > include/builddefs.in | 2 +- > ltp/Makefile | 4 ++-- > ltp/fsstress.c | 10 +++++----- > ltp/fsx.c | 20 ++++++++++---------- > m4/package_libcdev.m4 | 19 ------------------- > m4/package_xfslibs.m4 | 14 ++++++++++++++ > src/Makefile | 4 ++++ > src/fiexchange.h | 44 ++++++++++++++++++++++---------------------- > src/global.h | 4 +--- > src/vfs/Makefile | 4 ++++ > tests/generic/724 | 2 +- > tests/xfs/122.out | 1 + > tests/xfs/791 | 2 +- > 15 files changed, 68 insertions(+), 66 deletions(-) > > > diff --git a/configure.ac b/configure.ac > index 7333045330..b22fc52bff 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -70,7 +70,7 @@ AC_HAVE_SEEK_DATA > AC_HAVE_BMV_OF_SHARED > AC_HAVE_NFTW > AC_HAVE_RLIMIT_NOFILE > -AC_HAVE_FIEXCHANGE > +AC_HAVE_XFS_IOC_EXCHANGE_RANGE > AC_HAVE_FICLONE > > AC_CHECK_FUNCS([renameat2]) > diff --git a/doc/group-names.txt b/doc/group-names.txt > index c3dcca3755..fec6bf71ab 100644 > --- a/doc/group-names.txt > +++ b/doc/group-names.txt > @@ -52,7 +52,7 @@ enospc ENOSPC error reporting > exportfs file handles > fiemap fiemap ioctl > filestreams XFS filestreams allocator > -fiexchange FIEXCHANGE_RANGE ioctl > +fiexchange XFS_IOC_EXCHANGE_RANGE ioctl > freeze filesystem freeze tests > fsck general fsck tests > fsmap FS_IOC_GETFSMAP ioctl > diff --git a/include/builddefs.in b/include/builddefs.in > index 446350d5fc..ce95fe7d4b 100644 > --- a/include/builddefs.in > +++ b/include/builddefs.in > @@ -72,7 +72,7 @@ HAVE_SEEK_DATA = @have_seek_data@ > HAVE_NFTW = @have_nftw@ > HAVE_BMV_OF_SHARED = @have_bmv_of_shared@ > HAVE_RLIMIT_NOFILE = @have_rlimit_nofile@ > -HAVE_FIEXCHANGE = @have_fiexchange@ > +HAVE_XFS_IOC_EXCHANGE_RANGE = @have_xfs_ioc_exchange_range@ > HAVE_FICLONE = @have_ficlone@ > > GCCFLAGS = -funsigned-char -fno-strict-aliasing -Wall > diff --git a/ltp/Makefile b/ltp/Makefile > index c2b70d896e..c0b2824076 100644 > --- a/ltp/Makefile > +++ b/ltp/Makefile > @@ -36,8 +36,8 @@ ifeq ($(HAVE_COPY_FILE_RANGE),yes) > LCFLAGS += -DHAVE_COPY_FILE_RANGE > endif > > -ifeq ($(HAVE_FIEXCHANGE),yes) > -LCFLAGS += -DHAVE_FIEXCHANGE > +ifeq ($(HAVE_XFS_IOC_EXCHANGE_RANGE),yes) > +LCFLAGS += -DHAVE_XFS_IOC_EXCHANGE_RANGE > endif > > default: depend $(TARGETS) > diff --git a/ltp/fsstress.c b/ltp/fsstress.c > index abe2874253..2681ed2b08 100644 > --- a/ltp/fsstress.c > +++ b/ltp/fsstress.c > @@ -2592,8 +2592,8 @@ xchgrange_f( > opnum_t opno, > long r) > { > -#ifdef FIEXCHANGE_RANGE > - struct file_xchg_range fxr = { 0 }; > +#ifdef XFS_IOC_EXCHANGE_RANGE > + struct xfs_exch_range fxr = { 0 }; > static __u64 swap_flags = 0; > struct pathname fpath1; > struct pathname fpath2; > @@ -2721,10 +2721,10 @@ xchgrange_f( > fxr.flags = swap_flags; > > retry: > - ret = ioctl(fd2, FIEXCHANGE_RANGE, &fxr); > + ret = ioctl(fd2, XFS_IOC_EXCHANGE_RANGE, &fxr); > e = ret < 0 ? errno : 0; > - if (e == EOPNOTSUPP && !(swap_flags & FILE_XCHG_RANGE_NONATOMIC)) { > - swap_flags = FILE_XCHG_RANGE_NONATOMIC; > + if (e == EOPNOTSUPP && !(swap_flags & XFS_EXCH_RANGE_NONATOMIC)) { > + swap_flags = XFS_EXCH_RANGE_NONATOMIC; > fxr.flags |= swap_flags; > goto retry; > } > diff --git a/ltp/fsx.c b/ltp/fsx.c > index a30e2a8dbc..777ba0de5d 100644 > --- a/ltp/fsx.c > +++ b/ltp/fsx.c > @@ -1389,27 +1389,27 @@ do_insert_range(unsigned offset, unsigned length) > } > #endif > > -#ifdef FIEXCHANGE_RANGE > +#ifdef XFS_IOC_EXCHANGE_RANGE > static __u64 swap_flags = 0; > > int > test_xchg_range(void) > { > - struct file_xchg_range fsr = { > + struct xfs_exch_range fsr = { > .file1_fd = fd, > - .flags = FILE_XCHG_RANGE_DRY_RUN | swap_flags, > + .flags = XFS_EXCH_RANGE_DRY_RUN | swap_flags, > }; > int ret, e; > > retry: > - ret = ioctl(fd, FIEXCHANGE_RANGE, &fsr); > + ret = ioctl(fd, XFS_IOC_EXCHANGE_RANGE, &fsr); > e = ret < 0 ? errno : 0; > - if (e == EOPNOTSUPP && !(swap_flags & FILE_XCHG_RANGE_NONATOMIC)) { > + if (e == EOPNOTSUPP && !(swap_flags & XFS_EXCH_RANGE_NONATOMIC)) { > /* > * If the call fails with atomic mode, try again with non > * atomic mode. > */ > - swap_flags = FILE_XCHG_RANGE_NONATOMIC; > + swap_flags = XFS_EXCH_RANGE_NONATOMIC; > fsr.flags |= swap_flags; > goto retry; > } > @@ -1427,7 +1427,7 @@ test_xchg_range(void) > void > do_xchg_range(unsigned offset, unsigned length, unsigned dest) > { > - struct file_xchg_range fsr = { > + struct xfs_exch_range fsr = { > .file1_fd = fd, > .file1_offset = offset, > .file2_offset = dest, > @@ -1470,10 +1470,10 @@ do_xchg_range(unsigned offset, unsigned length, unsigned dest) > testcalls, offset, offset+length, length, dest); > } > > - if (ioctl(fd, FIEXCHANGE_RANGE, &fsr) == -1) { > + if (ioctl(fd, XFS_IOC_EXCHANGE_RANGE, &fsr) == -1) { > prt("exchange range: 0x%x to 0x%x at 0x%x\n", offset, > offset + length, dest); > - prterr("do_xchg_range: FIEXCHANGE_RANGE"); > + prterr("do_xchg_range: XFS_IOC_EXCHANGE_RANGE"); > report_failure(161); > goto out_free; > } > @@ -2452,7 +2452,7 @@ usage(void) > #ifdef HAVE_COPY_FILE_RANGE > " -E: Do not use copy range calls\n" > #endif > -#ifdef FIEXCHANGE_RANGE > +#ifdef XFS_IOC_EXCHANGE_RANGE > " -0: Do not use exchange range calls\n" > #endif > " -K: Do not use keep size\n\ > diff --git a/m4/package_libcdev.m4 b/m4/package_libcdev.m4 > index 91eb64db21..d5d88b8e44 100644 > --- a/m4/package_libcdev.m4 > +++ b/m4/package_libcdev.m4 > @@ -155,25 +155,6 @@ AC_DEFUN([AC_HAVE_RLIMIT_NOFILE], > AC_SUBST(have_rlimit_nofile) > ]) > > -# > -# Check if we have a FIEXCHANGE_RANGE ioctl (Linux) > -# > -AC_DEFUN([AC_HAVE_FIEXCHANGE], > - [ AC_MSG_CHECKING([for FIEXCHANGE_RANGE]) > - AC_LINK_IFELSE([AC_LANG_PROGRAM([[ > -#define _GNU_SOURCE > -#include <sys/syscall.h> > -#include <sys/ioctl.h> > -#include <unistd.h> > -#include <linux/fs.h> > -#include <linux/fiexchange.h> > - ]], [[ > - struct file_xchg_range fxr; > - ioctl(-1, FIEXCHANGE_RANGE, &fxr); > - ]])],[have_fiexchange=yes > - AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)]) > - AC_SUBST(have_fiexchange) > - > # Check if we have FICLONE > AC_DEFUN([AC_HAVE_FICLONE], > [ AC_MSG_CHECKING([for FICLONE]) > diff --git a/m4/package_xfslibs.m4 b/m4/package_xfslibs.m4 > index 8ef58cc064..1549360df6 100644 > --- a/m4/package_xfslibs.m4 > +++ b/m4/package_xfslibs.m4 > @@ -119,3 +119,17 @@ AC_DEFUN([AC_HAVE_BMV_OF_SHARED], > AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)]) > AC_SUBST(have_bmv_of_shared) > ]) > + > +# Check if we have XFS_IOC_EXCHANGE_RANGE > +AC_DEFUN([AC_HAVE_XFS_IOC_EXCHANGE_RANGE], > + [ AC_MSG_CHECKING([for XFS_IOC_EXCHANGE_RANGE]) > + AC_LINK_IFELSE([AC_LANG_PROGRAM([[ > +#define _GNU_SOURCE > +#include <xfs/xfs.h> > + ]], [[ > + struct xfs_exch_range obj; > + ioctl(-1, XFS_IOC_EXCHANGE_RANGE, &obj); > + ]])],[have_xfs_ioc_exchange_range=yes > + AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)]) > + AC_SUBST(have_xfs_ioc_exchange_range) > + ]) > diff --git a/src/Makefile b/src/Makefile > index 49dd2f6c1e..8160a0e8ec 100644 > --- a/src/Makefile > +++ b/src/Makefile > @@ -98,6 +98,10 @@ ifeq ($(HAVE_FICLONE),yes) > TARGETS += t_reflink_read_race > endif > > +ifeq ($(HAVE_XFS_IOC_EXCHANGE_RANGE),yes) > +LCFLAGS += -DHAVE_XFS_IOC_EXCHANGE_RANGE > +endif > + > CFILES = $(TARGETS:=.c) > LDIRT = $(TARGETS) fssum > > diff --git a/src/fiexchange.h b/src/fiexchange.h > index 29b3ac0ff5..6a3ae8964d 100644 > --- a/src/fiexchange.h > +++ b/src/fiexchange.h > @@ -16,13 +16,13 @@ > * called against (which we'll call file2). Filesystems must be able to > * restart and complete the operation even after the system goes down. > */ > -struct file_xchg_range { > +struct xfs_exch_range { > __s64 file1_fd; > __s64 file1_offset; /* file1 offset, bytes */ > __s64 file2_offset; /* file2 offset, bytes */ > __s64 length; /* bytes to exchange */ > > - __u64 flags; /* see FILE_XCHG_RANGE_* below */ > + __u64 flags; /* see XFS_EXCH_RANGE_* below */ > > /* file2 metadata for optional freshness checks */ > __s64 file2_ino; /* inode number */ > @@ -38,39 +38,39 @@ struct file_xchg_range { > * Atomic exchange operations are not required. This relaxes the requirement > * that the filesystem must be able to complete the operation after a crash. > */ > -#define FILE_XCHG_RANGE_NONATOMIC (1 << 0) > +#define XFS_EXCH_RANGE_NONATOMIC (1 << 0) > > /* > * Check that file2's inode number, mtime, and ctime against the values > * provided, and return -EBUSY if there isn't an exact match. > */ > -#define FILE_XCHG_RANGE_FILE2_FRESH (1 << 1) > +#define XFS_EXCH_RANGE_FILE2_FRESH (1 << 1) > > /* > * Check that the file1's length is equal to file1_offset + length, and that > * file2's length is equal to file2_offset + length. Returns -EDOM if there > * isn't an exact match. > */ > -#define FILE_XCHG_RANGE_FULL_FILES (1 << 2) > +#define XFS_EXCH_RANGE_FULL_FILES (1 << 2) > > /* > * Exchange file data all the way to the ends of both files, and then exchange > * the file sizes. This flag can be used to replace a file's contents with a > * different amount of data. length will be ignored. > */ > -#define FILE_XCHG_RANGE_TO_EOF (1 << 3) > +#define XFS_EXCH_RANGE_TO_EOF (1 << 3) > > /* Flush all changes in file data and file metadata to disk before returning. */ > -#define FILE_XCHG_RANGE_FSYNC (1 << 4) > +#define XFS_EXCH_RANGE_FSYNC (1 << 4) > > /* Dry run; do all the parameter verification but do not change anything. */ > -#define FILE_XCHG_RANGE_DRY_RUN (1 << 5) > +#define XFS_EXCH_RANGE_DRY_RUN (1 << 5) > > /* > - * Do not exchange any part of the range where file1's mapping is a hole. This > - * can be used to emulate scatter-gather atomic writes with a temp file. > + * Only exchange ranges where file1's range maps to a written extent. This can > + * be used to emulate scatter-gather atomic writes with a temp file. > */ > -#define FILE_XCHG_RANGE_SKIP_FILE1_HOLES (1 << 6) > +#define XFS_EXCH_RANGE_FILE1_WRITTEN (1 << 6) > > /* > * Commit the contents of file1 into file2 if file2 has the same inode number, > @@ -83,19 +83,19 @@ struct file_xchg_range { > * commit is complete. > * > * This flag should not be combined with NONATOMIC. It can be combined with > - * SKIP_FILE1_HOLES. > + * FILE1_WRITTEN. > */ > -#define FILE_XCHG_RANGE_COMMIT (FILE_XCHG_RANGE_FILE2_FRESH | \ > - FILE_XCHG_RANGE_FSYNC) > +#define XFS_EXCH_RANGE_COMMIT (XFS_EXCH_RANGE_FILE2_FRESH | \ > + XFS_EXCH_RANGE_FSYNC) > > -#define FILE_XCHG_RANGE_ALL_FLAGS (FILE_XCHG_RANGE_NONATOMIC | \ > - FILE_XCHG_RANGE_FILE2_FRESH | \ > - FILE_XCHG_RANGE_FULL_FILES | \ > - FILE_XCHG_RANGE_TO_EOF | \ > - FILE_XCHG_RANGE_FSYNC | \ > - FILE_XCHG_RANGE_DRY_RUN | \ > - FILE_XCHG_RANGE_SKIP_FILE1_HOLES) > +#define XFS_EXCH_RANGE_ALL_FLAGS (XFS_EXCH_RANGE_NONATOMIC | \ > + XFS_EXCH_RANGE_FILE2_FRESH | \ > + XFS_EXCH_RANGE_FULL_FILES | \ > + XFS_EXCH_RANGE_TO_EOF | \ > + XFS_EXCH_RANGE_FSYNC | \ > + XFS_EXCH_RANGE_DRY_RUN | \ > + XFS_EXCH_RANGE_FILE1_WRITTEN) > > -#define FIEXCHANGE_RANGE _IOWR('X', 129, struct file_xchg_range) > +#define XFS_IOC_EXCHANGE_RANGE _IOWR('X', 129, struct xfs_exch_range) > > #endif /* _LINUX_FIEXCHANGE_H */ > diff --git a/src/global.h b/src/global.h > index 49570ef117..4f92308d6c 100644 > --- a/src/global.h > +++ b/src/global.h > @@ -171,9 +171,7 @@ > #include <sys/mman.h> > #endif > > -#ifdef HAVE_FIEXCHANGE > -# include <linux/fiexchange.h> > -#else > +#ifndef HAVE_XFS_IOC_EXCHANGE_RANGE > # include "fiexchange.h" > #endif > > diff --git a/src/vfs/Makefile b/src/vfs/Makefile > index 4841da1286..868540f578 100644 > --- a/src/vfs/Makefile > +++ b/src/vfs/Makefile > @@ -19,6 +19,10 @@ ifeq ($(HAVE_URING), true) > LLDLIBS += -luring > endif > > +ifeq ($(HAVE_XFS_IOC_EXCHANGE_RANGE),yes) > +LCFLAGS += -DHAVE_XFS_IOC_EXCHANGE_RANGE > +endif > + > default: depend $(TARGETS) > > depend: .dep > diff --git a/tests/generic/724 b/tests/generic/724 > index 8d7dc4e12a..67e0dba446 100755 > --- a/tests/generic/724 > +++ b/tests/generic/724 > @@ -5,7 +5,7 @@ > # FS QA Test No. 724 > # > # Test scatter-gather atomic file writes. We create a temporary file, write > -# sparsely to it, then use FILE_SWAP_RANGE_SKIP_FILE1_HOLES flag to swap > +# sparsely to it, then use XFS_EXCH_RANGE_FILE1_WRITTEN flag to swap > # atomicallly only the ranges that we wrote. > > . ./common/preamble > diff --git a/tests/xfs/122.out b/tests/xfs/122.out > index 21549db7fd..89f7b735b0 100644 > --- a/tests/xfs/122.out > +++ b/tests/xfs/122.out > @@ -90,6 +90,7 @@ sizeof(struct xfs_disk_dquot) = 104 > sizeof(struct xfs_dqblk) = 136 > sizeof(struct xfs_dsb) = 264 > sizeof(struct xfs_dsymlink_hdr) = 56 > +sizeof(struct xfs_exch_range) = 120 > sizeof(struct xfs_extent_data) = 24 > sizeof(struct xfs_extent_data_info) = 32 > sizeof(struct xfs_fs_eofblocks) = 128 > diff --git a/tests/xfs/791 b/tests/xfs/791 > index d82314ee08..4944c1517c 100755 > --- a/tests/xfs/791 > +++ b/tests/xfs/791 > @@ -5,7 +5,7 @@ > # FS QA Test No. 791 > # > # Test scatter-gather atomic file writes. We create a temporary file, write > -# sparsely to it, then use FILE_SWAP_RANGE_SKIP_FILE1_HOLES flag to swap > +# sparsely to it, then use XFS_EXCH_RANGE_FILE1_WRITTEN flag to swap > # atomicallly only the ranges that we wrote. Inject an error so that we can > # test that log recovery finishes the swap. > >