On Fri, Dec 30, 2022 at 02:19:49PM -0800, Darrick J. Wong wrote: > From: Darrick J. Wong <djwong@xxxxxxxxxx> > > Upgrade fsx to support exchanging file contents. > > Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx> > --- Hi Darrick, I've merged most of the patches of [NYE DELUGE 2/4]; now I'm trying to merge the rest of it. This patch triggers a build warning [1] from autoconf. Can you rebase this patch onto the current for-next branch, and use autoupdate to update configure.ac and lib/autoconf/general.m4? Thanks, Zorro [1] autoconf configure.ac:73: warning: The macro `AC_TRY_LINK' is obsolete. configure.ac:73: You should run autoupdate. ./lib/autoconf/general.m4:2920: AC_TRY_LINK is expanded from... m4/package_libcdev.m4:161: AC_HAVE_FIEXCHANGE is expanded from... configure.ac:73: the top level ./configure \ --libexecdir=/usr/lib \ --exec_prefix=/var/lib > configure.ac | 1 > include/builddefs.in | 1 > ltp/Makefile | 4 + > ltp/fsx.c | 160 ++++++++++++++++++++++++++++++++++++++++++++++++- > m4/package_libcdev.m4 | 21 ++++++ > src/fiexchange.h | 101 +++++++++++++++++++++++++++++++ > src/global.h | 6 ++ > 7 files changed, 292 insertions(+), 2 deletions(-) > create mode 100644 src/fiexchange.h > > > diff --git a/configure.ac b/configure.ac > index e92bd6b26d..4687d8a3c0 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -70,6 +70,7 @@ AC_HAVE_SEEK_DATA > AC_HAVE_BMV_OF_SHARED > AC_HAVE_NFTW > AC_HAVE_RLIMIT_NOFILE > +AC_HAVE_FIEXCHANGE [snip] > setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */ > > while ((ch = getopt_long(argc, argv, > - "b:c:dfg:i:j:kl:m:no:p:qr:s:t:w:xyABD:EFJKHzCILN:OP:RS:UWXZ", > + "0b:c:dfg:i:j:kl:m:no:p:qr:s:t:w:xyABD:EFJKHzCILN:OP:RS:UWXZ", Looks like we have nearly used up the option letters in fsx for disabling operations. Maybe we can use a single option (e.g. -a means avoid) and suboptions to handle that. For example "-a xchg_range,clone_range,dedupe_range" to avoid these 3 operations. Or use long options, e.g. 
--no-xchg-range, --no-clone-range, to replace the short ones. What do you think? (Anyway, that's not a problem with this patch.) Thanks, Zorro > longopts, NULL)) != EOF) > switch (ch) { > case 'b': > @@ -2747,6 +2898,9 @@ main(int argc, char **argv) > case 'I': > insert_range_calls = 0; > break; > + case '0': > + xchg_range_calls = 0; > + break; > case 'J': > clone_range_calls = 0; > break; > @@ -2988,6 +3142,8 @@ main(int argc, char **argv) > dedupe_range_calls = test_dedupe_range(); > if (copy_range_calls) > copy_range_calls = test_copy_range(); > + if (xchg_range_calls) > + xchg_range_calls = test_xchg_range(); > > while (numops == -1 || numops--) > if (!test()) > diff --git a/m4/package_libcdev.m4 b/m4/package_libcdev.m4 > index e1b381c16f..db663970c2 100644 > --- a/m4/package_libcdev.m4 > +++ b/m4/package_libcdev.m4 > @@ -157,3 +157,24 @@ AC_DEFUN([AC_HAVE_RLIMIT_NOFILE], > AC_MSG_RESULT(no)) > AC_SUBST(have_rlimit_nofile) > ]) > + > +# > +# Check if we have a FIEXCHANGE_RANGE ioctl (Linux) > +# > +AC_DEFUN([AC_HAVE_FIEXCHANGE], > + [ AC_MSG_CHECKING([for FIEXCHANGE_RANGE]) > + AC_TRY_LINK([ > +#define _GNU_SOURCE > +#include <sys/syscall.h> > +#include <sys/ioctl.h> > +#include <unistd.h> > +#include <linux/fs.h> > +#include <linux/fiexchange.h> > + ], [ > + struct file_xchg_range fxr; > + ioctl(-1, FIEXCHANGE_RANGE, &fxr); > + ], have_fiexchange=yes > + AC_MSG_RESULT(yes), > + AC_MSG_RESULT(no)) > + AC_SUBST(have_fiexchange) > + ]) > diff --git a/src/fiexchange.h b/src/fiexchange.h > new file mode 100644 > index 0000000000..29b3ac0ff5 > --- /dev/null > +++ b/src/fiexchange.h > @@ -0,0 +1,101 @@ > +/* SPDX-License-Identifier: GPL-2.0-or-later WITH Linux-syscall-note */ > +/* > + * FIEXCHANGE ioctl definitions, to facilitate exchanging parts of files. > + * > + * Copyright (C) 2022 Oracle. All Rights Reserved. > + * > + * Author: Darrick J. 
Wong <djwong@xxxxxxxxxx> > + */ > +#ifndef _LINUX_FIEXCHANGE_H > +#define _LINUX_FIEXCHANGE_H > + > +#include <linux/types.h> > + > +/* > + * Exchange part of file1 with part of the file that this ioctl that is being > + * called against (which we'll call file2). Filesystems must be able to > + * restart and complete the operation even after the system goes down. > + */ > +struct file_xchg_range { > + __s64 file1_fd; > + __s64 file1_offset; /* file1 offset, bytes */ > + __s64 file2_offset; /* file2 offset, bytes */ > + __s64 length; /* bytes to exchange */ > + > + __u64 flags; /* see FILE_XCHG_RANGE_* below */ > + > + /* file2 metadata for optional freshness checks */ > + __s64 file2_ino; /* inode number */ > + __s64 file2_mtime; /* modification time */ > + __s64 file2_ctime; /* change time */ > + __s32 file2_mtime_nsec; /* mod time, nsec */ > + __s32 file2_ctime_nsec; /* change time, nsec */ > + > + __u64 pad[6]; /* must be zeroes */ > +}; > + > +/* > + * Atomic exchange operations are not required. This relaxes the requirement > + * that the filesystem must be able to complete the operation after a crash. > + */ > +#define FILE_XCHG_RANGE_NONATOMIC (1 << 0) > + > +/* > + * Check that file2's inode number, mtime, and ctime against the values > + * provided, and return -EBUSY if there isn't an exact match. > + */ > +#define FILE_XCHG_RANGE_FILE2_FRESH (1 << 1) > + > +/* > + * Check that the file1's length is equal to file1_offset + length, and that > + * file2's length is equal to file2_offset + length. Returns -EDOM if there > + * isn't an exact match. > + */ > +#define FILE_XCHG_RANGE_FULL_FILES (1 << 2) > + > +/* > + * Exchange file data all the way to the ends of both files, and then exchange > + * the file sizes. This flag can be used to replace a file's contents with a > + * different amount of data. length will be ignored. > + */ > +#define FILE_XCHG_RANGE_TO_EOF (1 << 3) > + > +/* Flush all changes in file data and file metadata to disk before returning. 
*/ > +#define FILE_XCHG_RANGE_FSYNC (1 << 4) > + > +/* Dry run; do all the parameter verification but do not change anything. */ > +#define FILE_XCHG_RANGE_DRY_RUN (1 << 5) > + > +/* > + * Do not exchange any part of the range where file1's mapping is a hole. This > + * can be used to emulate scatter-gather atomic writes with a temp file. > + */ > +#define FILE_XCHG_RANGE_SKIP_FILE1_HOLES (1 << 6) > + > +/* > + * Commit the contents of file1 into file2 if file2 has the same inode number, > + * mtime, and ctime as the arguments provided to the call. The old contents of > + * file2 will be moved to file1. > + * > + * With this flag, all committed information can be retrieved even if the > + * system crashes or is rebooted. This includes writing through or flushing a > + * disk cache if present. The call blocks until the device reports that the > + * commit is complete. > + * > + * This flag should not be combined with NONATOMIC. It can be combined with > + * SKIP_FILE1_HOLES. > + */ > +#define FILE_XCHG_RANGE_COMMIT (FILE_XCHG_RANGE_FILE2_FRESH | \ > + FILE_XCHG_RANGE_FSYNC) > + > +#define FILE_XCHG_RANGE_ALL_FLAGS (FILE_XCHG_RANGE_NONATOMIC | \ > + FILE_XCHG_RANGE_FILE2_FRESH | \ > + FILE_XCHG_RANGE_FULL_FILES | \ > + FILE_XCHG_RANGE_TO_EOF | \ > + FILE_XCHG_RANGE_FSYNC | \ > + FILE_XCHG_RANGE_DRY_RUN | \ > + FILE_XCHG_RANGE_SKIP_FILE1_HOLES) > + > +#define FIEXCHANGE_RANGE _IOWR('X', 129, struct file_xchg_range) > + > +#endif /* _LINUX_FIEXCHANGE_H */ > diff --git a/src/global.h b/src/global.h > index b44070993c..49570ef117 100644 > --- a/src/global.h > +++ b/src/global.h > @@ -171,6 +171,12 @@ > #include <sys/mman.h> > #endif > > +#ifdef HAVE_FIEXCHANGE > +# include <linux/fiexchange.h> > +#else > +# include "fiexchange.h" > +#endif > + > static inline unsigned long long > rounddown_64(unsigned long long x, unsigned int y) > { >