[PATCH 13/19] libfrog: convert xfs_io swapext command to use new libfrog wrapper

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Darrick J. Wong <djwong@xxxxxxxxxx>

Create an abstraction layer for the two swapext ioctls and port xfs_io
to use it.  Now we're insulated from the differences between the XFS v0
ioctl and the new vfs ioctl.

Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx>
---
 configure.ac            |    1 
 include/builddefs.in    |    1 
 io/Makefile             |    4 +
 io/swapext.c            |   55 ++++++++-------
 libfrog/Makefile        |    6 ++
 libfrog/fiexchange.h    |  105 ++++++++++++++++++++++++++++++
 libfrog/file_exchange.c |  167 +++++++++++++++++++++++++++++++++++++++++++++++
 libfrog/file_exchange.h |   14 ++++
 libfrog/fsgeom.h        |    6 ++
 m4/package_libcdev.m4   |   20 ++++++
 10 files changed, 352 insertions(+), 27 deletions(-)
 create mode 100644 libfrog/fiexchange.h
 create mode 100644 libfrog/file_exchange.c
 create mode 100644 libfrog/file_exchange.h


diff --git a/configure.ac b/configure.ac
index 6c704464061..f4f1563da8b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -254,6 +254,7 @@ AC_HAVE_LIBURCU_ATOMIC64
 AC_HAVE_MEMFD_CLOEXEC
 AC_HAVE_O_TMPFILE
 AC_HAVE_MKOSTEMP_CLOEXEC
+AC_HAVE_FIEXCHANGE
 
 AC_CONFIG_FILES([include/builddefs])
 AC_OUTPUT
diff --git a/include/builddefs.in b/include/builddefs.in
index 60c1320af37..c0de6000c2a 100644
--- a/include/builddefs.in
+++ b/include/builddefs.in
@@ -130,6 +130,7 @@ HAVE_LIBURCU_ATOMIC64 = @have_liburcu_atomic64@
 HAVE_MEMFD_CLOEXEC = @have_memfd_cloexec@
 HAVE_O_TMPFILE = @have_o_tmpfile@
 HAVE_MKOSTEMP_CLOEXEC = @have_mkostemp_cloexec@
+HAVE_FIEXCHANGE = @have_fiexchange@
 
 GCCFLAGS = -funsigned-char -fno-strict-aliasing -Wall
 #	   -Wbitwise -Wno-transparent-union -Wno-old-initializer -Wno-decl
diff --git a/io/Makefile b/io/Makefile
index 498174cfc43..229f8f377b3 100644
--- a/io/Makefile
+++ b/io/Makefile
@@ -112,6 +112,10 @@ ifeq ($(HAVE_STATFS_FLAGS),yes)
 LCFLAGS += -DHAVE_STATFS_FLAGS
 endif
 
+ifeq ($(HAVE_FIEXCHANGE),yes)
+LCFLAGS += -DHAVE_FIEXCHANGE
+endif
+
 default: depend $(LTCOMMAND)
 
 include $(BUILDRULES)
diff --git a/io/swapext.c b/io/swapext.c
index a4153bb7d42..3f8a5c7b4d4 100644
--- a/io/swapext.c
+++ b/io/swapext.c
@@ -10,7 +10,8 @@
 #include "io.h"
 #include "libfrog/logging.h"
 #include "libfrog/fsgeom.h"
-#include "libfrog/bulkstat.h"
+#include "libfrog/fiexchange.h"
+#include "libfrog/file_exchange.h"
 
 static cmdinfo_t swapext_cmd;
 
@@ -28,47 +29,54 @@ swapext_f(
 	int			argc,
 	char			**argv)
 {
-	struct xfs_fd		fxfd = XFS_FD_INIT(file->fd);
-	struct xfs_bulkstat	bulkstat;
-	int			fd;
-	int			error;
-	struct xfs_swapext	sx;
+	struct xfs_fd		xfd = XFS_FD_INIT(file->fd);
+	struct file_xchg_range	fxr;
 	struct stat		stat;
+	uint64_t		flags = FILE_XCHG_RANGE_NONATOMIC |
+					FILE_XCHG_RANGE_FILE2_FRESH |
+					FILE_XCHG_RANGE_FULL_FILES;
+	int			fd;
+	int			ret;
 
 	/* open the donor file */
 	fd = openfile(argv[1], NULL, 0, 0, NULL);
 	if (fd < 0)
 		return 0;
 
-	/*
-	 * stat the target file to get the inode number and use the latter to
-	 * get the bulkstat info for the swapext cmd.
-	 */
-	error = fstat(file->fd, &stat);
-	if (error) {
+	ret = -xfd_prepare_geometry(&xfd);
+	if (ret) {
+		xfrog_perror(ret, "xfd_prepare_geometry");
+		exitcode = 1;
+		goto out;
+	}
+
+	ret = fstat(file->fd, &stat);
+	if (ret) {
 		perror("fstat");
+		exitcode = 1;
 		goto out;
 	}
 
-	error = -xfrog_bulkstat_single(&fxfd, stat.st_ino, 0, &bulkstat);
-	if (error) {
-		xfrog_perror(error, "bulkstat");
+	/*
+	 * The old swapext ioctl was never atomic, so request a nonatomic
+	 * exchange; that is the only kind libfrog can fall back to the old
+	 * ioctl for.  libfrog returns negative errnos, so flip the sign for
+	 * xfrog_perror like we do for xfd_prepare_geometry.
+	 */
+	ret = -xfrog_file_exchange_prep(&xfd, flags, 0, fd, 0, stat.st_size,
+			&fxr);
+	if (ret) {
+		xfrog_perror(ret, "xfrog_file_exchange_prep");
+		exitcode = 1;
 		goto out;
 	}
-	error = -xfrog_bulkstat_v5_to_v1(&fxfd, &sx.sx_stat, &bulkstat);
-	if (error) {
-		xfrog_perror(error, "bulkstat conversion");
+
+	ret = -xfrog_file_exchange(&xfd, &fxr);
+	if (ret) {
+		xfrog_perror(ret, "swapext");
+		exitcode = 1;
 		goto out;
 	}
-	sx.sx_version = XFS_SX_VERSION;
-	sx.sx_fdtarget = file->fd;
-	sx.sx_fdtmp = fd;
-	sx.sx_offset = 0;
-	sx.sx_length = stat.st_size;
-	error = ioctl(file->fd, XFS_IOC_SWAPEXT, &sx);
-	if (error)
-		perror("swapext");
-
 out:
 	close(fd);
 	return 0;
diff --git a/libfrog/Makefile b/libfrog/Makefile
index 0110708239a..66d2afe56fe 100644
--- a/libfrog/Makefile
+++ b/libfrog/Makefile
@@ -18,6 +18,7 @@ bitmap.c \
 bulkstat.c \
 convert.c \
 crc32.c \
+file_exchange.c \
 fsgeom.c \
 list_sort.c \
 linux.c \
@@ -39,6 +40,7 @@ crc32c.h \
 crc32cselftest.h \
 crc32defs.h \
 crc32table.h \
+file_exchange.h \
 fsgeom.h \
 logging.h \
 paths.h \
@@ -54,6 +56,10 @@ ifeq ($(HAVE_GETMNTENT),yes)
 LCFLAGS += -DHAVE_GETMNTENT
 endif
 
+ifeq ($(HAVE_FIEXCHANGE),yes)
+LCFLAGS += -DHAVE_FIEXCHANGE
+endif
+
 LDIRT = gen_crc32table crc32table.h crc32selftest
 
 default: crc32selftest ltdepend $(LTLIBRARY)
diff --git a/libfrog/fiexchange.h b/libfrog/fiexchange.h
new file mode 100644
index 00000000000..04ec42777d7
--- /dev/null
+++ b/libfrog/fiexchange.h
@@ -0,0 +1,105 @@
+#ifdef HAVE_FIEXCHANGE
+# include <linux/fiexchange.h>
+#endif
+
+/* SPDX-License-Identifier: GPL-2.0-or-later WITH Linux-syscall-note */
+/*
+ * FIEXCHANGE ioctl definitions, to facilitate exchanging parts of files.
+ *
+ * Copyright (C) 2022 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <djwong@xxxxxxxxxx>
+ */
+#ifndef _LINUX_FIEXCHANGE_H
+#define _LINUX_FIEXCHANGE_H
+
+#include <linux/types.h>
+
+/*
+ * Exchange part of file1 with part of the file that this ioctl is being
+ * called against (which we'll call file2).  Filesystems must be able to
+ * restart and complete the operation even after the system goes down.
+ */
+struct file_xchg_range {
+	__s64		file1_fd;
+	__s64		file1_offset;	/* file1 offset, bytes */
+	__s64		file2_offset;	/* file2 offset, bytes */
+	__s64		length;		/* bytes to exchange */
+
+	__u64		flags;		/* see FILE_XCHG_RANGE_* below */
+
+	/* file2 metadata for optional freshness checks */
+	__s64		file2_ino;	/* inode number */
+	__s64		file2_mtime;	/* modification time */
+	__s64		file2_ctime;	/* change time */
+	__s32		file2_mtime_nsec; /* mod time, nsec */
+	__s32		file2_ctime_nsec; /* change time, nsec */
+
+	__u64		pad[6];		/* must be zeroes */
+};
+
+/*
+ * Atomic exchange operations are not required.  This relaxes the requirement
+ * that the filesystem must be able to complete the operation after a crash.
+ */
+#define FILE_XCHG_RANGE_NONATOMIC	(1 << 0)
+
+/*
+ * Check file2's inode number, mtime, and ctime against the values
+ * provided, and return -EBUSY if there isn't an exact match.
+ */
+#define FILE_XCHG_RANGE_FILE2_FRESH	(1 << 1)
+
+/*
+ * Check that file1's length is equal to file1_offset + length, and that
+ * file2's length is equal to file2_offset + length.  Returns -EDOM if there
+ * isn't an exact match.
+ */
+#define FILE_XCHG_RANGE_FULL_FILES	(1 << 2)
+
+/*
+ * Exchange file data all the way to the ends of both files, and then exchange
+ * the file sizes.  This flag can be used to replace a file's contents with a
+ * different amount of data.  length will be ignored.
+ */
+#define FILE_XCHG_RANGE_TO_EOF		(1 << 3)
+
+/* Flush all changes in file data and file metadata to disk before returning. */
+#define FILE_XCHG_RANGE_FSYNC		(1 << 4)
+
+/* Dry run; do all the parameter verification but do not change anything. */
+#define FILE_XCHG_RANGE_DRY_RUN		(1 << 5)
+
+/*
+ * Do not exchange any part of the range where file1's mapping is a hole.  This
+ * can be used to emulate scatter-gather atomic writes with a temp file.
+ */
+#define FILE_XCHG_RANGE_SKIP_FILE1_HOLES (1 << 6)
+
+/*
+ * Commit the contents of file1 into file2 if file2 has the same inode number,
+ * mtime, and ctime as the arguments provided to the call.  The old contents of
+ * file2 will be moved to file1.
+ *
+ * With this flag, all committed information can be retrieved even if the
+ * system crashes or is rebooted.  This includes writing through or flushing a
+ * disk cache if present.  The call blocks until the device reports that the
+ * commit is complete.
+ *
+ * This flag should not be combined with NONATOMIC.  It can be combined with
+ * SKIP_FILE1_HOLES.
+ */
+#define FILE_XCHG_RANGE_COMMIT		(FILE_XCHG_RANGE_FILE2_FRESH | \
+					 FILE_XCHG_RANGE_FSYNC)
+
+#define FILE_XCHG_RANGE_ALL_FLAGS	(FILE_XCHG_RANGE_NONATOMIC | \
+					 FILE_XCHG_RANGE_FILE2_FRESH | \
+					 FILE_XCHG_RANGE_FULL_FILES | \
+					 FILE_XCHG_RANGE_TO_EOF | \
+					 FILE_XCHG_RANGE_FSYNC | \
+					 FILE_XCHG_RANGE_DRY_RUN | \
+					 FILE_XCHG_RANGE_SKIP_FILE1_HOLES)
+
+#define FIEXCHANGE_RANGE	_IOWR('X', 129, struct file_xchg_range)
+
+#endif /* _LINUX_FIEXCHANGE_H */
diff --git a/libfrog/file_exchange.c b/libfrog/file_exchange.c
new file mode 100644
index 00000000000..00277f8f0fc
--- /dev/null
+++ b/libfrog/file_exchange.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2022 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@xxxxxxxxxx>
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <string.h>
+#include "xfs.h"
+#include "fsgeom.h"
+#include "bulkstat.h"
+#include "fiexchange.h"
+#include "file_exchange.h"
+
+/* Sample file2's inode number and [cm]time for the freshness check. */
+static int
+xfrog_file_exchange_prep_freshness(
+	struct xfs_fd		*dest,
+	struct file_xchg_range	*req)
+{
+	struct xfs_bulkstat	bstat;
+	struct stat		sb;
+
+	if (fstat(dest->fd, &sb))
+		return -errno;
+	req->file2_ino = sb.st_ino;
+
+	/*
+	 * Start with the timestamps that the C library gave us.  We're stuck
+	 * with these if bulkstat fails, and we'll also take our chances on
+	 * them if the filesystem supports 64-bit time but we ended up with
+	 * bulkstat v5 emulation.
+	 */
+	req->file2_mtime = sb.st_mtime;
+	req->file2_ctime = sb.st_ctime;
+	req->file2_mtime_nsec = sb.st_mtim.tv_nsec;
+	req->file2_ctime_nsec = sb.st_ctim.tv_nsec;
+
+	if (xfrog_bulkstat_single(dest, sb.st_ino, 0, &bstat))
+		return 0;
+	if ((dest->fsgeom.flags & XFS_FSOP_GEOM_FLAGS_BIGTIME) &&
+	    bstat.bs_version < XFS_BULKSTAT_VERSION_V5)
+		return 0;
+
+	/*
+	 * Otherwise, prefer the [cm]time data from bulkstat because bulkstat
+	 * v5 gives us 64-bit time even on 32-bit.
+	 */
+	req->file2_mtime = bstat.bs_mtime;
+	req->file2_ctime = bstat.bs_ctime;
+	req->file2_mtime_nsec = bstat.bs_mtime_nsec;
+	req->file2_ctime_nsec = bstat.bs_ctime_nsec;
+	return 0;
+}
+
+/*
+ * Zero req and fill it out to exchange length bytes between file1_fd and
+ * dest.  Freshness data is sampled from dest only if the flags ask for it.
+ */
+int
+xfrog_file_exchange_prep(
+	struct xfs_fd		*dest,
+	uint64_t		flags,
+	int64_t			file2_offset,
+	int			file1_fd,
+	int64_t			file1_offset,
+	int64_t			length,
+	struct file_xchg_range	*req)
+{
+	memset(req, 0, sizeof(*req));
+	req->flags = flags;
+	req->length = length;
+	req->file1_fd = file1_fd;
+	req->file1_offset = file1_offset;
+	req->file2_offset = file2_offset;
+	return (flags & FILE_XCHG_RANGE_FILE2_FRESH) ?
+			xfrog_file_exchange_prep_freshness(dest, req) : 0;
+}
+
+/*
+ * Exchange file contents with the vfs FIEXCHANGE_RANGE ioctl.  Returns zero
+ * or a negative errno.
+ */
+static int
+xfrog_file_exchange_vfs(
+	struct xfs_fd		*xfd,
+	struct file_xchg_range	*req)
+{
+	if (!ioctl(xfd->fd, FIEXCHANGE_RANGE, req))
+		return 0;
+
+	/* the old swapext ioctl returned EFAULT for bad length */
+	if (errno == EDOM)
+		return -EFAULT;
+	return -errno;
+}
+
+/*
+ * Flags describing exactly what the old swapext ioctl does: the swap is not
+ * atomic; the supplied offset and length must match both files' lengths; and
+ * the sx_stat information must match the dest file.  No other flags are
+ * supported.
+ */
+#define FILE_XCHG_RANGE_SWAPEXT0	(FILE_XCHG_RANGE_NONATOMIC | \
+					 FILE_XCHG_RANGE_FULL_FILES | \
+					 FILE_XCHG_RANGE_FILE2_FRESH)
+
+/* Swap two files' extents with the old xfs swapext ioctl. */
+static int
+xfrog_file_exchange0(
+	struct xfs_fd		*xfd,
+	struct file_xchg_range	*req)
+{
+	struct xfs_swapext	sx = {
+		.sx_version	= XFS_SX_VERSION,
+		.sx_fdtarget	= xfd->fd,
+		.sx_fdtmp	= req->file1_fd,
+		.sx_length	= req->length,
+	};
+
+	/* Reject anything that the old ioctl cannot do. */
+	if (req->file1_offset != req->file2_offset)
+		return -EINVAL;
+	if (req->flags != FILE_XCHG_RANGE_SWAPEXT0)
+		return -EOPNOTSUPP;
+
+	/* The old ioctl wants sx_stat to match the dest file. */
+	sx.sx_stat.bs_ino = req->file2_ino;
+	sx.sx_stat.bs_mtime.tv_sec = req->file2_mtime;
+	sx.sx_stat.bs_mtime.tv_nsec = req->file2_mtime_nsec;
+	sx.sx_stat.bs_ctime.tv_sec = req->file2_ctime;
+	sx.sx_stat.bs_ctime.tv_nsec = req->file2_ctime_nsec;
+
+	if (ioctl(xfd->fd, XFS_IOC_SWAPEXT, &sx))
+		return -errno;
+	return 0;
+}
+
+/*
+ * Exchange file contents between an XFS file (xfd) and a donor fd (set in
+ * req->file1_fd).  Try the vfs FIEXCHANGE_RANGE ioctl first and fall back to
+ * the old XFS swapext ioctl if the kernel does not support it, unless the
+ * XFROG_FLAG_FORCE_* flags in xfd->flags say otherwise.  Returns 0 or a
+ * negative errno.
+ */
+int
+xfrog_file_exchange(
+	struct xfs_fd		*xfd,
+	struct file_xchg_range	*req)
+{
+	int			error;
+
+	if (xfd->flags & XFROG_FLAG_FORCE_SWAPEXT)
+		goto try_v0;
+
+	error = xfrog_file_exchange_vfs(xfd, req);
+	if ((error != -ENOTTY && error != -EOPNOTSUPP) ||
+	    (xfd->flags & XFROG_FLAG_FORCE_FIEXCHANGE))
+		return error;
+
+	/* The vfs ioctl wasn't found; remember that and punt to v0. */
+	xfd->flags |= XFROG_FLAG_FORCE_SWAPEXT;
+try_v0:
+	return xfrog_file_exchange0(xfd, req);
+}
diff --git a/libfrog/file_exchange.h b/libfrog/file_exchange.h
new file mode 100644
index 00000000000..a77d67514e8
--- /dev/null
+++ b/libfrog/file_exchange.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2022 Oracle. Inc.
+ * All Rights Reserved.
+ */
+#ifndef __LIBFROG_FILE_EXCHANGE_H__
+#define __LIBFROG_FILE_EXCHANGE_H__
+
+int xfrog_file_exchange_prep(struct xfs_fd *file2, uint64_t flags,
+		int64_t file2_offset, int file1_fd, int64_t file1_offset,
+		int64_t length, struct file_xchg_range *req);
+int xfrog_file_exchange(struct xfs_fd *xfd, struct file_xchg_range *req);
+
+#endif	/* __LIBFROG_FILE_EXCHANGE_H__ */
diff --git a/libfrog/fsgeom.h b/libfrog/fsgeom.h
index ca38324e853..9dfa986ff08 100644
--- a/libfrog/fsgeom.h
+++ b/libfrog/fsgeom.h
@@ -50,6 +50,12 @@ struct xfs_fd {
 /* Only use v5 bulkstat/inumbers ioctls. */
 #define XFROG_FLAG_BULKSTAT_FORCE_V5	(1 << 1)
 
+/* Only use the old XFS swapext ioctl for file data exchanges. */
+#define XFROG_FLAG_FORCE_SWAPEXT	(1 << 2)
+
+/* Only use FIEXCHANGE_RANGE for file data exchanges. */
+#define XFROG_FLAG_FORCE_FIEXCHANGE	(1 << 3)
+
 /* Static initializers */
 #define XFS_FD_INIT(_fd)	{ .fd = (_fd), }
 #define XFS_FD_INIT_EMPTY	XFS_FD_INIT(-1)
diff --git a/m4/package_libcdev.m4 b/m4/package_libcdev.m4
index 119d1bda74d..062730a1f06 100644
--- a/m4/package_libcdev.m4
+++ b/m4/package_libcdev.m4
@@ -557,3 +557,23 @@ AC_DEFUN([AC_HAVE_MKOSTEMP_CLOEXEC],
        AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)])
     AC_SUBST(have_mkostemp_cloexec)
   ])
+
+#
+# Check if we have a FIEXCHANGE_RANGE ioctl (Linux)
+#
+AC_DEFUN([AC_HAVE_FIEXCHANGE],
+  [ AC_MSG_CHECKING([for FIEXCHANGE_RANGE])
+    AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+#define _GNU_SOURCE
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <linux/fs.h>
+#include <linux/fiexchange.h>
+    ]], [[
+         struct file_xchg_range fxr;
+         ioctl(-1, FIEXCHANGE_RANGE, &fxr);
+    ]])],[have_fiexchange=yes
+       AC_MSG_RESULT(yes)],[AC_MSG_RESULT(no)])
+    AC_SUBST(have_fiexchange)
+  ])




[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux