[PATCH v2 03/51] xfs_io: support reflink and dedupe of file ranges

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Wire up xfs_io to use the XFS clone-range ioctl to make files share
data blocks, or the XFS extent-same ioctl to deduplicate file blocks.

v2: Send along the operation description to the io time reporting function

Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
---
 io/Makefile       |    2 
 io/init.c         |    1 
 io/io.h           |    2 
 io/reflink.c      |  325 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 man/man8/xfs_io.8 |   59 ++++++++++
 5 files changed, 388 insertions(+), 1 deletion(-)
 create mode 100644 io/reflink.c

diff --git a/io/Makefile b/io/Makefile
index a08a782..513f8c9 100644
--- a/io/Makefile
+++ b/io/Makefile
@@ -11,7 +11,7 @@ HFILES = init.h io.h
 CFILES = init.c \
 	attr.c bmap.c file.c freeze.c fsync.c getrusage.c imap.c link.c \
 	mmap.c open.c parent.c pread.c prealloc.c pwrite.c seek.c shutdown.c \
-	sync.c truncate.c
+	sync.c truncate.c reflink.c
 
 LLDLIBS = $(LIBXCMD) $(LIBHANDLE)
 LTDEPENDENCIES = $(LIBXCMD) $(LIBHANDLE)
diff --git a/io/init.c b/io/init.c
index 13f35c4..51f1f5c 100644
--- a/io/init.c
+++ b/io/init.c
@@ -83,6 +83,7 @@ init_commands(void)
 	sync_init();
 	sync_range_init();
 	truncate_init();
+	reflink_init();
 }
 
 static int
diff --git a/io/io.h b/io/io.h
index b115e4a..172b1f8 100644
--- a/io/io.h
+++ b/io/io.h
@@ -161,3 +161,5 @@ extern void		readdir_init(void);
 #else
 #define readdir_init()		do { } while (0)
 #endif
+
+extern void		reflink_init(void);
diff --git a/io/reflink.c b/io/reflink.c
new file mode 100644
index 0000000..5ba1c93
--- /dev/null
+++ b/io/reflink.c
@@ -0,0 +1,325 @@
+/*
+ * Copyright (c) 2015 Oracle, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <sys/uio.h>
+#include <xfs/xfs.h>
+#include "command.h"
+#include "input.h"
+#include "init.h"
+#include "io.h"
+
+static cmdinfo_t dedupe_cmd;	/* "dedupe" command, set up in reflink_init() */
+static cmdinfo_t reflink_cmd;	/* "reflink" command, set up in reflink_init() */
+
+/* Print the long help text for the "dedupe" command. */
+static void
+dedupe_help(void)
+{
+	printf(_("\n"
+" Links a range of bytes (in block size increments) from a file into a range\n"
+" of bytes in the open file.  The contents of both file ranges must match.\n"
+"\n"
+" Example:\n"
+" 'dedupe some_file 0 4096 32768' - links 32768 bytes from some_file at\n"
+"                                    offset 0 into the open file at\n"
+"                                    position 4096\n"
+"\n"
+" Reflink a range of blocks from a given input file to the open file.  Both\n"
+" files share the same range of physical disk blocks; a write to the shared\n"
+" range of either file should result in the write landing in a new block and\n"
+" that range of the file being remapped (i.e. copy-on-write).  Both files\n"
+" must reside on the same filesystem, and the contents of both ranges must\n"
+" match.\n"));
+}
+
+/*
+ * Dedupe len bytes of fd at soffset against the open file at doffset, one
+ * ioctl per chunk.  Bumps *ops per chunk; returns the total bytes deduped.
+ */
+static uint64_t
+dedupe_ioctl(
+	int		fd,
+	uint64_t	soffset,
+	uint64_t	doffset,
+	uint64_t	len,
+	int		*ops)
+{
+	struct xfs_extent_data		*args;
+	struct xfs_extent_data_info	*info;
+	uint64_t			deduped = 0;
+
+	args = calloc(1, sizeof(*args) + sizeof(*info));
+	if (!args) {
+		perror("calloc");	/* don't fail silently */
+		return 0;
+	}
+	info = (struct xfs_extent_data_info *)(args + 1);
+	args->logical_offset = soffset;
+	args->length = len;
+	args->dest_count = 1;	/* the single info record behind args */
+	info->fd = file->fd;
+	info->logical_offset = doffset;
+	while (args->length > 0) {
+		if (ioctl(fd, XFS_IOC_FILE_EXTENT_SAME, args)) {
+			perror("XFS_IOC_FILE_EXTENT_SAME");
+			break;
+		}
+		if (info->status < 0) {
+			printf("dedupe: %s\n", _(strerror(-info->status)));
+			break;
+		}
+		if (info->status == XFS_EXTENT_DATA_DIFFERS) {
+			printf(_("Extents did not match.\n"));
+			break;
+		}
+		if (info->bytes_deduped == 0 ||
+		    info->bytes_deduped > args->length)
+			break;	/* stalled or bogus count: stop, don't spin */
+		(*ops)++;
+		args->logical_offset += info->bytes_deduped;
+		info->logical_offset += info->bytes_deduped;
+		args->length -= info->bytes_deduped;
+		deduped += info->bytes_deduped;
+	}
+	free(args);
+	return deduped;
+}
+
+/*
+ * Handler for "dedupe [-C] [-q] infile src_off dst_off len".
+ *
+ * Opens infile read-only, dedupes the requested range against the open file
+ * and, unless -q was given or nothing was deduped, reports the I/O timing.
+ * Returns command_usage() for bad options or operand count, 0 otherwise.
+ */
+static int
+dedupe_f(
+	int		argc,
+	char		**argv)
+{
+	off64_t		src_off, dst_off;
+	long long	length, nbytes;
+	char		*src_name;
+	int		compact = 0, quiet = 0;
+	size_t		blksz, sectsz;
+	struct timeval	start, end;
+	int		opt, nr_ops = 0, src_fd;
+
+	init_cvtnum(&blksz, &sectsz);
+	while ((opt = getopt(argc, argv, "Cq")) != EOF) {
+		if (opt == 'C')
+			compact = 1;
+		else if (opt == 'q')
+			quiet = 1;
+		else
+			return command_usage(&dedupe_cmd);
+	}
+	if (argc - optind != 4)		/* all four operands are mandatory */
+		return command_usage(&dedupe_cmd);
+
+	src_name = argv[optind++];
+	src_off = cvtnum(blksz, sectsz, argv[optind]);
+	if (src_off < 0) {
+		printf(_("non-numeric src offset argument -- %s\n"), argv[optind]);
+		return 0;
+	}
+	optind++;
+	dst_off = cvtnum(blksz, sectsz, argv[optind]);
+	if (dst_off < 0) {
+		printf(_("non-numeric dest offset argument -- %s\n"), argv[optind]);
+		return 0;
+	}
+	optind++;
+	length = cvtnum(blksz, sectsz, argv[optind]);
+	if (length < 1) {
+		printf(_("non-positive length argument -- %s\n"), argv[optind]);
+		return 0;
+	}
+
+	src_fd = openfile(src_name, NULL, IO_READONLY, 0);
+	if (src_fd < 0)
+		return 0;
+
+	/* Only the dedupe itself is timed, not the open or close. */
+	gettimeofday(&start, NULL);
+	nbytes = dedupe_ioctl(src_fd, src_off, dst_off, length, &nr_ops);
+	if (nr_ops != 0 && !quiet) {
+		gettimeofday(&end, NULL);
+		end = tsub(end, start);
+		report_io_times("deduped", &end, (long long)dst_off, length,
+				nbytes, nr_ops, compact);
+	}
+	close(src_fd);
+	return 0;
+}
+
+/* Print the long help text for the "reflink" command. */
+static void
+reflink_help(void)
+{
+	printf(_("\n"
+" Links a range of bytes (in block size increments) from a file into a range\n"
+" of bytes in the open file.  The two extent ranges need not contain identical\n"
+" data.\n"
+"\n"
+" Example:\n"
+" 'reflink some_file 0 4096 32768' - links 32768 bytes from some_file at\n"
+"                                    offset 0 into the open file at\n"
+"                                    position 4096\n"
+" 'reflink some_file' - links all bytes from some_file into the open file\n"
+"                       at position 0\n"
+"\n"
+" Reflink a range of blocks from a given input file to the open file.  Both\n"
+" files share the same range of physical disk blocks; a write to the shared\n"
+" range of either file should result in the write landing in a new block and\n"
+" that range of the file being remapped (i.e. copy-on-write).  Both files\n"
+" must reside on the same filesystem.\n"));
+}
+
+/* Clone len bytes of fd, or all of fd when len is 0, into the open file. */
+static uint64_t
+reflink_ioctl(
+	int			fd,
+	uint64_t		soffset,
+	uint64_t		doffset,
+	uint64_t		len,
+	int			*ops)
+{
+	struct xfs_clone_args	args;
+	int			ret;
+
+	if (!len) {
+		ret = ioctl(file->fd, XFS_IOC_CLONE, fd);
+	} else {
+		args.src_fd = fd;
+		args.src_offset = soffset;
+		args.src_length = len;
+		args.dest_offset = doffset;
+		ret = ioctl(file->fd, XFS_IOC_CLONE_RANGE, &args);
+	}
+	if (ret) {
+		perror(len ? "XFS_IOC_CLONE_RANGE" : "XFS_IOC_CLONE");
+		return 0;	/* nothing linked, *ops untouched */
+	}
+	(*ops)++;
+	return len;		/* stays 0 for a whole-file clone */
+}
+
+/*
+ * Handler for "reflink [-C] [-q] infile [src_off dst_off len]".
+ * With only infile given, offsets and length stay 0 and reflink_ioctl() is
+ * asked for a whole-file clone; otherwise just the given range is linked.
+ * Timing is reported unless -q was given or the clone failed.
+ */
+static int
+reflink_f(
+	int		argc,
+	char		**argv)
+{
+	off64_t		src_off = 0, dst_off = 0;
+	long long	length = 0, nbytes;
+	char		*src_name;
+	int		compact = 0, quiet = 0;
+	size_t		blksz, sectsz;
+	struct timeval	start, end;
+	int		opt, nr_ops = 0, src_fd, nr_args;
+
+	init_cvtnum(&blksz, &sectsz);
+	while ((opt = getopt(argc, argv, "Cq")) != EOF) {
+		if (opt == 'C')
+			compact = 1;	/* condensed timing report */
+		else if (opt == 'q')
+			quiet = 1;	/* no timing report */
+		else
+			return command_usage(&reflink_cmd);
+	}
+	nr_args = argc - optind;
+	if (nr_args != 1 && nr_args != 4)
+		return command_usage(&reflink_cmd);
+
+	src_name = argv[optind++];
+	if (nr_args == 4) {
+		src_off = cvtnum(blksz, sectsz, argv[optind]);
+		if (src_off < 0) {
+			printf(_("non-numeric src offset argument -- %s\n"),
+				argv[optind]);
+			return 0;
+		}
+		optind++;
+		dst_off = cvtnum(blksz, sectsz, argv[optind]);
+		if (dst_off < 0) {
+			printf(_("non-numeric dest offset argument -- %s\n"),
+				argv[optind]);
+			return 0;
+		}
+		optind++;
+		length = cvtnum(blksz, sectsz, argv[optind]);
+		if (length < 1) {
+			printf(_("non-positive length argument -- %s\n"),
+				argv[optind]);
+			return 0;
+		}
+	}
+
+	src_fd = openfile(src_name, NULL, IO_READONLY, 0);
+	if (src_fd < 0)
+		return 0;
+
+	gettimeofday(&start, NULL);
+	nbytes = reflink_ioctl(src_fd, src_off, dst_off, length, &nr_ops);
+	if (nr_ops != 0 && !quiet) {
+		gettimeofday(&end, NULL);
+		end = tsub(end, start);
+		report_io_times("linked", &end, (long long)dst_off, length,
+				nbytes, nr_ops, compact);
+	}
+	close(src_fd);
+	return 0;
+}
+
+/* Fill in and register the "reflink" and "dedupe" commands. */
+void
+reflink_init(void)
+{
+	reflink_cmd.name = "reflink";
+	reflink_cmd.altname = "rl";
+	reflink_cmd.cfunc = reflink_f;
+	/* "reflink infile" alone is valid (whole-file clone), so argmin is 1. */
+	reflink_cmd.argmin = 1;
+	reflink_cmd.argmax = -1;
+	reflink_cmd.flags = CMD_NOMAP_OK | CMD_FOREIGN_OK;
+	reflink_cmd.args = _("infile [src_off dst_off len]");
+	reflink_cmd.oneline =
+		_("reflinks a number of bytes at a specified offset");
+	reflink_cmd.help = reflink_help;
+
+	add_command(&reflink_cmd);
+
+	dedupe_cmd.name = "dedupe";
+	dedupe_cmd.altname = "dd";
+	dedupe_cmd.cfunc = dedupe_f;
+	dedupe_cmd.argmin = 4;
+	dedupe_cmd.argmax = -1;
+	dedupe_cmd.flags = CMD_NOMAP_OK | CMD_FOREIGN_OK;
+	dedupe_cmd.args = _("infile src_off dst_off len");
+	dedupe_cmd.oneline =
+		_("dedupes a number of bytes at a specified offset");
+	dedupe_cmd.help = dedupe_help;
+
+	add_command(&dedupe_cmd);
+}
diff --git a/man/man8/xfs_io.8 b/man/man8/xfs_io.8
index 416206f..e0a901f 100644
--- a/man/man8/xfs_io.8
+++ b/man/man8/xfs_io.8
@@ -490,6 +490,65 @@ Recursively display all the specified segments starting at the specified
 .B \-s
 Display the starting lseek(2) offset. This offset will be a calculated value when
 both data and holes are displayed together or performing a recusively display.
+.RE
+.PD
+.TP
+.TP
+.BI "reflink  [ \-C ] [ \-q ] src_file [src_offset dst_offset length]"
+On filesystems that support the
+.B XFS_IOC_CLONE_RANGE
+or
+.B BTRFS_IOC_CLONE_RANGE
+ioctls, map
+.I length
+bytes at offset
+.I dst_offset
+in the open file to the same physical blocks that are mapped at offset
+.I src_offset
+in the file
+.IR src_file ,
+replacing any contents that may already have been there.  If a program
+writes into a reflinked block range of either file, the dirty blocks will be
+cloned, written to, and remapped ("copy on write") in the affected file,
+leaving the other file(s) unchanged.  If src_offset, dst_offset, and length
+are omitted, all contents of src_file will be reflinked into the open file.
+.RS 1.0i
+.PD 0
+.TP 0.4i
+.B \-C
+Print timing statistics in a condensed format.
+.TP
+.B \-q
+Do not print timing statistics at all.
+.RE
+.PD
+.TP
+.TP
+.BI "dedupe  [ \-C ] [ \-q ] src_file src_offset dst_offset length"
+On filesystems that support the
+.B XFS_IOC_FILE_EXTENT_SAME
+or
+.B BTRFS_IOC_FILE_EXTENT_SAME
+ioctls, map
+.I length
+bytes at offset
+.I dst_offset
+in the open file to the same physical blocks that are mapped at offset
+.I src_offset
+in the file
+.IR src_file ,
+but only if the contents of both ranges are identical.  This is known as
+block-based deduplication.  If a program writes into a reflinked block range of
+either file, the dirty blocks will be cloned, written to, and remapped ("copy
+on write") in the affected file, leaving the other file(s) unchanged.
+.RS 1.0i
+.PD 0
+.TP 0.4i
+.B \-C
+Print timing statistics in a condensed format.
+.TP
+.B \-q
+Do not print timing statistics at all.
 .TP
 
 .SH MEMORY MAPPED I/O COMMANDS

_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs



[Index of Archives]     [Linux XFS Devel]     [Linux Filesystem Development]     [Filesystem Testing]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux