[PATCH 1/2] xfsprogs: introduce defrag command to spaceman

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Non-exclusive defragment
Here we introduce a non-exclusive way to defragment a file, especially a
huge file, without blocking I/O to it for long.
Non-exclusive defragmentation divides the whole file into small segments.
For each segment, we lock the file, defragment the segment and unlock the file.
Defragmenting a small segment doesn't take long, so file I/O requests can be
served between segments instead of being blocked for a long time.  We also put
a (user-adjustable) idle time between defragmenting two consecutive segments to
balance defragmentation against file I/O.

Signed-off-by: Wengang Wang <wen.gang.wang@xxxxxxxxxx>
---
 spaceman/Makefile |   2 +-
 spaceman/defrag.c | 394 ++++++++++++++++++++++++++++++++++++++++++++++
 spaceman/init.c   |   1 +
 spaceman/space.h  |   1 +
 4 files changed, 397 insertions(+), 1 deletion(-)
 create mode 100644 spaceman/defrag.c

diff --git a/spaceman/Makefile b/spaceman/Makefile
index 1f048d54..9c00b20a 100644
--- a/spaceman/Makefile
+++ b/spaceman/Makefile
@@ -7,7 +7,7 @@ include $(TOPDIR)/include/builddefs
 
 LTCOMMAND = xfs_spaceman
 HFILES = init.h space.h
-CFILES = info.c init.c file.c health.c prealloc.c trim.c
+CFILES = info.c init.c file.c health.c prealloc.c trim.c defrag.c
 LSRCFILES = xfs_info.sh
 
 LLDLIBS = $(LIBXCMD) $(LIBFROG)
diff --git a/spaceman/defrag.c b/spaceman/defrag.c
new file mode 100644
index 00000000..fdc9b108
--- /dev/null
+++ b/spaceman/defrag.c
@@ -0,0 +1,394 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2024 Oracle.
+ * All Rights Reserved.
+ */
+
+#include "libxfs.h"
+#include <linux/fiemap.h>
+#include <linux/fsmap.h>
+#include "libfrog/fsgeom.h"
+#include "command.h"
+#include "init.h"
+#include "libfrog/paths.h"
+#include "space.h"
+#include "input.h"
+
+#define MAPSIZE 512	/* getbmapx records per XFS_IOC_GETBMAPX call, header included */
+
+/* defrag segment size in units of 512 bytes */
+#define PIECE_SIZE 32768 /* 16MiB */
+#define TARGET_EXT_SIZE  (PIECE_SIZE/2)	/* extents shorter than this are targets */
+
+/*
+ * A defrag segment: a run of contiguous extents handled in one pass.
+ * It contains no holes and no unwritten (preallocated) extents.
+ * Its length is limited by PIECE_SIZE but can exceed that a bit, because
+ * the limit is only checked after a whole extent has been added
+ * (see the loop condition in defrag_get_next_segment()).
+ */
+struct defrag_segment {
+	long long	ds_offset;	/* segment offset in units of 512 bytes */
+	long long	ds_length;	/* length of segment in units of 512 bytes */
+	int		ds_nr;		/* number of extents in this segment */
+};
+
+/* buffer used to fetch bmap; [0] is the request header, extents follow */
+static struct getbmapx	g_mapx[MAPSIZE];
+/* current offset of the file in units of 512 bytes, used to fetch bmap */
+static long long 	g_offset = 0;
+/* index of the next extent in g_mapx; -1 means nothing is cached */
+static int		g_next_idx = -1;
+/* sleep time in us between segments, overwritten by parameter */
+static useconds_t	g_idle_time = 0;
+/*
+ * number of extents. only the segments that contain more extents than this
+ * are defrag targets. overwritten by parameter
+ */
+static int		g_nr_ext = 1;
+
+/*
+ * look at the next extent, fetching a new batch from the kernel if needed.
+ * Note: the same extent is returned again until defrag_move_next_extent() runs.
+ * returns:
+ * -1:	XFS_IOC_GETBMAPX failed.
+ * 0:	extent copied to map_out
+ * 1:	no more extent left
+ */
+static int
+defrag_get_next_extent(int fd, struct getbmapx *map_out)
+{
+	struct getbmapx	*hdr = &g_mapx[0];
+	struct getbmapx	*cur;
+	int		rc = 0;
+
+	/* nothing cached in g_mapx: ask the kernel for the next batch */
+	if (g_next_idx == -1) {
+		hdr->bmv_offset = g_offset;
+		hdr->bmv_length = -1LL;
+		hdr->bmv_count = MAPSIZE;
+		hdr->bmv_iflags = BMV_IF_NO_HOLES | BMV_IF_PREALLOC;
+		rc = ioctl(fd, XFS_IOC_GETBMAPX, g_mapx);
+		if (rc == -1) {
+			perror("XFS_IOC_GETBMAPX failed");
+			return rc;
+		}
+		if (hdr->bmv_entries == 0)
+			return 1;
+		/* extents sit at [1..bmv_entries]; resume after the last one */
+		cur = &g_mapx[hdr->bmv_entries];
+		g_offset = cur->bmv_offset + cur->bmv_length;
+		g_next_idx = 1;
+	}
+	cur = &g_mapx[g_next_idx];
+	map_out->bmv_offset = cur->bmv_offset;
+	map_out->bmv_length = cur->bmv_length;
+	map_out->bmv_oflags = cur->bmv_oflags;
+	return rc;
+}
+
+/*
+ * advance the iterator; -1 makes the next lookup refetch from the kernel
+ */
+static void
+defrag_move_next_extent(void)
+{
+	if (g_next_idx == g_mapx[0].bmv_entries)
+		g_next_idx = -1;
+	else
+		g_next_idx += 1;
+}
+
+/*
+ * an extent is a defrag target when it is not flagged BMV_OF_PREALLOC and is
+ * shorter than TARGET_EXT_SIZE; no hole check, we fetch with BMV_IF_NO_HOLES
+ */
+static bool
+defrag_is_target(struct getbmapx *mapx)
+{
+	bool	unwritten = (mapx->bmv_oflags & BMV_OF_PREALLOC) != 0;
+
+	return !unwritten && mapx->bmv_length < TARGET_EXT_SIZE;
+}
+
+/*
+ * build the next segment to defragment from consecutive target extents.
+ * returns:
+ * -1	error happened.
+ * 0	segment returned.
+ * 1	no more segments to return
+ */
+static int
+defrag_get_next_segment(int fd, struct defrag_segment *out)
+{
+	struct getbmapx mapx;
+	int	ret;
+
+	out->ds_offset = 0;
+	out->ds_length = 0;
+	out->ds_nr = 0;
+
+	do {
+		ret = defrag_get_next_extent(fd, &mapx);
+		if (ret != 0) {
+			/* end of file: still return the segment if it's not empty */
+			if (ret == 1 && out->ds_nr > 0)
+				ret = 0;
+			break;
+		}
+
+		/*
+		 * If the extent is not a defrag target, consume and skip it.
+		 * go to next extent if the segment is empty;
+		 * otherwise the segment ends here, return it.
+		 */
+		if (!defrag_is_target(&mapx)) {
+			defrag_move_next_extent();
+			if (out->ds_nr == 0)
+				continue;
+			else
+				break;
+		}
+
+		/* the extent is the first in this segment: it sets the start */
+		if (out->ds_nr == 0) {
+			out->ds_offset = mapx.bmv_offset;
+			out->ds_length = mapx.bmv_length;
+			out->ds_nr = 1;
+			defrag_move_next_extent();
+			continue;
+		}
+
+		/*
+		 * now the extent is not the first one, check for hole.
+		 * if there is a hole before this extent, return current segment;
+		 * the extent is not consumed, so it starts the next segment.
+		 */
+		if (out->ds_offset + out->ds_length != mapx.bmv_offset)
+			break;
+
+		out->ds_length += mapx.bmv_length;
+		out->ds_nr += 1;
+		defrag_move_next_extent();
+	} while (out->ds_length < PIECE_SIZE);
+
+	return ret;
+}
+
+/*
+ * check that path names a writable regular file on an XFS filesystem
+ * returns:
+ * true -- yes
+ * false -- no, the reason has been printed to stderr
+ */
+static bool
+defrag_check_file(char *path)
+{
+	struct statfs	sfs;
+	struct stat	st;
+
+	if (access(path, F_OK|W_OK) == -1) {
+		if (errno != ENOENT)
+			fprintf(stderr, "no access to \"%s\", %s\n", path, strerror(errno));
+		else
+			fprintf(stderr, "file \"%s\" doesn't exist\n", path);
+		return false;
+	}
+
+	if (stat(path, &st) == -1) {
+		fprintf(stderr, "failed to get file info on \"%s\":  %s errno=%d\n",
+			path, strerror(errno), errno);
+		return false;
+	}
+
+	if (!S_ISREG(st.st_mode)) {
+		fprintf(stderr, "\"%s\" is not a regular file\n", path);
+		return false;
+	}
+
+	if (statfs(path, &sfs) == -1) {
+		fprintf(stderr, "failed to get FS info on \"%s\":  %s errno=%d\n",
+			path, strerror(errno), errno);
+		return false;
+	}
+
+	if (sfs.f_type != XFS_SUPER_MAGIC) {
+		fprintf(stderr, "\"%s\" is not a xfs file\n", path);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * defragment a file
+ * return 0 if successfully done, 1 otherwise.
+ * params:
+ * file_path:	the file path to be defragmented, left unmodified
+ */
+static int
+defrag_xfs_defrag(char *file_path)
+{
+	long	nr_seg_defrag = 0, nr_ext_defrag = 0;
+	int	scratch_fd = -1, defrag_fd = -1;
+	char	dir_buf[PATH_MAX+1];
+	char	tmp_file_path[PATH_MAX+1];
+	int	ret = 0;
+
+	/* the extent iterator state is global, restart it for this file */
+	g_offset = 0;
+	g_next_idx = -1;
+
+	if (!defrag_check_file(file_path)) {
+		/* must not be 1: that value means "no more segments" below */
+		ret = -1;
+		goto out;
+	}
+
+	defrag_fd = open(file_path, O_RDWR);
+	if (defrag_fd == -1) {
+		fprintf(stderr, "Opening %s failed. %s\n", file_path, strerror(errno));
+		ret = -1;
+		goto out;
+	}
+
+	/* dirname() may modify its argument, so give it a private copy */
+	snprintf(dir_buf, sizeof(dir_buf), "%s", file_path);
+	snprintf(tmp_file_path, PATH_MAX, "%s/.xfsdf_%d", dirname(dir_buf), getpid());
+	/* owner-only: the scratch file temporarily shares the target's data */
+	scratch_fd = open(tmp_file_path, O_CREAT|O_EXCL|O_RDWR, 0600);
+	if (scratch_fd == -1) {
+		fprintf(stderr, "Opening temporary file %s failed. %s\n",
+			tmp_file_path, strerror(errno));
+		ret = -1;
+		goto out;
+	}
+
+	do {
+		struct defrag_segment segment;
+		struct file_clone_range	clone;
+		long long seg_size, seg_off;
+
+		ret = defrag_get_next_segment(defrag_fd, &segment);
+		/* error happened or no more segments */
+		if (ret != 0)
+			break;
+		/* skip this segment if it contains no more extents than specified */
+		if (segment.ds_nr <= g_nr_ext)
+			continue;
+
+		/* to bytes */
+		seg_off = segment.ds_offset * 512;
+		seg_size = segment.ds_length * 512;
+
+		clone.src_fd = defrag_fd;
+		clone.src_offset = seg_off;
+		clone.src_length = seg_size;
+		clone.dest_offset = seg_off;
+
+		nr_seg_defrag += 1;
+		nr_ext_defrag += segment.ds_nr;
+		ret = ioctl(scratch_fd, FICLONERANGE, &clone);
+		if (ret != 0) {
+			fprintf(stderr, "FICLONERANGE failed %s, errno=%d\n",
+				strerror(errno), errno);
+			goto out;
+		}
+
+		/*
+		 * Unshare the range in the file being defragmented: the
+		 * copy-on-write allocates new extents and copies the data.
+		 */
+		ret = fallocate(defrag_fd, FALLOC_FL_UNSHARE_RANGE, seg_off, seg_size);
+		if (ret != 0) {
+			fprintf(stderr, "UNSHARE_RANGE failed %s, errno=%d\n",
+				strerror(errno), errno);
+			goto out;
+		}
+
+		ret = fdatasync(defrag_fd);
+		if (ret != 0) {
+			fprintf(stderr, "fdatasync failed %s, errno=%d\n",
+				strerror(errno), errno);
+			goto out;
+		}
+
+		/*
+		 * Punch the original extents out of the scratch file to free them.
+		 */
+		ret = fallocate(scratch_fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, seg_off, seg_size);
+		if (ret != 0) {
+			fprintf(stderr, "PUNCH_HOLE failed %s, errno=%d\n",
+				strerror(errno), errno);
+			goto out;
+		}
+		if (g_idle_time)
+			usleep(g_idle_time);
+	} while (true);
+out:
+	if (scratch_fd != -1) {
+		close(scratch_fd);
+		unlink(tmp_file_path);
+	}
+	if (defrag_fd != -1)
+		close(defrag_fd);
+	/* 1 is the iterator's "no more segments", i.e. normal completion */
+	if (ret != 0 && ret != 1)
+		return 1;
+	printf("Defragmented %ld segments, %ld extents\n", nr_seg_defrag, nr_ext_defrag);
+	return 0;
+}
+
+static cmdinfo_t defrag_cmd;
+/* "defrag" handler: parse -i/-e, then defragment every file in filetable */
+static int
+defrag_f(int argc, char **argv)
+{
+	int	i, c;
+	int	ms;
+
+	while ((c = getopt(argc, argv, "i:e:")) != EOF) {
+		switch(c) {
+		case 'i':
+			ms = atoi(optarg);	/* negative would wrap to a huge sleep */
+			g_idle_time = ms > 0 ? (useconds_t)ms * 1000 : 0;
+			break;
+		case 'e':
+			g_nr_ext = atoi(optarg);
+			break;
+		default:
+			command_usage(&defrag_cmd);
+			return 1;
+		}
+	}
+
+	for (i = 0; i < filecount; i++)
+		defrag_xfs_defrag(filetable[i].name);	/* reports its own errors */
+	return 0;
+}
+
+static void defrag_help(void)
+{
+	printf(_(
+"\n"
+"Defragment file\n"
+"\n"
+" -i interval	-- sleep _interval_ ms between defragmenting segments.\n"
+"                  0 by default\n"
+" -e extnr	-- only segments with more than _extnr_ extents are defragment\n"
+"                  targets. 1 by default\n"));
+}
+
+void defrag_init(void)
+{
+	/* fill in the command descriptor, then hand it to add_command() */
+	defrag_cmd.name		= "defrag";
+	defrag_cmd.altname	= "dfg";
+	defrag_cmd.oneline	= _("Defragment file");
+	defrag_cmd.args		= "[-i interval] [-e extnr]";
+	defrag_cmd.help		= defrag_help;
+	defrag_cmd.cfunc	= defrag_f;
+	defrag_cmd.argmin	= 0;
+	defrag_cmd.argmax	= 4;
+	defrag_cmd.flags	= CMD_FLAG_ONESHOT;
+	add_command(&defrag_cmd);
+}
diff --git a/spaceman/init.c b/spaceman/init.c
index cf1ff3cb..396f965c 100644
--- a/spaceman/init.c
+++ b/spaceman/init.c
@@ -35,6 +35,7 @@ init_commands(void)
 	trim_init();
 	freesp_init();
 	health_init();
+	defrag_init();
 }
 
 static int
diff --git a/spaceman/space.h b/spaceman/space.h
index 723209ed..c288aeb9 100644
--- a/spaceman/space.h
+++ b/spaceman/space.h
@@ -26,6 +26,7 @@ extern void	help_init(void);
 extern void	prealloc_init(void);
 extern void	quit_init(void);
 extern void	trim_init(void);
+extern void	defrag_init(void);
 #ifdef HAVE_GETFSMAP
 extern void	freesp_init(void);
 #else
-- 
2.39.3 (Apple Git-145)





[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux