Hi Darrick, Thanks for review, pls check in lines. > On Jul 9, 2024, at 2:18 PM, Darrick J. Wong <djwong@xxxxxxxxxx> wrote: > > On Tue, Jul 09, 2024 at 12:10:20PM -0700, Wengang Wang wrote: >> Content-Type: text/plain; charset=UTF-8 >> Content-Transfer-Encoding: 8bit >> >> Non-exclusive defragment >> Here we are introducing the non-exclusive manner to defragment a file, >> especially for huge files, without blocking IO to it long. >> Non-exclusive defragmentation divides the whole file into small segments. >> For each segment, we lock the file, defragment the segment and unlock the file. >> Defragmenting the small segment doesn’t take long. File IO requests can get >> served between defragmenting segments before blocked long. Also we put >> (user adjustable) idle time between defragmenting two consecutive segments to >> balance the defragmentation and file IOs. >> >> The first patch in the set checks for valid target files >> >> Valid target files to defrag must: >> 1. be accessible for read/write >> 2. be regular files >> 3. be in XFS filesystem >> 4. the containing XFS has reflink enabled. This is not checked >> before starting defragmentation, but error would be reported >> later. >> >> Signed-off-by: Wengang Wang <wen.gang.wang@xxxxxxxxxx> >> --- >> spaceman/Makefile | 2 +- >> spaceman/defrag.c | 198 ++++++++++++++++++++++++++++++++++++++++++++++ >> spaceman/init.c | 1 + >> spaceman/space.h | 1 + >> 4 files changed, 201 insertions(+), 1 deletion(-) >> create mode 100644 spaceman/defrag.c >> >> diff --git a/spaceman/Makefile b/spaceman/Makefile >> index 1f048d54..9c00b20a 100644 >> --- a/spaceman/Makefile >> +++ b/spaceman/Makefile >> @@ -7,7 +7,7 @@ include $(TOPDIR)/include/builddefs >> >> LTCOMMAND = xfs_spaceman >> HFILES = init.h space.h >> -CFILES = info.c init.c file.c health.c prealloc.c trim.c >> +CFILES = info.c init.c file.c health.c prealloc.c trim.c defrag.c >> LSRCFILES = xfs_info.sh >> >> LLDLIBS = $(LIBXCMD) $(LIBFROG) >> diff --git a/spaceman/defrag.c b/spaceman/defrag.c >> new file mode 100644 >> index 00000000..c9732984 >> --- /dev/null >> +++ b/spaceman/defrag.c >> @@ -0,0 +1,198 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> +/* >> + * Copyright (c) 2024 Oracle. >> + * All Rights Reserved. >> + */ >> + >> +#include "libxfs.h" >> +#include <linux/fiemap.h> >> +#include <linux/fsmap.h> >> +#include "libfrog/fsgeom.h" >> +#include "command.h" >> +#include "init.h" >> +#include "libfrog/paths.h" >> +#include "space.h" >> +#include "input.h" >> + >> +/* defrag segment size limit in units of 512 bytes */ >> +#define MIN_SEGMENT_SIZE_LIMIT 8192 /* 4MiB */ >> +#define DEFAULT_SEGMENT_SIZE_LIMIT 32768 /* 16MiB */ >> +static int g_segment_size_lmt = DEFAULT_SEGMENT_SIZE_LIMIT; >> + >> +/* size of the defrag target file */ >> +static off_t g_defrag_file_size = 0; >> + >> +/* stats for the target file extents before defrag */ >> +struct ext_stats { >> + long nr_ext_total; >> + long nr_ext_unwritten; >> + long nr_ext_shared; >> +}; >> +static struct ext_stats g_ext_stats; >> + >> +/* >> + * check if the target is a valid file to defrag >> + * also store file size >> + * returns: >> + * true for yes and false for no >> + */ >> +static bool >> +defrag_check_file(char *path) >> +{ >> + struct statfs statfs_s; >> + struct stat stat_s; >> + >> + if (access(path, F_OK|W_OK) == -1) { >> + if (errno == ENOENT) >> + fprintf(stderr, "file \"%s\" doesn't exist\n", path); >> + else >> + fprintf(stderr, "no access to \"%s\", %s\n", path, >> + strerror(errno)); >> + return false; >> + } >> + >> + if (stat(path, &stat_s) == -1) { >> + fprintf(stderr, "failed to get file info on \"%s\": %s\n", >> + path, strerror(errno)); >> + return false; >> + } >> + >> + g_defrag_file_size = stat_s.st_size; >> + >> + if (!S_ISREG(stat_s.st_mode)) { >> + fprintf(stderr, "\"%s\" is not a regular file\n", path); >> + return false; >> + } >> + >> + if (statfs(path, &statfs_s) == -1) { > > statfs is deprecated, please use fstatvfs. OK, will move to fstatvfs. > >> + fprintf(stderr, "failed to get FS info on \"%s\": %s\n", >> + path, strerror(errno)); >> + return false; >> + } >> + >> + if (statfs_s.f_type != XFS_SUPER_MAGIC) { >> + fprintf(stderr, "\"%s\" is not a xfs file\n", path); >> + return false; >> + } >> + >> + return true; >> +} >> + >> +/* >> + * defragment a file >> + * return 0 if successfully done, 1 otherwise >> + */ >> +static int >> +defrag_xfs_defrag(char *file_path) { > > defrag_xfs_path() ? OK. > >> + int max_clone_us = 0, max_unshare_us = 0, max_punch_us = 0; >> + long nr_seg_defrag = 0, nr_ext_defrag = 0; >> + int scratch_fd = -1, defrag_fd = -1; >> + char tmp_file_path[PATH_MAX+1]; >> + char *defrag_dir; >> + struct fsxattr fsx; >> + int ret = 0; >> + >> + fsx.fsx_nextents = 0; >> + memset(&g_ext_stats, 0, sizeof(g_ext_stats)); >> + >> + if (!defrag_check_file(file_path)) { >> + ret = 1; >> + goto out; >> + } >> + >> + defrag_fd = open(file_path, O_RDWR); >> + if (defrag_fd == -1) { > > Not sure why you check the path before opening it -- all those file and > statvfs attributes that you collect there can change (or the entire fs > gets unmounted) until you've pinned the fs by opening the file. The idea comes from internal reviews hoping some explicit reasons why Defrag failed. Those reasons include: 1) if user has permission to access the target file. 2) if the species path exist (when moving to spaceman, spaceman takes care of it) 3) if the specified is a regular file 4) if the target file is a XFS file Thing might change after checking and opening, but that’s very rare case and user is responsible for that change rather than this tool. > >> + fprintf(stderr, "Opening %s failed. %s\n", file_path, >> + strerror(errno)); >> + ret = 1; >> + goto out; >> + } >> + >> + defrag_dir = dirname(file_path); >> + snprintf(tmp_file_path, PATH_MAX, "%s/.xfsdefrag_%d", defrag_dir, >> + getpid()); >> + tmp_file_path[PATH_MAX] = 0; >> + scratch_fd = open(tmp_file_path, O_CREAT|O_EXCL|O_RDWR, 0600); > > O_TMPFILE? Then you don't have to do this .xfsdefrag_XXX stuff. > My first first version was using O_TMPFILE. But clone failed somehow (Don’t remember the details). I retried O_TMPFILE, it’s working now. So will move to use O_TMPFILE. >> + if (scratch_fd == -1) { >> + fprintf(stderr, "Opening temporary file %s failed. %s\n", >> + tmp_file_path, strerror(errno)); >> + ret = 1; >> + goto out; >> + } >> +out: >> + if (scratch_fd != -1) { >> + close(scratch_fd); >> + unlink(tmp_file_path); >> + } >> + if (defrag_fd != -1) { >> + ioctl(defrag_fd, FS_IOC_FSGETXATTR, &fsx); >> + close(defrag_fd); >> + } >> + >> + printf("Pre-defrag %ld extents detected, %ld are \"unwritten\"," >> + "%ld are \"shared\"\n", >> + g_ext_stats.nr_ext_total, g_ext_stats.nr_ext_unwritten, >> + g_ext_stats.nr_ext_shared); >> + printf("Tried to defragment %ld extents in %ld segments\n", >> + nr_ext_defrag, nr_seg_defrag); >> + printf("Time stats(ms): max clone: %d, max unshare: %d," >> + " max punch_hole: %d\n", >> + max_clone_us/1000, max_unshare_us/1000, max_punch_us/1000); >> + printf("Post-defrag %u extents detected\n", fsx.fsx_nextents); >> + return ret; >> +} >> + >> + >> +static void defrag_help(void) >> +{ >> + printf(_( >> +"\n" >> +"Defragemnt files on XFS where reflink is enabled. IOs to the target files \n" > > "Defragment" OK. > >> +"can be served durning the defragmentations.\n" >> +"\n" >> +" -s segment_size -- specify the segment size in MiB, minmum value is 4 \n" >> +" default is 16\n")); >> +} >> + >> +static cmdinfo_t defrag_cmd; >> + >> +static int >> +defrag_f(int argc, char **argv) >> +{ >> + int i; >> + int c; >> + >> + while ((c = getopt(argc, argv, "s:")) != EOF) { >> + switch(c) { >> + case 's': >> + g_segment_size_lmt = atoi(optarg) * 1024 * 1024 / 512; >> + if (g_segment_size_lmt < MIN_SEGMENT_SIZE_LIMIT) { >> + g_segment_size_lmt = MIN_SEGMENT_SIZE_LIMIT; >> + printf("Using minimium segment size %d\n", >> + g_segment_size_lmt); >> + } >> + break; >> + default: >> + command_usage(&defrag_cmd); >> + return 1; >> + } >> + } >> + >> + for (i = 0; i < filecount; i++) >> + defrag_xfs_defrag(filetable[i].name); > > Pass in the whole filetable[i] and then you've already got an open fd > and some validation that it's an xfs filesystem. Good to know. > >> + return 0; >> +} >> +void defrag_init(void) >> +{ >> + defrag_cmd.name = "defrag"; >> + defrag_cmd.altname = "dfg"; >> + defrag_cmd.cfunc = defrag_f; >> + defrag_cmd.argmin = 0; >> + defrag_cmd.argmax = 4; >> + defrag_cmd.args = "[-s segment_size]"; >> + defrag_cmd.flags = CMD_FLAG_ONESHOT; > > IIRC if you don't set CMD_FLAG_FOREIGN_OK then the command processor > won't let this command get run against a non-xfs file. > OK. Thanks, Winging > --D > >> + defrag_cmd.oneline = _("Defragment XFS files"); >> + defrag_cmd.help = defrag_help; >> + >> + add_command(&defrag_cmd); >> +} >> diff --git a/spaceman/init.c b/spaceman/init.c >> index cf1ff3cb..396f965c 100644 >> --- a/spaceman/init.c >> +++ b/spaceman/init.c >> @@ -35,6 +35,7 @@ init_commands(void) >> trim_init(); >> freesp_init(); >> health_init(); >> + defrag_init(); >> } >> >> static int >> diff --git a/spaceman/space.h b/spaceman/space.h >> index 723209ed..c288aeb9 100644 >> --- a/spaceman/space.h >> +++ b/spaceman/space.h >> @@ -26,6 +26,7 @@ extern void help_init(void); >> extern void prealloc_init(void); >> extern void quit_init(void); >> extern void trim_init(void); >> +extern void defrag_init(void); >> #ifdef HAVE_GETFSMAP >> extern void freesp_init(void); >> #else >> -- >> 2.39.3 (Apple Git-146)