> On Jul 11, 2024, at 2:54 PM, Wengang Wang <wen.gang.wang@xxxxxxxxxx> wrote: > > Hi Darrick, > Thanks for review, pls check in lines. > >> On Jul 9, 2024, at 2:18 PM, Darrick J. Wong <djwong@xxxxxxxxxx> wrote: >> >> On Tue, Jul 09, 2024 at 12:10:20PM -0700, Wengang Wang wrote: >>> Content-Type: text/plain; charset=UTF-8 >>> Content-Transfer-Encoding: 8bit >>> >>> Non-exclusive defragment >>> Here we are introducing the non-exclusive manner to defragment a file, >>> especially for huge files, without blocking IO to it long. >>> Non-exclusive defragmentation divides the whole file into small segments. >>> For each segment, we lock the file, defragment the segment and unlock the file. >>> Defragmenting the small segment doesn’t take long. File IO requests can get >>> served between defragmenting segments before blocked long. Also we put >>> (user adjustable) idle time between defragmenting two consecutive segments to >>> balance the defragmentation and file IOs. >>> >>> The first patch in the set checks for valid target files >>> >>> Valid target files to defrag must: >>> 1. be accessible for read/write >>> 2. be regular files >>> 3. be in XFS filesystem >>> 4. the containing XFS has reflink enabled. This is not checked >>> before starting defragmentation, but error would be reported >>> later. >>> >>> Signed-off-by: Wengang Wang <wen.gang.wang@xxxxxxxxxx> >>> --- >>> spaceman/Makefile | 2 +- >>> spaceman/defrag.c | 198 ++++++++++++++++++++++++++++++++++++++++++++++ >>> spaceman/init.c | 1 + >>> spaceman/space.h | 1 + >>> 4 files changed, 201 insertions(+), 1 deletion(-) >>> create mode 100644 spaceman/defrag.c >>> >>> diff --git a/spaceman/Makefile b/spaceman/Makefile >>> index 1f048d54..9c00b20a 100644 >>> --- a/spaceman/Makefile >>> +++ b/spaceman/Makefile >>> @@ -7,7 +7,7 @@ include $(TOPDIR)/include/builddefs >>> >>> LTCOMMAND = xfs_spaceman >>> HFILES = init.h space.h >>> -CFILES = info.c init.c file.c health.c prealloc.c trim.c >>> +CFILES = info.c init.c file.c health.c prealloc.c trim.c defrag.c >>> LSRCFILES = xfs_info.sh >>> >>> LLDLIBS = $(LIBXCMD) $(LIBFROG) >>> diff --git a/spaceman/defrag.c b/spaceman/defrag.c >>> new file mode 100644 >>> index 00000000..c9732984 >>> --- /dev/null >>> +++ b/spaceman/defrag.c >>> @@ -0,0 +1,198 @@ >>> +// SPDX-License-Identifier: GPL-2.0 >>> +/* >>> + * Copyright (c) 2024 Oracle. >>> + * All Rights Reserved. >>> + */ >>> + >>> +#include "libxfs.h" >>> +#include <linux/fiemap.h> >>> +#include <linux/fsmap.h> >>> +#include "libfrog/fsgeom.h" >>> +#include "command.h" >>> +#include "init.h" >>> +#include "libfrog/paths.h" >>> +#include "space.h" >>> +#include "input.h" >>> + >>> +/* defrag segment size limit in units of 512 bytes */ >>> +#define MIN_SEGMENT_SIZE_LIMIT 8192 /* 4MiB */ >>> +#define DEFAULT_SEGMENT_SIZE_LIMIT 32768 /* 16MiB */ >>> +static int g_segment_size_lmt = DEFAULT_SEGMENT_SIZE_LIMIT; >>> + >>> +/* size of the defrag target file */ >>> +static off_t g_defrag_file_size = 0; >>> + >>> +/* stats for the target file extents before defrag */ >>> +struct ext_stats { >>> + long nr_ext_total; >>> + long nr_ext_unwritten; >>> + long nr_ext_shared; >>> +}; >>> +static struct ext_stats g_ext_stats; >>> + >>> +/* >>> + * check if the target is a valid file to defrag >>> + * also store file size >>> + * returns: >>> + * true for yes and false for no >>> + */ >>> +static bool >>> +defrag_check_file(char *path) >>> +{ >>> + struct statfs statfs_s; >>> + struct stat stat_s; >>> + >>> + if (access(path, F_OK|W_OK) == -1) { >>> + if (errno == ENOENT) >>> + fprintf(stderr, "file \"%s\" doesn't exist\n", path); >>> + else >>> + fprintf(stderr, "no access to \"%s\", %s\n", path, >>> + strerror(errno)); >>> + return false; >>> + } >>> + >>> + if (stat(path, &stat_s) == -1) { >>> + fprintf(stderr, "failed to get file info on \"%s\": %s\n", >>> + path, strerror(errno)); >>> + return false; >>> + } >>> + >>> + g_defrag_file_size = stat_s.st_size; >>> + >>> + if (!S_ISREG(stat_s.st_mode)) { >>> + fprintf(stderr, "\"%s\" is not a regular file\n", path); >>> + return false; >>> + } >>> + >>> + if (statfs(path, &statfs_s) == -1) { >> >> statfs is deprecated, please use fstatvfs. > > OK, will move to fstatvfs. > >> >>> + fprintf(stderr, "failed to get FS info on \"%s\": %s\n", >>> + path, strerror(errno)); >>> + return false; >>> + } >>> + >>> + if (statfs_s.f_type != XFS_SUPER_MAGIC) { >>> + fprintf(stderr, "\"%s\" is not a xfs file\n", path); >>> + return false; >>> + } >>> + >>> + return true; >>> +} >>> + >>> +/* >>> + * defragment a file >>> + * return 0 if successfully done, 1 otherwise >>> + */ >>> +static int >>> +defrag_xfs_defrag(char *file_path) { >> >> defrag_xfs_path() ? > > OK. >> >>> + int max_clone_us = 0, max_unshare_us = 0, max_punch_us = 0; >>> + long nr_seg_defrag = 0, nr_ext_defrag = 0; >>> + int scratch_fd = -1, defrag_fd = -1; >>> + char tmp_file_path[PATH_MAX+1]; >>> + char *defrag_dir; >>> + struct fsxattr fsx; >>> + int ret = 0; >>> + >>> + fsx.fsx_nextents = 0; >>> + memset(&g_ext_stats, 0, sizeof(g_ext_stats)); >>> + >>> + if (!defrag_check_file(file_path)) { >>> + ret = 1; >>> + goto out; >>> + } >>> + >>> + defrag_fd = open(file_path, O_RDWR); >>> + if (defrag_fd == -1) { >> >> Not sure why you check the path before opening it -- all those file and >> statvfs attributes that you collect there can change (or the entire fs >> gets unmounted) until you've pinned the fs by opening the file. > > The idea comes from internal reviews hoping some explicit reasons why > Defrag failed. Those reasons include: > 1) if user has permission to access the target file. > 2) if the species path exist (when moving to spaceman, spaceman takes care of it) > 3) if the specified is a regular file > 4) if the target file is a XFS file > > Thing might change after checking and opening, but that’s very rare case and user is > responsible for that change rather than this tool. > >> >>> + fprintf(stderr, "Opening %s failed. %s\n", file_path, >>> + strerror(errno)); >>> + ret = 1; >>> + goto out; >>> + } >>> + >>> + defrag_dir = dirname(file_path); >>> + snprintf(tmp_file_path, PATH_MAX, "%s/.xfsdefrag_%d", defrag_dir, >>> + getpid()); >>> + tmp_file_path[PATH_MAX] = 0; >>> + scratch_fd = open(tmp_file_path, O_CREAT|O_EXCL|O_RDWR, 0600); >> >> O_TMPFILE? Then you don't have to do this .xfsdefrag_XXX stuff. >> > > My first first version was using O_TMPFILE. But clone failed somehow (Don’t remember the details). > I retried O_TMPFILE, it’s working now. So will move to use O_TMPFILE. > >>> + if (scratch_fd == -1) { >>> + fprintf(stderr, "Opening temporary file %s failed. %s\n", >>> + tmp_file_path, strerror(errno)); >>> + ret = 1; >>> + goto out; >>> + } >>> +out: >>> + if (scratch_fd != -1) { >>> + close(scratch_fd); >>> + unlink(tmp_file_path); >>> + } >>> + if (defrag_fd != -1) { >>> + ioctl(defrag_fd, FS_IOC_FSGETXATTR, &fsx); >>> + close(defrag_fd); >>> + } >>> + >>> + printf("Pre-defrag %ld extents detected, %ld are \"unwritten\"," >>> + "%ld are \"shared\"\n", >>> + g_ext_stats.nr_ext_total, g_ext_stats.nr_ext_unwritten, >>> + g_ext_stats.nr_ext_shared); >>> + printf("Tried to defragment %ld extents in %ld segments\n", >>> + nr_ext_defrag, nr_seg_defrag); >>> + printf("Time stats(ms): max clone: %d, max unshare: %d," >>> + " max punch_hole: %d\n", >>> + max_clone_us/1000, max_unshare_us/1000, max_punch_us/1000); >>> + printf("Post-defrag %u extents detected\n", fsx.fsx_nextents); >>> + return ret; >>> +} >>> + >>> + >>> +static void defrag_help(void) >>> +{ >>> + printf(_( >>> +"\n" >>> +"Defragemnt files on XFS where reflink is enabled. IOs to the target files \n" >> >> "Defragment" > > OK. > >> >>> +"can be served durning the defragmentations.\n" >>> +"\n" >>> +" -s segment_size -- specify the segment size in MiB, minmum value is 4 \n" >>> +" default is 16\n")); >>> +} >>> + >>> +static cmdinfo_t defrag_cmd; >>> + >>> +static int >>> +defrag_f(int argc, char **argv) >>> +{ >>> + int i; >>> + int c; >>> + >>> + while ((c = getopt(argc, argv, "s:")) != EOF) { >>> + switch(c) { >>> + case 's': >>> + g_segment_size_lmt = atoi(optarg) * 1024 * 1024 / 512; >>> + if (g_segment_size_lmt < MIN_SEGMENT_SIZE_LIMIT) { >>> + g_segment_size_lmt = MIN_SEGMENT_SIZE_LIMIT; >>> + printf("Using minimium segment size %d\n", >>> + g_segment_size_lmt); >>> + } >>> + break; >>> + default: >>> + command_usage(&defrag_cmd); >>> + return 1; >>> + } >>> + } >>> + >>> + for (i = 0; i < filecount; i++) >>> + defrag_xfs_defrag(filetable[i].name); >> >> Pass in the whole filetable[i] and then you've already got an open fd >> and some validation that it's an xfs filesystem. > Filetable[I].xfd.fd doesn’t work well. UNSHARE returns “Bad file descriptor”, I am suspecting that fd is readonly. So I have to write-open again. Thanks, Wengang > Good to know. >> >>> + return 0; >>> +} >>> +void defrag_init(void) >>> +{ >>> + defrag_cmd.name = "defrag"; >>> + defrag_cmd.altname = "dfg"; >>> + defrag_cmd.cfunc = defrag_f; >>> + defrag_cmd.argmin = 0; >>> + defrag_cmd.argmax = 4; >>> + defrag_cmd.args = "[-s segment_size]"; >>> + defrag_cmd.flags = CMD_FLAG_ONESHOT; >> >> IIRC if you don't set CMD_FLAG_FOREIGN_OK then the command processor >> won't let this command get run against a non-xfs file. >> > > OK. > > Thanks, > Winging > >> --D >> >>> + defrag_cmd.oneline = _("Defragment XFS files"); >>> + defrag_cmd.help = defrag_help; >>> + >>> + add_command(&defrag_cmd); >>> +} >>> diff --git a/spaceman/init.c b/spaceman/init.c >>> index cf1ff3cb..396f965c 100644 >>> --- a/spaceman/init.c >>> +++ b/spaceman/init.c >>> @@ -35,6 +35,7 @@ init_commands(void) >>> trim_init(); >>> freesp_init(); >>> health_init(); >>> + defrag_init(); >>> } >>> >>> static int >>> diff --git a/spaceman/space.h b/spaceman/space.h >>> index 723209ed..c288aeb9 100644 >>> --- a/spaceman/space.h >>> +++ b/spaceman/space.h >>> @@ -26,6 +26,7 @@ extern void help_init(void); >>> extern void prealloc_init(void); >>> extern void quit_init(void); >>> extern void trim_init(void); >>> +extern void defrag_init(void); >>> #ifdef HAVE_GETFSMAP >>> extern void freesp_init(void); >>> #else >>> -- >>> 2.39.3 (Apple Git-146)