> On Jul 11, 2024, at 3:49 PM, Wengang Wang <wen.gang.wang@xxxxxxxxxx> wrote: > > > >> On Jul 9, 2024, at 2:57 PM, Darrick J. Wong <djwong@xxxxxxxxxx> wrote: >> >> On Tue, Jul 09, 2024 at 12:10:22PM -0700, Wengang Wang wrote: >>> For each segment, the following steps are done trying to defrag it: >>> >>> 1. share the segment with a temporary file >>> 2. unshare the segment in the target file. kernel simulates Cow on the whole >>> segment complete the unshare (defrag). >>> 3. release blocks from the tempoary file. >>> >>> Signed-off-by: Wengang Wang <wen.gang.wang@xxxxxxxxxx> >>> --- >>> spaceman/defrag.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++ >>> 1 file changed, 114 insertions(+) >>> >>> diff --git a/spaceman/defrag.c b/spaceman/defrag.c >>> index 175cf461..9f11e36b 100644 >>> --- a/spaceman/defrag.c >>> +++ b/spaceman/defrag.c >>> @@ -263,6 +263,40 @@ add_ext: >>> return ret; >>> } >>> >>> +/* >>> + * check if the segment exceeds EoF. >>> + * fix up the clone range and return true if EoF happens, >>> + * return false otherwise. >>> + */ >>> +static bool >>> +defrag_clone_eof(struct file_clone_range *clone) >>> +{ >>> + off_t delta; >>> + >>> + delta = clone->src_offset + clone->src_length - g_defrag_file_size; >>> + if (delta > 0) { >>> + clone->src_length = 0; // to the end >>> + return true; >>> + } >>> + return false; >>> +} >>> + >>> +/* >>> + * get the time delta since pre_time in ms. >>> + * pre_time should contains values fetched by gettimeofday() >>> + * cur_time is used to store current time by gettimeofday() >>> + */ >>> +static long long >>> +get_time_delta_us(struct timeval *pre_time, struct timeval *cur_time) >>> +{ >>> + long long us; >>> + >>> + gettimeofday(cur_time, NULL); >>> + us = (cur_time->tv_sec - pre_time->tv_sec) * 1000000; >>> + us += (cur_time->tv_usec - pre_time->tv_usec); >>> + return us; >>> +} >>> + >>> /* >>> * defragment a file >>> * return 0 if successfully done, 1 otherwise >>> @@ -273,6 +307,7 @@ defrag_xfs_defrag(char *file_path) { >>> long nr_seg_defrag = 0, nr_ext_defrag = 0; >>> int scratch_fd = -1, defrag_fd = -1; >>> char tmp_file_path[PATH_MAX+1]; >>> + struct file_clone_range clone; >>> char *defrag_dir; >>> struct fsxattr fsx; >>> int ret = 0; >> >> Now that I see this, you might want to straighten up the lines: >> >> struct fsxattr fsx = { }; >> long nr_seg_defrag = 0, nr_ext_defrag = 0; >> >> etc. Note the "= { }" bit that means you don't have to memset them to >> zero explicitly. > > Nice! > >> >>> @@ -296,6 +331,8 @@ defrag_xfs_defrag(char *file_path) { >>> goto out; >>> } >>> >>> + clone.src_fd = defrag_fd; >>> + >>> defrag_dir = dirname(file_path); >>> snprintf(tmp_file_path, PATH_MAX, "%s/.xfsdefrag_%d", defrag_dir, >>> getpid()); >>> @@ -309,7 +346,11 @@ defrag_xfs_defrag(char *file_path) { >>> } >>> >>> do { >>> + struct timeval t_clone, t_unshare, t_punch_hole; >>> struct defrag_segment segment; >>> + long long seg_size, seg_off; >>> + int time_delta; >>> + bool stop; >>> >>> ret = defrag_get_next_segment(defrag_fd, &segment); >>> /* no more segments, we are done */ >>> @@ -322,6 +363,79 @@ defrag_xfs_defrag(char *file_path) { >>> ret = 1; >>> break; >>> } >>> + >>> + /* we are done if the segment contains only 1 extent */ >>> + if (segment.ds_nr < 2) >>> + continue; >>> + >>> + /* to bytes */ >>> + seg_off = segment.ds_offset * 512; >>> + seg_size = segment.ds_length * 512; >>> + >>> + clone.src_offset = seg_off; >>> + clone.src_length = seg_size; >>> + clone.dest_offset = seg_off; >>> + >>> + /* checks for EoF and fix up clone */ >>> + stop = defrag_clone_eof(&clone); >>> + gettimeofday(&t_clone, NULL); >>> + ret = ioctl(scratch_fd, FICLONERANGE, &clone); >> >> Hm, should the top-level defrag_f function check in the >> filetable[i].fsgeom structure that the fs supports reflink? > > Yes, good to know. It seems that xfs_fsop_geom doesn’t know about reflink? Thanks, Wengang > >> >>> + if (ret != 0) { >>> + fprintf(stderr, "FICLONERANGE failed %s\n", >>> + strerror(errno)); >> >> Might be useful to include the file_path in the error message: >> >> /opt/a: FICLONERANGE failed Software caused connection abort >> >> (maybe also put a semicolon before the strerror message?) > > OK. > >> >>> + break; >>> + } >>> + >>> + /* for time stats */ >>> + time_delta = get_time_delta_us(&t_clone, &t_unshare); >>> + if (time_delta > max_clone_us) >>> + max_clone_us = time_delta; >>> + >>> + /* for defrag stats */ >>> + nr_ext_defrag += segment.ds_nr; >>> + >>> + /* >>> + * For the shared range to be unshared via a copy-on-write >>> + * operation in the file to be defragged. This causes the >>> + * file needing to be defragged to have new extents allocated >>> + * and the data to be copied over and written out. >>> + */ >>> + ret = fallocate(defrag_fd, FALLOC_FL_UNSHARE_RANGE, seg_off, >>> + seg_size); >>> + if (ret != 0) { >>> + fprintf(stderr, "UNSHARE_RANGE failed %s\n", >>> + strerror(errno)); >>> + break; >>> + } >>> + >>> + /* for time stats */ >>> + time_delta = get_time_delta_us(&t_unshare, &t_punch_hole); >>> + if (time_delta > max_unshare_us) >>> + max_unshare_us = time_delta; >>> + >>> + /* >>> + * Punch out the original extents we shared to the >>> + * scratch file so they are returned to free space. >>> + */ >>> + ret = fallocate(scratch_fd, >>> + FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, seg_off, >>> + seg_size); >> >> Indentation here (two tabs for a continuation). > > OK. > >> Or just ftruncate >> scratch_fd to zero bytes? I think you have to do that for the EOF stuff >> to work, right? >> > > I’d truncate the UNSHARE range only in the loop. > EOF stuff would be truncated on (O_TMPFILE) file close. > The EOF stuff would be used for another purpose, see > [PATCH 6/9] spaceman/defrag: workaround kernel > > Thanks, > Wengang > >> --D >> >>> + if (ret != 0) { >>> + fprintf(stderr, "PUNCH_HOLE failed %s\n", >>> + strerror(errno)); >>> + break; >>> + } >>> + >>> + /* for defrag stats */ >>> + nr_seg_defrag += 1; >>> + >>> + /* for time stats */ >>> + time_delta = get_time_delta_us(&t_punch_hole, &t_clone); >>> + if (time_delta > max_punch_us) >>> + max_punch_us = time_delta; >>> + >>> + if (stop) >>> + break; >>> } while (true); >>> out: >>> if (scratch_fd != -1) { >>> -- >>> 2.39.3 (Apple Git-146)