On Tue, Jul 09, 2024 at 12:10:21PM -0700, Wengang Wang wrote: > segments are the smallest unit to defragment. > > A segment > 1. Can't exceed size limit What size limit? Do you mean a segment can't extend beyond EOF? Or did you actually mean RLIMIT_FSIZE? > 2. contains some extents > 3. the contained extents can't be "unwritten" > 4. the contained extents must be contigous in file blocks As in the segment cannot contain sparse holes? I think what I"m reading here is that a segment cannot extend beyond EOF and must be completely filled with written extent mappings? Is there an upper limit on the number of mappings per segment? > Signed-off-by: Wengang Wang <wen.gang.wang@xxxxxxxxxx> > --- > spaceman/defrag.c | 204 ++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 204 insertions(+) > > diff --git a/spaceman/defrag.c b/spaceman/defrag.c > index c9732984..175cf461 100644 > --- a/spaceman/defrag.c > +++ b/spaceman/defrag.c > @@ -14,6 +14,32 @@ > #include "space.h" > #include "input.h" > > +#define MAPSIZE 512 > +/* used to fetch bmap */ > +struct getbmapx g_mapx[MAPSIZE]; Each of these global arrays increases the bss segment size, which increases the overall footprint of xfs_spaceman, even when it's not being used to defragment files. Could you switch this data to be dynamically allocated at the start of defrag_f and freed at the end? > +/* current offset of the file in units of 512 bytes, used to fetch bmap */ > +static long long g_offset = 0; Unnecessary space after the 'long long'. > +/* index to indentify next extent, used to get next extent */ > +static int g_ext_next_idx = -1; > + > +/* > + * segment, the smallest unit to defrag > + * it includes some contiguous extents. > + * no holes included, > + * no unwritten extents included > + * the size is limited by g_segment_size_lmt > + */ > +struct defrag_segment { > + /* segment offset in units of 512 bytes */ > + long long ds_offset; > + /* length of segment in units of 512 bytes */ > + long long ds_length; > + /* number of extents in this segment */ > + int ds_nr; > + /* flag indicating if segment contains shared blocks */ > + bool ds_shared; Maybe g_mapx belongs in here? Wait, does a bunch of contiguous written bmapx records comprise a segment, or is a segment created from (possibly a subection of) a particular written bmapx record? > +}; > + > /* defrag segment size limit in units of 512 bytes */ > #define MIN_SEGMENT_SIZE_LIMIT 8192 /* 4MiB */ > #define DEFAULT_SEGMENT_SIZE_LIMIT 32768 /* 16MiB */ > @@ -78,6 +104,165 @@ defrag_check_file(char *path) > return true; > } > > +/* > + * get next extent in the file. > + * Note: next call will get the same extent unless move_next_extent() is called. > + * returns: > + * -1: error happened. > + * 0: extent returned > + * 1: no more extent left > + */ > +static int > +defrag_get_next_extent(int fd, struct getbmapx *map_out) > +{ > + int err = 0, i; > + > + /* when no extents are cached in g_mapx, fetch from kernel */ > + if (g_ext_next_idx == -1) { > + g_mapx[0].bmv_offset = g_offset; > + g_mapx[0].bmv_length = -1LL; > + g_mapx[0].bmv_count = MAPSIZE; > + g_mapx[0].bmv_iflags = BMV_IF_NO_HOLES | BMV_IF_PREALLOC; > + err = ioctl(fd, XFS_IOC_GETBMAPX, g_mapx); > + if (err == -1) { > + perror("XFS_IOC_GETBMAPX failed"); > + goto out; > + } > + /* for stats */ > + g_ext_stats.nr_ext_total += g_mapx[0].bmv_entries; > + > + /* no more extents */ > + if (g_mapx[0].bmv_entries == 0) { > + err = 1; > + goto out; > + } > + > + /* for stats */ > + for (i = 1; i <= g_mapx[0].bmv_entries; i++) { > + if (g_mapx[i].bmv_oflags & BMV_OF_PREALLOC) > + g_ext_stats.nr_ext_unwritten++; > + if (g_mapx[i].bmv_oflags & BMV_OF_SHARED) > + g_ext_stats.nr_ext_shared++; > + } > + > + g_ext_next_idx = 1; > + g_offset = g_mapx[g_mapx[0].bmv_entries].bmv_offset + > + g_mapx[g_mapx[0].bmv_entries].bmv_length; > + } Huh. AFAICT, g_ext_next_idx/g_mapx effectively act as a cursor over the mappings for this file segment. In that case, shouldn't defrag_move_next_extent (which actually advances the cursor) be in charge of grabbing bmapx records from the kernel, and defrag_get_next_extent be the trivial helper to pass mappings to the consumer of the bmapx objects (aka defrag_get_next_segment)? > + > + map_out->bmv_offset = g_mapx[g_ext_next_idx].bmv_offset; > + map_out->bmv_length = g_mapx[g_ext_next_idx].bmv_length; > + map_out->bmv_oflags = g_mapx[g_ext_next_idx].bmv_oflags; > +out: > + return err; > +} > + > +/* > + * move to next extent > + */ > +static void > +defrag_move_next_extent() > +{ > + if (g_ext_next_idx == g_mapx[0].bmv_entries) > + g_ext_next_idx = -1; > + else > + g_ext_next_idx += 1; > +} > + > +/* > + * check if the given extent is a defrag target. > + * no need to check for holes as we are using BMV_IF_NO_HOLES > + */ > +static bool > +defrag_is_target(struct getbmapx *mapx) > +{ > + /* unwritten */ > + if (mapx->bmv_oflags & BMV_OF_PREALLOC) > + return false; > + return mapx->bmv_length < g_segment_size_lmt; > +} > + > +static bool > +defrag_is_extent_shared(struct getbmapx *mapx) > +{ > + return !!(mapx->bmv_oflags & BMV_OF_SHARED); > +} > + > +/* > + * get next segment to defragment. > + * returns: > + * -1 error happened. > + * 0 segment returned. > + * 1 no more segments to return > + */ > +static int > +defrag_get_next_segment(int fd, struct defrag_segment *out) > +{ > + struct getbmapx mapx; > + int ret; > + > + out->ds_offset = 0; > + out->ds_length = 0; > + out->ds_nr = 0; > + out->ds_shared = false; > + > + do { > + ret = defrag_get_next_extent(fd, &mapx); > + if (ret != 0) { > + /* > + * no more extetns, return current segment if its not > + * empty > + */ > + if (ret == 1 && out->ds_nr > 0) > + ret = 0; > + /* otherwise, error heppened, stop */ > + break; > + } > + > + /* > + * If the extent is not a defrag target, skip it. > + * go to next extent if the segment is empty; > + * otherwise return the segment. > + */ > + if (!defrag_is_target(&mapx)) { > + defrag_move_next_extent(); > + if (out->ds_nr == 0) > + continue; > + else > + break; > + } > + > + /* check for segment size limitation */ > + if (out->ds_length + mapx.bmv_length > g_segment_size_lmt) > + break; > + > + /* the segment is empty now, add this extent to it for sure */ > + if (out->ds_nr == 0) { > + out->ds_offset = mapx.bmv_offset; > + goto add_ext; > + } > + > + /* > + * the segment is not empty, check for hole since the last exent > + * if a hole exist before this extent, this extent can't be > + * added to the segment. return the segment > + */ > + if (out->ds_offset + out->ds_length != mapx.bmv_offset) > + break; > + > +add_ext: > + if (defrag_is_extent_shared(&mapx)) > + out->ds_shared = true; > + > + out->ds_length += mapx.bmv_length; > + out->ds_nr += 1; OH, ok. So we walk the mappings for a file. If we can identify a run of contiguous written mappings, we define a segment to be the file range described by that run, up to whatever the maximum is (~4-16M). Each of these segments is defragmented (somehow). Is that correct? > + defrag_move_next_extent(); > + > + } while (true); > + > + return ret; > +} > + > /* > * defragment a file > * return 0 if successfully done, 1 otherwise > @@ -92,6 +277,9 @@ defrag_xfs_defrag(char *file_path) { > struct fsxattr fsx; > int ret = 0; > > + g_offset = 0; > + g_ext_next_idx = -1; > + > fsx.fsx_nextents = 0; > memset(&g_ext_stats, 0, sizeof(g_ext_stats)); > > @@ -119,6 +307,22 @@ defrag_xfs_defrag(char *file_path) { > ret = 1; > goto out; > } > + > + do { > + struct defrag_segment segment; > + > + ret = defrag_get_next_segment(defrag_fd, &segment); > + /* no more segments, we are done */ > + if (ret == 1) { > + ret = 0; > + break; > + } If you reverse the polarity of the 0/1 return values (aka return 1 if there is a segment and 0 if there is none) then you can shorten this loop to: struct defrag_segment segment; int ret; while ((ret = defrag_get_next_segment(...)) == 1) { /* process segment */ } return ret; --D > + /* error happened when reading bmap, stop here */ > + if (ret == -1) { > + ret = 1; > + break; > + } > + } while (true); > out: > if (scratch_fd != -1) { > close(scratch_fd); > -- > 2.39.3 (Apple Git-146) > >