Add a new -o fstrim_pct= option to xfs_scrub just in case there are users out there who want a different percentage. For example, accepting a 95% trim would net us a speed increase of nearly two orders of magnitude, ignoring system call overhead. Setting it to 100% will trim everything, just like fstrim(8). Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx> --- libfrog/histogram.c | 2 + libfrog/histogram.h | 1 + man/man8/xfs_scrub.8 | 16 +++++++++++ scrub/phase8.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++--- scrub/vfs.c | 4 ++- scrub/vfs.h | 2 + scrub/xfs_scrub.c | 38 +++++++++++++++++++++++++ scrub/xfs_scrub.h | 12 ++++++++ 8 files changed, 141 insertions(+), 9 deletions(-) diff --git a/libfrog/histogram.c b/libfrog/histogram.c index 54e2bac0f731..61ecda16ffef 100644 --- a/libfrog/histogram.c +++ b/libfrog/histogram.c @@ -109,7 +109,7 @@ hist_free( * of small extents, e.g. 98% of the free space extents are larger than 31 * blocks. */ -static int +int hist_cdf( const struct histogram *hs, struct histogram *cdf) diff --git a/libfrog/histogram.h b/libfrog/histogram.h index ec788344d4c2..ecac66d240da 100644 --- a/libfrog/histogram.h +++ b/libfrog/histogram.h @@ -39,6 +39,7 @@ void hist_add(struct histogram *hs, long long len); void hist_init(struct histogram *hs); void hist_prepare(struct histogram *hs, long long maxlen); void hist_free(struct histogram *hs); +int hist_cdf(const struct histogram *hs, struct histogram *cdf); void hist_print(const struct histogram *hs); void hist_summarize(const struct histogram *hs); diff --git a/man/man8/xfs_scrub.8 b/man/man8/xfs_scrub.8 index 404baba696e1..b9f253e1b079 100644 --- a/man/man8/xfs_scrub.8 +++ b/man/man8/xfs_scrub.8 @@ -100,6 +100,22 @@ The supported are: .RS 1.0i .TP +.BI fstrim_pct= percentage +To constrain the amount of time spent on fstrim activities during phase 8, +this program tries to balance estimated runtime against completeness of the +trim. +In short, the program avoids small trim requests to save time. + +During phase 7, a log-scale histogram of free space extents is constructed. +At the start of phase 8, a CDF is computed in decreasing order of extent +length from the histogram buckets. +A point corresponding to the fstrim percentage target is chosen from the CDF +and mapped back to a histogram bucket. +Free space extents at least as long as the bucket size are trimmed. +Smaller extents are ignored. + +By default, the percentage threshold is 99%. +.TP .BI iwarn Treat informational messages as warnings. This will result in a nonzero return code, and a higher logging level. diff --git a/scrub/phase8.c b/scrub/phase8.c index ae6d965c75e1..6bb9317afecc 100644 --- a/scrub/phase8.c +++ b/scrub/phase8.c @@ -11,6 +11,7 @@ #include "list.h" #include "libfrog/paths.h" #include "libfrog/workqueue.h" +#include "libfrog/histogram.h" #include "xfs_scrub.h" #include "common.h" #include "progress.h" @@ -57,10 +58,12 @@ static int fstrim_fsblocks( struct scrub_ctx *ctx, uint64_t start_fsb, - uint64_t fsbcount) + uint64_t fsbcount, + uint64_t minlen_fsb) { uint64_t start = cvt_off_fsb_to_b(&ctx->mnt, start_fsb); uint64_t len = cvt_off_fsb_to_b(&ctx->mnt, fsbcount); + uint64_t minlen = cvt_off_fsb_to_b(&ctx->mnt, minlen_fsb); int error; while (len > 0) { @@ -68,7 +71,7 @@ fstrim_fsblocks( run = min(len, FSTRIM_MAX_BYTES); - error = fstrim(ctx, start, run); + error = fstrim(ctx, start, run, minlen); if (error == EOPNOTSUPP) { /* Pretend we finished all the work. */ progress_add(len); @@ -78,9 +81,10 @@ fstrim_fsblocks( char descr[DESCR_BUFSZ]; snprintf(descr, sizeof(descr) - 1, - _("fstrim start 0x%llx run 0x%llx"), + _("fstrim start 0x%llx run 0x%llx minlen 0x%llx"), (unsigned long long)start, - (unsigned long long)run); + (unsigned long long)run, + (unsigned long long)minlen); str_liberror(ctx, error, descr); return error; } @@ -93,6 +97,64 @@ fstrim_fsblocks( return 0; } +/* Compute a suitable minlen parameter for fstrim. */ +static uint64_t +fstrim_compute_minlen( + const struct scrub_ctx *ctx, + const struct histogram *freesp_hist) +{ + struct histogram cdf; + uint64_t ret = 0; + double blk_threshold = 0; + unsigned int i; + unsigned int ag_max_usable; + int error; + + /* + * The kernel will reject a minlen that's larger than m_ag_max_usable. + * We can't calculate or query that value directly, so we guesstimate + * that it's 95% of the AG size. + */ + ag_max_usable = ctx->mnt.fsgeom.agblocks * 95 / 100; + + if (freesp_hist->totexts == 0) + goto out; + + if (debug > 1) + hist_print(freesp_hist); + + /* Insufficient samples to make a meaningful histogram */ + if (freesp_hist->totexts < freesp_hist->nr_buckets * 10) + goto out; + + hist_init(&cdf); + error = hist_cdf(freesp_hist, &cdf); + if (error) + goto out_free; + + blk_threshold = freesp_hist->totblocks * ctx->fstrim_block_pct; + for (i = 1; i < freesp_hist->nr_buckets; i++) { + if (cdf.buckets[i].blocks < blk_threshold) { + ret = freesp_hist->buckets[i - 1].low; + break; + } + } + +out_free: + hist_free(&cdf); +out: + if (debug > 1) + printf(_("fstrim minlen %lld threshold %lld ag_max_usable %u\n"), + (unsigned long long)ret, + (unsigned long long)blk_threshold, + ag_max_usable); + if (ret > ag_max_usable) + ret = ag_max_usable; + if (ret == 1) + ret = 0; + return ret; +} + /* Trim each AG on the data device. */ static int fstrim_datadev( @@ -100,8 +162,11 @@ fstrim_datadev( { struct xfs_fsop_geom *geo = &ctx->mnt.fsgeom; uint64_t fsbno; + uint64_t minlen_fsb; int error; + minlen_fsb = fstrim_compute_minlen(ctx, &ctx->datadev_hist); + for (fsbno = 0; fsbno < geo->datablocks; fsbno += geo->agblocks) { uint64_t fsbcount; @@ -113,7 +178,7 @@ fstrim_datadev( */ progress_add(geo->blocksize); fsbcount = min(geo->datablocks - (fsbno + 1), geo->agblocks - 1); - error = fstrim_fsblocks(ctx, fsbno + 1, fsbcount); + error = fstrim_fsblocks(ctx, fsbno + 1, fsbcount, minlen_fsb); if (error) return error; } diff --git a/scrub/vfs.c b/scrub/vfs.c index cc958ba9438e..22c19485a2da 100644 --- a/scrub/vfs.c +++ b/scrub/vfs.c @@ -300,11 +300,13 @@ int fstrim( struct scrub_ctx *ctx, uint64_t start, - uint64_t len) + uint64_t len, + uint64_t minlen) { struct fstrim_range range = { .start = start, .len = len, + .minlen = minlen, }; if (ioctl(ctx->mnt.fd, FITRIM, &range) == 0) diff --git a/scrub/vfs.h b/scrub/vfs.h index 1af8d80d1de6..f0cfd53c27be 100644 --- a/scrub/vfs.h +++ b/scrub/vfs.h @@ -24,6 +24,6 @@ typedef int (*scan_fs_tree_dirent_fn)(struct scrub_ctx *, const char *, int scan_fs_tree(struct scrub_ctx *ctx, scan_fs_tree_dir_fn dir_fn, scan_fs_tree_dirent_fn dirent_fn, void *arg); -int fstrim(struct scrub_ctx *ctx, uint64_t start, uint64_t len); +int fstrim(struct scrub_ctx *ctx, uint64_t start, uint64_t len, uint64_t minlen); #endif /* XFS_SCRUB_VFS_H_ */ diff --git a/scrub/xfs_scrub.c b/scrub/xfs_scrub.c index 2894f6148e10..296d814eceeb 100644 --- a/scrub/xfs_scrub.c +++ b/scrub/xfs_scrub.c @@ -622,11 +622,13 @@ report_outcome( */ enum o_opt_nums { IWARN = 0, + FSTRIM_PCT, O_MAX_OPTS, }; static char *o_opts[] = { [IWARN] = "iwarn", + [FSTRIM_PCT] = "fstrim_pct", [O_MAX_OPTS] = NULL, }; @@ -635,8 +637,11 @@ parse_o_opts( struct scrub_ctx *ctx, char *p) { + double dval; + while (*p != '\0') { char *val; + char *endp; switch (getsubopt(&p, o_opts, &val)) { case IWARN: @@ -647,6 +652,35 @@ parse_o_opts( } info_is_warning = true; break; + case FSTRIM_PCT: + if (!val) { + fprintf(stderr, + _("-o fstrim_pct requires a parameter\n")); + usage(); + } + + errno = 0; + dval = strtod(val, &endp); + + if (*endp) { + fprintf(stderr, + _("-o fstrim_pct must be a floating point number\n")); + usage(); + } + if (errno) { + fprintf(stderr, + _("-o fstrim_pct: %s\n"), + strerror(errno)); + usage(); + } + if (dval <= 0 || dval > 100) { + fprintf(stderr, + _("-o fstrim_pct must be larger than 0 and less than 100\n")); + usage(); + } + + ctx->fstrim_block_pct = dval / 100.0; + break; default: usage(); break; @@ -659,7 +693,9 @@ main( int argc, char **argv) { - struct scrub_ctx ctx = {0}; + struct scrub_ctx ctx = { + .fstrim_block_pct = FSTRIM_BLOCK_PCT_DEFAULT, + }; struct phase_rusage all_pi; char *mtab = NULL; FILE *progress_fp = NULL; diff --git a/scrub/xfs_scrub.h b/scrub/xfs_scrub.h index 1a28f0cc847e..7d48f4bad9ce 100644 --- a/scrub/xfs_scrub.h +++ b/scrub/xfs_scrub.h @@ -90,8 +90,20 @@ struct scrub_ctx { /* Free space histograms, in fsb */ struct histogram datadev_hist; + + /* + * Pick the largest value for fstrim minlen such that we trim at least + * this much space per volume. + */ + double fstrim_block_pct; }; +/* + * Trim only enough free space extents (in order of decreasing length) to + * ensure that this percentage of the free space is trimmed. + */ +#define FSTRIM_BLOCK_PCT_DEFAULT (99.0 / 100.0) + /* Phase helper functions */ void xfs_shutdown_fs(struct scrub_ctx *ctx); int scrub_cleanup(struct scrub_ctx *ctx);