From: Darrick J. Wong <djwong@xxxxxxxxxx>

Add a -r concurrency= option to mkfs so that sysadmins can configure the
filesystem so that there are enough rtgroups that the specified number
of threads can (in theory) find an uncontended rtgroup from which to
allocate space.  This has the exact same purpose as the -d concurrency
switch that was added for the data device.

Signed-off-by: "Darrick J. Wong" <djwong@xxxxxxxxxx>
Reviewed-by: Christoph Hellwig <hch@xxxxxx>
---
 man/man8/mkfs.xfs.8.in |   28 ++++++++++
 mkfs/xfs_mkfs.c        |  140 +++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 165 insertions(+), 3 deletions(-)

diff --git a/man/man8/mkfs.xfs.8.in b/man/man8/mkfs.xfs.8.in
index 32361cf973fcf8..37e3a88e7ac777 100644
--- a/man/man8/mkfs.xfs.8.in
+++ b/man/man8/mkfs.xfs.8.in
@@ -1220,6 +1220,34 @@ .SH OPTIONS
 and
 .B rgsize
 suboptions are mutually exclusive.
+.TP
+.BI concurrency= value
+Create enough realtime allocation groups to handle the desired level of
+concurrency.
+The goal of this calculation scheme is to set the number of rtgroups to an
+integer multiple of the number of writer threads desired, to minimize
+contention of rtgroup locks.
+This scheme will neither create fewer rtgroups than would be created by the
+default configuration, nor will it create rtgroups smaller than 4GB.
+This option is not compatible with the
+.B rgcount
+or
+.B rgsize
+options.
+The magic value
+.I nr_cpus
+or
+.I 1
+or no value at all will set this parameter to the number of active processors
+in the system.
+If the kernel advertises that the realtime device is a non-mechanical storage
+device,
+.B mkfs.xfs
+will use this new geometry calculation scheme.
+The magic value of
+.I 0
+forces use of the older rtgroups geometry calculations that are used for
+mechanical storage.
.RE .PP .PD 0 diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c index deaac2044b94dd..073e79ac58303c 100644 --- a/mkfs/xfs_mkfs.c +++ b/mkfs/xfs_mkfs.c @@ -134,6 +134,7 @@ enum { R_NOALIGN, R_RGCOUNT, R_RGSIZE, + R_CONCURRENCY, R_MAX_OPTS, }; @@ -737,6 +738,7 @@ static struct opt_params ropts = { [R_NOALIGN] = "noalign", [R_RGCOUNT] = "rgcount", [R_RGSIZE] = "rgsize", + [R_CONCURRENCY] = "concurrency", [R_MAX_OPTS] = NULL, }, .subopt_params = { @@ -778,6 +780,7 @@ static struct opt_params ropts = { }, { .index = R_RGCOUNT, .conflicts = { { &ropts, R_RGSIZE }, + { &ropts, R_CONCURRENCY }, { NULL, LAST_CONFLICT } }, .minval = 1, .maxval = XFS_MAX_RGNUMBER, @@ -785,12 +788,22 @@ static struct opt_params ropts = { }, { .index = R_RGSIZE, .conflicts = { { &ropts, R_RGCOUNT }, + { &ropts, R_CONCURRENCY }, { NULL, LAST_CONFLICT } }, .convert = true, .minval = 0, .maxval = (unsigned long long)XFS_MAX_RGBLOCKS << XFS_MAX_BLOCKSIZE_LOG, .defaultval = SUBOPT_NEEDS_VAL, }, + { .index = R_CONCURRENCY, + .conflicts = { { &ropts, R_RGCOUNT }, + { &ropts, R_RGSIZE }, + { NULL, LAST_CONFLICT } }, + .convert = true, + .minval = 0, + .maxval = INT_MAX, + .defaultval = 1, + }, }, }; @@ -1034,6 +1047,7 @@ struct cli_params { int proto_slashes_are_spaces; int data_concurrency; int log_concurrency; + int rtvol_concurrency; /* parameters where 0 is not a valid value */ int64_t agcount; @@ -1157,7 +1171,8 @@ usage( void ) /* no-op info only */ [-N]\n\ /* prototype file */ [-p fname]\n\ /* quiet */ [-q]\n\ -/* realtime subvol */ [-r extsize=num,size=num,rtdev=xxx,rgcount=n,rgsize=n]\n\ +/* realtime subvol */ [-r extsize=num,size=num,rtdev=xxx,rgcount=n,rgsize=n,\n\ + concurrency=num]\n\ /* sectorsize */ [-s size=num]\n\ /* version */ [-V]\n\ devicename\n\ @@ -2071,6 +2086,31 @@ proto_opts_parser( return 0; } +static void +set_rtvol_concurrency( + struct opt_params *opts, + int subopt, + struct cli_params *cli, + const char *value) +{ + long long optnum; + + /* + * "nr_cpus" or "1" means set 
the concurrency level to the CPU count. + * If this cannot be determined, fall back to the default rtgroup + * geometry. + */ + if (!value || !strcmp(value, "nr_cpus")) + optnum = 1; + else + optnum = getnum(value, opts, subopt); + + if (optnum == 1) + cli->rtvol_concurrency = nr_cpus(); + else + cli->rtvol_concurrency = optnum; +} + static int rtdev_opts_parser( struct opt_params *opts, @@ -2101,6 +2141,9 @@ rtdev_opts_parser( case R_RGSIZE: cli->rgsize = getstr(value, opts, subopt); break; + case R_CONCURRENCY: + set_rtvol_concurrency(opts, subopt, cli, value); + break; default: return -EINVAL; } @@ -3740,10 +3783,97 @@ _("realtime group size (%llu) not at all congruent with extent size (%llu)\n"), return 0; } +static bool +rtdev_is_solidstate( + struct libxfs_init *xi) +{ + unsigned short rotational = 1; + int error; + + error = ioctl(xi->rt.fd, BLKROTATIONAL, &rotational); + if (error) + return false; + + return rotational == 0; +} + +static void +calc_concurrency_rtgroup_geometry( + struct mkfs_params *cfg, + struct cli_params *cli, + struct libxfs_init *xi) +{ + uint64_t try_rgsize; + uint64_t def_rgsize; + uint64_t def_rgcount; + int nr_threads = cli->rtvol_concurrency; + int try_threads; + + if (is_power_of_2(cfg->rtextblocks)) + def_rgsize = calc_rgsize_extsize_power(cfg); + else + def_rgsize = calc_rgsize_extsize_nonpower(cfg); + def_rgcount = howmany(cfg->rtblocks, def_rgsize); + try_rgsize = def_rgsize; + + /* + * If the caller doesn't have a particular concurrency level in mind, + * set it to the number of CPUs in the system. + */ + if (nr_threads < 0) + nr_threads = nr_cpus(); + + /* + * Don't create fewer rtgroups than what we would create with the + * default geometry calculation. + */ + if (!nr_threads || nr_threads < def_rgcount) + goto out; + + /* + * Let's try matching the number of rtgroups to the number of CPUs. If + * the proposed geometry results in rtgroups smaller than 4GB, reduce + * the rtgroup count until we have 4GB rtgroups. 
Don't let the thread + * count go below the default geometry calculation. + */ + try_threads = nr_threads; + try_rgsize = cfg->rtblocks / try_threads; + if (try_rgsize < GIGABYTES(4, cfg->blocklog)) { + do { + try_threads--; + if (try_threads <= def_rgcount) { + try_rgsize = def_rgsize; + goto out; + } + + try_rgsize = cfg->rtblocks / try_threads; + } while (try_rgsize < GIGABYTES(4, cfg->blocklog)); + goto out; + } + + /* + * For large filesystems we try to ensure that the rtgroup count is a + * multiple of the desired thread count. Specifically, if the proposed + * rtgroup size is larger than both the maximum rtgroup size and the + * rtgroup size we would have gotten with the defaults, add the thread + * count to the rtgroup count until we get an rtgroup size below both + * of those factors. + */ + while (try_rgsize > XFS_MAX_RGBLOCKS && try_rgsize > def_rgsize) { + try_threads += nr_threads; + try_rgsize = cfg->dblocks / try_threads; + } + +out: + cfg->rgsize = try_rgsize; + cfg->rgcount = howmany(cfg->rtblocks, cfg->rgsize); +} + static void calculate_rtgroup_geometry( struct mkfs_params *cfg, - struct cli_params *cli) + struct cli_params *cli, + struct libxfs_init *xi) { if (!cli->sb_feat.metadir) { cfg->rgcount = 0; @@ -3783,6 +3913,9 @@ _("rgsize (%s) not a multiple of fs blk size (%d)\n"), /* too small even for a single group */ cfg->rgsize = cfg->rtblocks; cfg->rgcount = 0; + } else if (cli->rtvol_concurrency > 0 || + (cli->data_concurrency == -1 && rtdev_is_solidstate(xi))) { + calc_concurrency_rtgroup_geometry(cfg, cli, xi); } else if (is_power_of_2(cfg->rtextblocks)) { cfg->rgsize = calc_rgsize_extsize_power(cfg); cfg->rgcount = cfg->rtblocks / cfg->rgsize + @@ -4890,6 +5023,7 @@ main( .is_supported = 1, .data_concurrency = -1, /* auto detect non-mechanical storage */ .log_concurrency = -1, /* auto detect non-mechanical ddev */ + .rtvol_concurrency = -1, /* auto detect non-mechanical rtdev */ .autofsck = FSPROP_AUTOFSCK_UNSET, }; struct mkfs_params cfg 
= {}; @@ -5077,7 +5211,7 @@ main( */ calculate_initial_ag_geometry(&cfg, &cli, &xi); align_ag_geometry(&cfg); - calculate_rtgroup_geometry(&cfg, &cli); + calculate_rtgroup_geometry(&cfg, &cli, &xi); calculate_imaxpct(&cfg, &cli);