Parameterize the attempts for the _firstn choose method, and apply the rule-specified tries count to firstn mode as well. Note that we have slightly different behavior here than with indep: If the chooseleaf tries value is not specified for firstn, we pass through the normal attempt count. This maintains compatibility with legacy behavior. Note that this is usually *not* actually N^2 work, though, because of the descend_once tunable. However, descend_once is unfortunately *not* the same thing as 1 chooseleaf try because it is only checked on a reject but not on a collision. Sigh. In contrast, for indep, if the chooseleaf tries value is not specified we default to 1 recursive attempt, because that is simply more sane, and we have the option to do so. The descend_once tunable has no effect for indep. Reflects ceph.git commit 64aeded50d80942d66a5ec7b604ff2fcbf5d7b63. Signed-off-by: Ilya Dryomov <ilya.dryomov@xxxxxxxxxxx> --- include/linux/crush/crush.h | 5 ++++- net/ceph/crush/mapper.c | 14 ++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 2e50bab91655..07b8fd4f81fc 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -165,7 +165,10 @@ struct crush_map { __u32 choose_local_fallback_tries; /* choose attempts before giving up */ __u32 choose_total_tries; - /* attempt chooseleaf inner descent once; on failure retry outer descent */ + /* attempt chooseleaf inner descent once for firstn mode; on + * reject retry outer descent. Note that this does *not* + * apply to a collision: in that case we will retry as we used + * to. 
*/ __u32 chooseleaf_descend_once; }; diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index e3ade074541c..c34320518c8b 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -299,6 +299,8 @@ static int crush_choose_firstn(const struct crush_map *map, const __u32 *weight, int weight_max, int x, int numrep, int type, int *out, int outpos, + unsigned int attempts, + unsigned int recurse_attempts, int recurse_to_leaf, int descend_once, int *out2) { @@ -385,6 +387,7 @@ static int crush_choose_firstn(const struct crush_map *map, weight, weight_max, x, outpos+1, 0, out2, outpos, + recurse_attempts, 0, 0, map->chooseleaf_descend_once, NULL) <= outpos) @@ -421,7 +424,7 @@ reject: flocal <= in->size + map->choose_local_fallback_tries) /* exhaustive bucket search */ retry_bucket = 1; - else if (ftotal <= map->choose_total_tries) + else if (ftotal <= attempts) /* then retry descent */ retry_descent = 1; else @@ -634,7 +637,8 @@ int crush_do_rule(const struct crush_map *map, __u32 step; int i, j; int numrep; - int choose_leaf_tries = 1; + int choose_tries = map->choose_total_tries; + int choose_leaf_tries = 0; const int descend_once = 0; if ((__u32)ruleno >= map->max_rules) { @@ -701,6 +705,8 @@ int crush_do_rule(const struct crush_map *map, x, numrep, curstep->arg2, o+osize, j, + choose_tries, + choose_leaf_tries ? choose_leaf_tries : choose_tries, recurse_to_leaf, descend_once, c+osize); } else { @@ -711,8 +717,8 @@ int crush_do_rule(const struct crush_map *map, x, numrep, numrep, curstep->arg2, o+osize, j, - map->choose_total_tries, - choose_leaf_tries, + choose_tries, + choose_leaf_tries ? choose_leaf_tries : 1, recurse_to_leaf, c+osize, 0); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html