From: Darrick J. Wong <djwong@xxxxxxxxxx> Print the cumulative distribution function of the free space buckets in reverse order. Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx> Reviewed-by: Christoph Hellwig <hch@xxxxxx> --- libfrog/histogram.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++--- libfrog/histogram.h | 8 +++++ 2 files changed, 80 insertions(+), 4 deletions(-) diff --git a/libfrog/histogram.c b/libfrog/histogram.c index 7cee6b350..59d46363c 100644 --- a/libfrog/histogram.c +++ b/libfrog/histogram.c @@ -102,17 +102,81 @@ hist_free( memset(hs, 0, sizeof(struct histogram)); } +/* + * Compute the CDF of the histogram in decreasing order of value. + * + * For a free space histogram, callers can determine how much free space is not + * in the long tail of small extents, e.g. 98% of the free space extents are + * larger than 31 blocks. + */ +static struct histogram_cdf * +hist_cdf( + const struct histogram *hs) +{ + struct histogram_cdf *cdf; + int i = hs->nr_buckets - 1; + + ASSERT(hs->nr_buckets < INT_MAX); + + cdf = malloc(sizeof(struct histogram_cdf)); + if (!cdf) + return NULL; + cdf->histogram = hs; + + if (hs->nr_buckets == 0) { + cdf->buckets = NULL; + return cdf; + } + + cdf->buckets = calloc(hs->nr_buckets, sizeof(struct histbucket)); + if (!cdf->buckets) { + free(cdf); + return NULL; + } + + cdf->buckets[i].nr_obs = hs->buckets[i].nr_obs; + cdf->buckets[i].sum = hs->buckets[i].sum; + i--; + + while (i >= 0) { + cdf->buckets[i].nr_obs = hs->buckets[i].nr_obs + + cdf->buckets[i + 1].nr_obs; + + cdf->buckets[i].sum = hs->buckets[i].sum + + cdf->buckets[i + 1].sum; + i--; + } + + return cdf; +} + +/* Free all data associated with a histogram cdf. */ +static void +histcdf_free( + struct histogram_cdf *cdf) +{ + free(cdf->buckets); + free(cdf); +} + /* Dump a histogram to stdout. */ void hist_print( const struct histogram *hs, const struct histogram_strings *hstr) { + struct histogram_cdf *cdf; unsigned int obs_w = strlen(hstr->observations); unsigned int sum_w = strlen(hstr->sum); unsigned int from_w = 7, to_w = 7; unsigned int i; + cdf = hist_cdf(hs); + if (!cdf) { + perror(_("histogram cdf")); + return; + } + for (i = 0; i < hs->nr_buckets; i++) { char buf[256]; @@ -132,23 +196,27 @@ hist_print( sum_w = max(sum_w, strlen(buf)); } - printf("%*s %*s %*s %*s %6s\n", + printf("%*s %*s %*s %*s %6s %6s %6s\n", from_w, _("from"), to_w, _("to"), obs_w, hstr->observations, sum_w, hstr->sum, - _("pct")); + _("pct"), _("blkcdf"), _("extcdf")); for (i = 0; i < hs->nr_buckets; i++) { if (hs->buckets[i].nr_obs == 0) continue; - printf("%*lld %*lld %*lld %*lld %6.2f\n", + printf("%*lld %*lld %*lld %*lld %6.2f %6.2f %6.2f\n", from_w, hs->buckets[i].low, to_w, hs->buckets[i].high, obs_w, hs->buckets[i].nr_obs, sum_w, hs->buckets[i].sum, - hs->buckets[i].sum * 100.0 / hs->tot_sum); + hs->buckets[i].sum * 100.0 / hs->tot_sum, + cdf->buckets[i].sum * 100.0 / hs->tot_sum, + cdf->buckets[i].nr_obs * 100.0 / hs->tot_obs); } + + histcdf_free(cdf); } /* Summarize the contents of the histogram. */ diff --git a/libfrog/histogram.h b/libfrog/histogram.h index 68afdeb29..0c534f65d 100644 --- a/libfrog/histogram.h +++ b/libfrog/histogram.h @@ -33,6 +33,14 @@ struct histogram { unsigned int nr_buckets; }; +struct histogram_cdf { + /* histogram from which this cdf was computed */ + const struct histogram *histogram; + + /* distribution information */ + struct histbucket *buckets; +}; + int hist_add_bucket(struct histogram *hs, long long bucket_low); void hist_add(struct histogram *hs, long long value); void hist_init(struct histogram *hs);