Re: [PATCH i-g-t 17/18] stats: Add support for quartiles (and thus median)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sat, Jun 27, 2015 at 04:08:15PM +0100, Damien Lespiau wrote:
> More stuff, quite useful characteristics of a dataset.
> 
> Signed-off-by: Damien Lespiau <damien.lespiau@xxxxxxxxx>
> ---
>  lib/igt_stats.c       | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/igt_stats.h       |   5 ++
>  lib/tests/igt_stats.c |  41 ++++++++++++++++
>  3 files changed, 177 insertions(+)
> 
> diff --git a/lib/igt_stats.c b/lib/igt_stats.c
> index 5c1f23f..66fd563 100644
> --- a/lib/igt_stats.c
> +++ b/lib/igt_stats.c
> @@ -23,6 +23,7 @@
>   */
>  
>  #include <math.h>
> +#include <stdlib.h>
>  #include <string.h>
>  
>  #include "igt_core.h"
> @@ -94,6 +95,7 @@ void igt_stats_init(igt_stats_t *stats, unsigned int capacity)
>  void igt_stats_fini(igt_stats_t *stats)
>  {
>  	free(stats->values);
> +	free(stats->sorted);
>  }
>  
>  
> @@ -158,6 +160,7 @@ void igt_stats_push(igt_stats_t *stats, uint64_t value)
>  	stats->values[stats->n_values++] = value;
>  
>  	stats->mean_variance_valid = false;
> +	stats->sorted_array_valid = false;
>  
>  	if (value < stats->min)
>  		stats->min = value;
> @@ -220,6 +223,134 @@ uint64_t igt_stats_get_range(igt_stats_t *stats)
>  	return igt_stats_get_max(stats) - igt_stats_get_min(stats);
>  }
>  
> +static int cmp_u64(const void *pa, const void *pb)
> +{
> +	const uint64_t *a = pa, *b = pb;
> +
> +	if (*a < *b)
> +		return -1;
> +	if (*a > *b)
> +		return 1;
> +	return 0;
> +}
> +
> +static void igt_stats_ensure_sorted_values(igt_stats_t *stats)
> +{
> +	if (stats->sorted_array_valid)
> +		return;
> +
> +	if (!stats->sorted) {
> +		stats->sorted = calloc(stats->capacity, sizeof(*stats->values));
> +		igt_assert(stats->sorted);
> +	}

Since sorted_array_valid = false on igt_stats_push, but we only allocate
for the first call to sort, there's no safeguard against

  igt_stats_push();
  igt_stats_get_median();
  igt_stats_push();
  igt_stats_get_median();

exploding.
-Chris
> +
> +	memcpy(stats->sorted, stats->values,
> +	       sizeof(*stats->values) * stats->n_values);
> +
> +	qsort(stats->sorted, stats->n_values, sizeof(*stats->values), cmp_u64);
> +
> +	stats->sorted_array_valid = true;
> +}
> +
> +/*
> + * We use Tukey's hinge for our quartiles determination.
> + * ends (end, lower_end) are exclusive.
> + */
> +static double
> +igt_stats_get_median_internal(igt_stats_t *stats,
> +			      unsigned int start, unsigned int end,
> +			      unsigned int *lower_end /* out */,
> +			      unsigned int *upper_start /* out */)
> +{
> +	unsigned int mid, n_values = end - start;
> +	double median;
> +
> +	igt_stats_ensure_sorted_values(stats);
> +
> +	/* odd number of data points */
> +	if (n_values % 2 == 1) {
> +		/* median is the value in the middle (actual datum) */
> +		mid = start + n_values / 2;
> +		median = stats->sorted[mid];
> +
> +		/* the two halves contain the median value */
> +		if (lower_end)
> +			*lower_end = mid + 1;
> +		if (upper_start)
> +			*upper_start = mid;
> +
> +	/* even number of data points */
> +	} else {
> +		/*
> +		 * The middle is in between two indexes, 'mid' points at the
> +		 * lower one. The median is then the average between those two
> +		 * values.
> +		 */
> +		mid = start + n_values / 2 - 1;
> +		median = (stats->sorted[mid] + stats->sorted[mid + 1]) / 2.;
> +
> +		if (lower_end)
> +			*lower_end = mid + 1;
> +		if (upper_start)
> +			*upper_start = mid + 1;
> +	}
> +
> +	return median;
> +}
> +
> +/**
> + * igt_stats_get_quartiles:
> + * @stats: An #igt_stats_t instance
> + * @q1: (out): lower or 25th quartile
> + * @q2: (out): median or 50th quartile
> + * @q3: (out): upper or 75th quartile
> + *
> + * Retrieves the [quartiles](https://en.wikipedia.org/wiki/Quartile) of the
> + * @stats dataset.
> + */
> +void igt_stats_get_quartiles(igt_stats_t *stats,
> +			     double *q1, double *q2, double *q3)
> +{
> +	unsigned int lower_end, upper_start;
> +	double ret;
> +
> +	if (stats->n_values < 3) {
> +		if (q1)
> +			*q1 = 0.;
> +		if (q2)
> +			*q2 = 0.;
> +		if (q3)
> +			*q3 = 0.;
> +		return;
> +	}
> +
> +	ret = igt_stats_get_median_internal(stats, 0, stats->n_values,
> +					    &lower_end, &upper_start);
> +	if (q2)
> +		*q2 = ret;
> +
> +	ret = igt_stats_get_median_internal(stats, 0, lower_end, NULL, NULL);
> +	if (q1)
> +		*q1 = ret;
> +
> +	ret = igt_stats_get_median_internal(stats, upper_start, stats->n_values,
> +					    NULL, NULL);
> +	if (q3)
> +		*q3 = ret;

That's neat, I like that determination of the quartiles a lot.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux