[PATCH 08/11] perf/aggregate: optionally include a t-test score

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Adds support for calling out to R[1] to perform a significance test on
the gathered results.

More specifically, the script's purpose was always to compare several
revisions (it still makes nice tables for single tests though ;-).
Given the first and some other column (= revision/directory), this
runs Welch's t-test[2] on the two sets of measurements to determine
whether there is a significant difference between the distributions.
It then shows the p-value in a simplified form, so that significant
differences stand out visually.

All of this is entirely optional: if R is not available, it simply
puts nothing in this field.

[1] http://www.r-project.org/
[2] http://en.wikipedia.org/wiki/Welch%27s_t-test

Signed-off-by: Thomas Rast <trast@xxxxxxxxxxxxxxx>
---
 t/perf/aggregate.perl  |   37 +++++++++++++++++++++++++++++++------
 t/perf/t_test_score.sh |   24 ++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 6 deletions(-)
 create mode 100755 t/perf/t_test_score.sh

diff --git a/t/perf/aggregate.perl b/t/perf/aggregate.perl
index b04d3a0..4db685d 100755
--- a/t/perf/aggregate.perl
+++ b/t/perf/aggregate.perl
@@ -7,8 +7,12 @@
 	"$FindBin::Bin/../../perl/blib/arch/auto/Git";
 use Git;
 
+my $any_sign_printed = 0;
+
 sub get_times {
 	my $name = shift;
+	my $firstset = shift;
+	my $sig = "";
 	open my $fh, "<", $name or return undef;
 	my $sum_rt = 0.0;
 	my $sum_u = 0.0;
@@ -24,11 +28,21 @@ sub get_times {
 	}
 	return undef if !$n;
 	close $fh or die "cannot close $name: $!";
-	return ($sum_rt/$n, $sum_u/$n, $sum_s/$n);
+	if (defined $firstset &&
+	    open my $ph, "-|", "./t_test_score.sh $name $firstset 2>/dev/null") {
+		my $result = <$ph>;
+		close $ph or die "cannot close pipe to t_test_score.sh: $!";
+		chomp $result;
+		$sig = $result;
+		if ($sig ne "") {
+			$any_sign_printed = 1;
+		}
+	}
+	return ($sum_rt/$n, $sum_u/$n, $sum_s/$n, $sig);
 }
 
 sub format_times {
-	my ($r, $u, $s, $firstr) = @_;
+	my ($r, $u, $s, $sign, $firstr) = @_;
 	if (!defined $r) {
 		return "<missing>";
 	}
@@ -41,6 +55,7 @@ sub format_times {
 		} else {
 			$out .= " +inf";
 		}
+		$out .= $sign;
 	}
 	return $out;
 }
@@ -145,13 +160,17 @@ sub have_slash {
 }
 for my $t (@subtests) {
 	my $firstr;
+	my $firstset;
 	for my $i (0..$#dirs) {
 		my $d = $dirs[$i];
-		$times{$prefixes{$d}.$t} = [get_times("test-results/$prefixes{$d}$t.times")];
+		$times{$prefixes{$d}.$t} = [get_times("test-results/$prefixes{$d}$t.times", $firstset)];
 		my ($r,$u,$s,$sign) = @{$times{$prefixes{$d}.$t}};
 		my $w = length format_times($r,$u,$s,$sign,$firstr);
 		$colwidth[$i] = $w if $w > $colwidth[$i];
-		$firstr = $r unless defined $firstr;
+		if (!defined $firstr) {
+			$firstr = $r;
+			$firstset = "test-results/$prefixes{$d}$t.times";
+		}
 	}
 }
 my $totalwidth = 3*@dirs+$descrlen;
@@ -169,9 +188,15 @@ sub have_slash {
 	my $firstr;
 	for my $i (0..$#dirs) {
 		my $d = $dirs[$i];
-		my ($r,$u,$s) = @{$times{$prefixes{$d}.$t}};
-		printf "   %-$colwidth[$i]s", format_times($r,$u,$s,$firstr);
+		my ($r,$u,$s,$sign) = @{$times{$prefixes{$d}.$t}};
+		printf "   %-$colwidth[$i]s", format_times($r,$u,$s,$sign,$firstr);
 		$firstr = $r unless defined $firstr;
 	}
 	print "\n";
 }
+
+if ($any_sign_printed) {
+	print "-"x$totalwidth, "\n";
+	print "Significance hints:  '.' 0.1  '*' 0.05  '**' 0.01  '***' 0.001\n"
+}
+
diff --git a/t/perf/t_test_score.sh b/t/perf/t_test_score.sh
new file mode 100755
index 0000000..32353d6
--- /dev/null
+++ b/t/perf/t_test_score.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+# If the user doesn't have R, we don't care
+
+command -v R >/dev/null || exit 0
+
+# Uses R to run a two-sided Welch's t-test on whether the elapsed time
+# values in $1 and $2 differ, then prints a significance marker.
+
+pvalue=$(R --no-save --slave <<-EOF
+	a <- read.table("$1")
+	b <- read.table("$2")
+	tst <- t.test(a\$V1, b\$V1)
+	p <- tst\$p.value
+	if (p<0.001) print ("***") \
+	else if (p<0.01) print ("**") \
+	else if (p<0.05) print ("*") \
+	else if (p<0.1) print (".")
+EOF
+)
+
+pvalue=${pvalue##\[1\] \"}
+pvalue=${pvalue%%\"}
+echo "$pvalue"
-- 
1.7.10.rc0.230.g16d90

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]