[PATCH 2/2] aperf: Make data collection cpu hotplug proof

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Measuring boosting performance fails when some cores are offlined because
the core counting is not aware of that fact. Make it more robust and
print an "offline" label for the offlined cores instead of bailing out.

Also, simplify the feature checks and relocate them to main().

Signed-off-by: Borislav Petkov <borislav.petkov@xxxxxxx>
---
 utils/aperf.c |  102 ++++++++++++++++++++++++++++++---------------------------
 1 files changed, 54 insertions(+), 48 deletions(-)

diff --git a/utils/aperf.c b/utils/aperf.c
index 1d58d00..edc282d 100644
--- a/utils/aperf.c
+++ b/utils/aperf.c
@@ -59,7 +59,7 @@
 #define MSR_IA32_APERF 0x000000E8
 #define MSR_IA32_MPERF 0x000000E7
 
-#define LINE_LEN 10
+#define LINE_LEN 5
 
 struct avg_perf_cpu_info
 {
@@ -69,14 +69,14 @@ struct avg_perf_cpu_info
 	uint32_t is_valid:1;
 };
 
-static unsigned int count_cpus(void)
+static unsigned int count_max_cpus(void)
 {
 	FILE *fp;
 	char value[LINE_LEN];
 	unsigned int ret = 0;
 	unsigned int cpunr = 0;
 
-	fp = fopen("/proc/stat", "r");
+	fp = fopen("/sys/devices/system/cpu/present", "r");
 	if(!fp) {
 		printf("Couldn't count the number of CPUs (%s: %s), "
 			"assuming 1\n", "/proc/stat", strerror(errno));
@@ -86,12 +86,13 @@ static unsigned int count_cpus(void)
 	while (!feof(fp)) {
 		if (!fgets(value, LINE_LEN, fp))
 			continue;
+
 		value[LINE_LEN - 1] = '\0';
+
 		if (strlen(value) < (LINE_LEN - 2))
 			continue;
-		if (strstr(value, "cpu "))
-			continue;
-		if (sscanf(value, "cpu%d ", &cpunr) != 1)
+
+		if (sscanf(value, "0-%d", &cpunr) != 1)
 			continue;
 		if (cpunr > ret)
 			ret = cpunr;
@@ -153,7 +154,7 @@ static int get_aperf_mperf(unsigned int cpu, uint64_t *aperf, uint64_t *mperf)
 	retval = read_msr(cpu, MSR_IA32_APERF, (unsigned long long*)aperf);
 	if (retval < 0)
 		return retval;
-	
+
 	retval = read_msr(cpu, MSR_IA32_MPERF, (unsigned long long*)mperf);
 	if (retval < 0)
 		return retval;
@@ -237,35 +238,20 @@ static int get_measure_start_info(unsigned int cpu,
 	int ret;
 
 	cpu_info->is_valid = 0;
-		
-	ret = has_mperf_aperf_support(cpu);
-	if (ret < 0) {
-		fprintf(stderr, "Could not read cpuid, is the cpuid "
-			"driver loaded or compiled into the kernel?\n");
-		return ret;
-	} else if (ret == 0) {
-		fprintf(stderr, "CPU(s) do not support mperf/aperf MSR "
-			"registers\n");
-		return EXIT_FAILURE;
-	}
-	
-	if (cpufreq_get_hardware_limits(cpu, &min, &max)) {
-		fprintf(stderr, "Could not get max frequency (P0), a "
-			"cpufreq driver must be loaded?\n");
-		return EXIT_FAILURE;
-	} else
-		cpu_info->max_freq = max;
-	
+
+	if (cpufreq_get_hardware_limits(cpu, &min, &max))
+		return -EINVAL;
+
+	cpu_info->max_freq = max;
+
 	ret = get_aperf_mperf(cpu, &aperf, &mperf);
-	if (ret < 0) {
-		fprintf(stderr, "Could not read MSRs, is the msr driver loaded"
-			" or compiled into the kernel?\n");
-		return EXIT_FAILURE;
-	} else {
-		cpu_info->saved_aperf = aperf;
-		cpu_info->saved_mperf = mperf;
-	}
+	if (ret < 0)
+		return -EINVAL;
+
+	cpu_info->saved_aperf = aperf;
+	cpu_info->saved_mperf = mperf;
 	cpu_info->is_valid = 1;
+
 	return 0;
 }
 
@@ -280,7 +266,6 @@ static void print_cpu_stats(unsigned int cpu, unsigned long average,
 	printf("%.2u", c0_percent);
 }
 
-
 static int do_measuring_on_cpu(int sleep_time, int once, int cpu)
 {
 	int ret;
@@ -305,13 +290,16 @@ static int do_measuring_on_cpu(int sleep_time, int once, int cpu)
 
 		if (!cpu_info.is_valid)
 			continue;
-		
+
 		ret = get_aperf_mperf(cpu, &current_aperf, &current_mperf);
-		if (ret < 0)
-			return EXIT_FAILURE;
+		if (ret < 0) {
+			printf("\t[offline]\n");
+			continue;
+		}
+
 		mperf_diff = current_mperf - cpu_info.saved_mperf;
 		aperf_diff = current_aperf - cpu_info.saved_aperf;
-		
+
 		get_C_state_time(diff_time, mperf_diff,
 				 cpu_info.max_freq,
 				 &C0_time, &CX_time,
@@ -343,16 +331,16 @@ static int do_measure_all_cpus(int sleep_time, int once)
 	uint64_t current_aperf, current_mperf, mperf_diff, aperf_diff;
 	struct avg_perf_cpu_info *cpu_list;
 
-	cpus = count_cpus();
+	cpus = count_max_cpus();
 
 	cpu_list = (struct avg_perf_cpu_info*)
 		malloc(cpus * sizeof (struct avg_perf_cpu_info));
-	
+
 	for (cpu = 0; cpu < cpus; cpu++) {
 		ret = get_measure_start_info(cpu, &cpu_list[cpu]);
 		if (ret)
-			return ret;
-	}	
+			continue;
+	}
 
 	while(1) {
 		gettimeofday(&start_time, NULL);
@@ -367,13 +355,15 @@ static int do_measure_all_cpus(int sleep_time, int once)
 		       sizeof(struct timeval));
 
 		for (cpu = 0; cpu < cpus; cpu++) {
-			if (!cpu_list[cpu].is_valid)
-				continue;
 
 			ret = get_aperf_mperf(cpu, &current_aperf,
 					      &current_mperf);
-			if (ret < 0)
-				return EXIT_FAILURE;
+
+			if ((ret < 0) || !cpu_list[cpu].is_valid) {
+				printf("\t[offline]\n");
+				continue;
+			}
+
 			mperf_diff = current_mperf - cpu_list[cpu].saved_mperf;
 			aperf_diff = current_aperf - cpu_list[cpu].saved_aperf;
 
@@ -424,6 +414,8 @@ int main(int argc, char *argv[])
 {
 	int c, ret, cpu = -1;
 	int sleep_time = 1, once = 0;
+	const char *msr_path = "/dev/cpu/0/msr";
+
 	while ( (c = getopt_long(argc,argv,"c:ohi:",long_options,
 				 NULL)) != -1 ) {
 		switch ( c ) {
@@ -441,6 +433,18 @@ int main(int argc, char *argv[])
 			break;
 		}
 	}
+
+	if (!cpu_has_effective_freq()) {
+		fprintf(stderr, "CPU doesn't support APERF/MPERF!\n");
+		return EXIT_FAILURE;
+	}
+
+	ret = access(msr_path, F_OK);
+	if (ret < 0) {
+		fprintf(stderr, "Error reading %s, load/enable msr.ko\n", msr_path);
+		goto out;
+	}
+
 	printf("CPU\tAverage freq(KHz)\tTime in C0\tTime in"
 	       " Cx\tC0 percentage\n");
 
@@ -448,6 +452,8 @@ int main(int argc, char *argv[])
 		ret = do_measure_all_cpus(sleep_time, once);
 	else
 		ret = do_measuring_on_cpu(sleep_time, once, cpu);
-	return ret;		
+
+out:
+	return ret;
 }
 /******* Options parsing, main ********/
-- 
1.7.0

-- 
Regards/Gruss,
Boris.

--
Advanced Micro Devices, Inc.
Operating Systems Research Center
--
To unsubscribe from this list: send the line "unsubscribe cpufreq" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Devel]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Forum]     [Linux SCSI]

  Powered by Linux