[PATCH 3/3] intel_perf_counters: Add support for Sandybridge.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Mar 26, 2013 at 10:06:39PM -0700, Kenneth Graunke wrote:
> While the Sandybridge PRM doesn't have any documentation on the GPU's
> performance counters, a lot of information can be gleaned from the older
> Ironlake PRM.  Oddly, none of the information documented there actually
> appears to apply to Ironlake.  However, it apparently works just great
> on Sandybridge.
> 
> Since this information has all been publicly available on the internet
> for around three years, we can use it.
> 
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>

Merged, thanks for the patches.
-Daniel

> ---
>  tools/intel_perf_counters.c | 146 ++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 146 insertions(+)
> 
> diff --git a/tools/intel_perf_counters.c b/tools/intel_perf_counters.c
> index fd268b1..b528361 100644
> --- a/tools/intel_perf_counters.c
> +++ b/tools/intel_perf_counters.c
> @@ -22,9 +22,21 @@
>   *
>   * Authors:
>   *    Eric Anholt <eric at anholt.net>
> + *    Kenneth Graunke <kenneth at whitecape.org>
> + *
> + * While documentation for performance counters is suspiciously missing from the
> + * Sandybridge PRM, they were documented in Volume 1 Part 3 of the Ironlake PRM.
> + *
> + * A lot of the Ironlake PRM actually unintentionally documents Sandybridge
> + * due to mistakes made when updating the documentation for Gen6+.  Many of
> + * these mislabeled sections carried forward to the public documentation.
> + *
> + * The Ironlake PRMs have been publicly available since 2010 and are online at:
> + * https://01.org/linuxgraphics/documentation/2010-intel-core-processor-family
>   */
>  
>  #include <unistd.h>
> +#include <stdbool.h>
>  #include <stdlib.h>
>  #include <stdio.h>
>  #include <err.h>
> @@ -71,6 +83,60 @@ const char *gen5_counter_names[GEN5_COUNTER_COUNT] = {
>  	"cycles any EU is stalled for math",
>  };
>  
> +#define GEN6_COUNTER_COUNT 29
> +
> +/**
> + * Sandybridge: Counter Select = 001
> + * A0   A1   A2   A3   A4   TIMESTAMP RPT_ID
> + * A5   A6   A7   A8   A9   A10  A11  A12
> + * A13  A14  A15  A16  A17  A18  A19  A20
> + * A21  A22  A23  A24  A25  A26  A27  A28
> + */
> +const int gen6_counter_format = 1;
> +
> +/**
> + * Names for aggregating counters A0-A28.
> + *
> + * While the Ironlake PRM clearly documents that there are 29 counters (A0-A28),
> + * it only lists the names for 28 of them; one is missing.  However, careful
> + * examination reveals a pattern: there are five GS counters (Active, Stall,
> + * Core Stall, # threads loaded, and ready but not running time).  There are
> + * also five PS counters, in the same order.  But there are only four VS
> + * counters listed - the number of VS threads loaded is missing.  Presumably,
> + * it exists and is counter 5, and the rest are shifted over one place.
> + */
> +const char *gen6_counter_names[GEN6_COUNTER_COUNT] = {
> +	[0]  = "Aggregated Core Array Active",
> +	[1]  = "Aggregated Core Array Stalled",
> +	[2]  = "Vertex Shader Active Time",
> +	[3]  = "Vertex Shader Stall Time",
> +	[4]  = "Vertex Shader Stall Time - Core Stall",
> +	[5]  = "# VS threads loaded",
> +	[6]  = "Vertex Shader Ready but not running time",
> +	[7]  = "Geometry Shader Active Time",
> +	[8]  = "Geometry Shader Stall Time",
> +	[9]  = "Geometry Shader Stall Time - Core Stall",
> +	[10] = "# GS threads loaded",
> +	[11] = "Geometry Shader ready but not running Time",
> +	[12] = "Pixel Shader Active Time",
> +	[13] = "Pixel Shader Stall Time",
> +	[14] = "Pixel Shader Stall Time - Core Stall",
> +	[15] = "# PS threads loaded",
> +	[16] = "Pixel Shader ready but not running Time",
> +	[17] = "Early Z Test Pixels Passing",
> +	[18] = "Early Z Test Pixels Failing",
> +	[19] = "Early Stencil Test Pixels Passing",
> +	[20] = "Early Stencil Test Pixels Failing",
> +	[21] = "Pixel Kill Count",
> +	[22] = "Alpha Test Pixels Failed",
> +	[23] = "Post PS Stencil Pixels Failed",
> +	[24] = "Post PS Z buffer Pixels Failed",
> +	[25] = "Pixels/samples Written in the frame buffer",
> +	[26] = "GPU Busy",
> +	[27] = "CL active and not stalled",
> +	[28] = "SF active and stalled",
> +};
> +
>  int have_totals = 0;
>  uint32_t *totals;
>  uint32_t *last_counter;
> @@ -85,6 +151,20 @@ struct intel_batchbuffer *batch;
>  #define MI_COUNTER_ADDRESS_GTT	(1 << 0)
>  /* DW2: report ID */
>  
> +/**
> + * According to the Sandybridge PRM, Volume 1, Part 1, page 48,
> + * MI_REPORT_PERF_COUNT is now opcode 0x28.  The Ironlake PRM, Volume 1,
> + * Part 3 details how it works.
> + */
> +/* DW0 */
> +#define GEN6_MI_REPORT_PERF_COUNT (0x28 << 23)
> +/* DW1 and 2 are the same as above */
> +
> +/* OACONTROL exists on Gen6+ but is documented in the Ironlake PRM */
> +#define OACONTROL                       0x2360
> +# define OACONTROL_COUNTER_SELECT_SHIFT 2
> +# define PERFORMANCE_COUNTER_ENABLE     (1 << 0)
> +
>  static void
>  gen5_get_counters(void)
>  {
> @@ -124,6 +204,45 @@ gen5_get_counters(void)
>  	drm_intel_bo_unreference(stats_bo);
>  }
>  
> +static void
> +gen6_get_counters(void)
> +{
> +	int i;
> +	drm_intel_bo *stats_bo;
> +	uint32_t *stats_result;
> +
> +	/* Map from counter names to their index in the buffer object */
> +	static const int buffer_index[GEN6_COUNTER_COUNT] =
> +	{
> +		7,   6,  5,  4,  3,
> +		15, 14, 13, 12, 11, 10,  9,  8,
> +		23, 22, 21, 20, 19, 18, 17, 16,
> +		31, 30, 29, 28, 27, 26, 25, 24,
> +	};
> +
> +	stats_bo = drm_intel_bo_alloc(bufmgr, "stats", 4096, 4096);
> +
> +	BEGIN_BATCH(3);
> +	OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT | (3 - 2));
> +	OUT_RELOC(stats_bo,
> +		  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
> +		  MI_COUNTER_ADDRESS_GTT);
> +	OUT_BATCH(0);
> +	ADVANCE_BATCH();
> +
> +	intel_batchbuffer_flush_on_ring(batch, I915_EXEC_RENDER);
> +
> +	drm_intel_bo_map(stats_bo, 0);
> +	stats_result = stats_bo->virtual;
> +	for (i = 0; i < GEN6_COUNTER_COUNT; i++) {
> +		totals[i] += stats_result[buffer_index[i]] - last_counter[i];
> +		last_counter[i] = stats_result[buffer_index[i]];
> +	}
> +
> +	drm_intel_bo_unmap(stats_bo);
> +	drm_intel_bo_unreference(stats_bo);
> +}
> +
>  #define STATS_CHECK_FREQUENCY	100
>  #define STATS_REPORT_FREQUENCY	2
>  
> @@ -131,6 +250,7 @@ int
>  main(int argc, char **argv)
>  {
>  	uint32_t devid;
> +	int counter_format;
>  	int counter_count;
>  	const char **counter_name;
>  	void (*get_counters)(void);
> @@ -138,6 +258,7 @@ main(int argc, char **argv)
>  	char clear_screen[] = {0x1b, '[', 'H',
>  			       0x1b, '[', 'J',
>  			       0x0};
> +	bool oacontrol = true;
>  	int fd;
>  	int l;
>  
> @@ -152,10 +273,27 @@ main(int argc, char **argv)
>  		counter_name = gen5_counter_names;
>  		counter_count = GEN5_COUNTER_COUNT;
>  		get_counters = gen5_get_counters;
> +		oacontrol = false;
> +	} else if (IS_GEN6(devid)) {
> +		counter_name = gen6_counter_names;
> +		counter_count = GEN6_COUNTER_COUNT;
> +		counter_format = gen6_counter_format;
> +		get_counters = gen6_get_counters;
>  	} else {
>  		printf("This tool is not yet supported on your platform.\n");
>  		abort();
>  	}
> +
> +	if (oacontrol) {
> +		/* Forcewake */
> +		intel_register_access_init(intel_get_pci_device(), 0);
> +
> +		/* Enable performance counters */
> +		intel_register_write(OACONTROL,
> +			counter_format << OACONTROL_COUNTER_SELECT_SHIFT |
> +			PERFORMANCE_COUNTER_ENABLE);
> +	}
> +
>  	totals = calloc(counter_count, sizeof(uint32_t));
>  	last_counter = calloc(counter_count, sizeof(uint32_t));
>  
> @@ -180,6 +318,14 @@ main(int argc, char **argv)
>  		}
>  	}
>  
> +	if (oacontrol) {
> +		/* Disable performance counters */
> +		intel_register_write(OACONTROL, 0);
> +
> +		/* Release forcewake */
> +		intel_register_access_fini();
> +	}
> +
>  	free(totals);
>  	free(last_counter);
>  
> -- 
> 1.8.2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch


[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux