Adds functions to create a number of different batch buffers to perform several functions including: Batch buffer which will run for a long duration to provide a delay on a specified ring. Function to calibrate the delay batch buffer to run for a specified period of time. Function to create a batch buffer which writes timestamps to a buffer object. Function to compare timestamps allowing for wrapping of the values. v2: Moved code to intel_batchbuffer (Daniel Vetter) Addressed review comments from Daniele Ceraolo Spurio Signed-off-by: Derek Morton <derek.j.morton@xxxxxxxxx> --- lib/intel_batchbuffer.c | 384 +++++++++++++++++++++++++++++++++++++++++++++++- lib/intel_batchbuffer.h | 14 ++ 2 files changed, 393 insertions(+), 5 deletions(-) diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c index 692521f..30e78c5 100644 --- a/lib/intel_batchbuffer.c +++ b/lib/intel_batchbuffer.c @@ -1,8 +1,8 @@ /************************************************************************** - * + * * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
@@ -22,7 +22,7 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ #include <inttypes.h> @@ -30,6 +30,8 @@ #include <stdio.h> #include <string.h> #include <assert.h> +#include <stdint.h> +#include <time.h> #include "drm.h" #include "drmtest.h" @@ -42,6 +44,7 @@ #include "ioctl_wrappers.h" #include "media_spin.h" #include "gpgpu_fill.h" +#include "igt_gt.h" #include <i915_drm.h> @@ -817,3 +820,374 @@ igt_media_spinfunc_t igt_get_media_spinfunc(int devid) return spin; } + +#define SEC_TO_NSEC (1000 * 1000 * 1000) +#define DWORDS_TO_BYTES(x) ((x)*4) + +#define MI_STORE_REGISTER_MEM(LENGTH) ((0x024 << 23) | ((LENGTH - 2) & 0xff)) +#define MI_MATH(NrInst) ((0x01A << 23) | ((NrInst - 1) & 0x3f)) +#define MI_CONDITIONAL_BATCH_BUFFER_END ((0x036 << 23) | (1 << 21) | 2) +#define MI_COPY_MEM_MEM ((0x02E << 23) | (3)) + +#define ALU_LOAD(TO, FROM) ((0x080 << 20) | ((TO) << 10) | (FROM)) +#define ALU_SUB ( 0x101 << 20) +#define ALU_STORE(TO, FROM) ((0x180 << 20) | ((TO) << 10) | (FROM)) + +#define TIMESTAMP_offset (0x358) /* Elapsed time from system start */ +#define CTX_TIMESTAMP_offset (0x3A8) /* Elapsed Time from context creation */ +#define ALU_GPU_R0_LSB_offset (0x600) +#define ALU_GPU_R0_MSB_offset (0x604) +#define ALU_GPU_R1_LSB_offset (0x608) +#define ALU_GPU_R1_MSB_offset (0x60C) +#define ALU_GPU_R2_LSB_offset (0x610) +#define ALU_GPU_R2_MSB_offset (0x614) + +#define ALU_R0_ENCODING (0x00) +#define ALU_R1_ENCODING (0x01) +#define ALU_SRCA_ENCODING (0x20) +#define ALU_SRCB_ENCODING (0x21) +#define ALU_ACCU_ENCODING (0x31) + +static int bb_address_size_dw(int fd) +{ + if (intel_gen(intel_get_drm_devid(fd)) >= 8) + return 2; + else + return 1; +} + +static uint32_t get_mmio_base(int ringid) +{ + switch (ringid) { + case I915_EXEC_RENDER: + 
return 0x02000; + case I915_EXEC_BSD: + case I915_EXEC_BSD | 1<<13: /* BSD1 */ + return 0x12000; + case I915_EXEC_BSD | 2<<13: /* BSD2 */ + return 0x1c000; + case I915_EXEC_BLT: + return 0x22000; + case I915_EXEC_VEBOX: + return 0x1A000; + default: + igt_assert_f(0, "Invalid ringid %d passed to get_mmio_base()\n", ringid); + } +} + +/** + * igt_batch_used: + * @batch: batchbuffer to get offset from + * + * This returns the number of bytes of the batchbuffer that have been used. + * e.g. The offset into the batchbuffer that the next OUT_BATCH would write to. + * + * Returns: + * The number of bytes of the batchbuffer that have been used. + */ +uint32_t igt_batch_used(struct intel_batchbuffer *batch) +{ + return batch->ptr - batch->buffer; +} + +/** + * igt_create_delay_bb: + * @fd: file descriptor for i915 driver instance + * @batch: Batch buffer to write to + * @ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER + * @loops: Number of times to loop + * @dest: Buffer to use for saving the current loop count and timestamp. + * + * This creates a batch buffer which will iterate a loop a specified number + * of times. Intended for creating batch buffers which take an arbitrarily + * long time to execute. This can be useful to keep a ring busy while + * constructing a test scenario. + * + * The dest buffer will have a number of Dwords written by the batch buffer + * when it runs. They are: + * DW0 & DW1 - These are loaded with the value of 'loops' and are decremented + * as the batch buffer executes. They will be 0 after the batch + * buffer completes if it finished successfully. + * DW2 Timestamp - An indication of when the batch buffer ran allowing a + * comparison between batch buffers to show execution order. + * May wrap so igt_compare_timestamps() should be used to + * compare timestamps. + * The timestamp will wrap every few minutes. 
+ * + */ +void igt_create_delay_bb(int fd, struct intel_batchbuffer *batch, + int ringid, uint32_t loops, drm_intel_bo *dest) +{ + int addr_size_dw; + uint32_t mmio_base, jump_offset; + + /* CMD parser blocks reading TIMESTAMP register on gen 7.5 */ + igt_require(intel_gen(intel_get_drm_devid(fd)) >= 8); + + addr_size_dw = bb_address_size_dw(fd); + mmio_base = get_mmio_base(ringid); + igt_assert(batch); + BEGIN_BATCH(32, 5); + + /* store current timestamp in DW2 */ + OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2)); + OUT_BATCH(mmio_base + TIMESTAMP_offset); + OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(2)); + + /* Load R0 with loops */ + OUT_BATCH(MI_LOAD_REGISTER_IMM); + OUT_BATCH(mmio_base + ALU_GPU_R0_LSB_offset); + OUT_BATCH(loops); + OUT_BATCH(MI_LOAD_REGISTER_IMM); + OUT_BATCH(mmio_base + ALU_GPU_R0_MSB_offset); + OUT_BATCH(0x00000000); + /* Load R1 with 1 */ + OUT_BATCH(MI_LOAD_REGISTER_IMM); + OUT_BATCH(mmio_base + ALU_GPU_R1_LSB_offset); + OUT_BATCH(0x00000001); + OUT_BATCH(MI_LOAD_REGISTER_IMM); + OUT_BATCH(mmio_base + ALU_GPU_R1_MSB_offset); + OUT_BATCH(0x00000000); + /* Copy R0 / R1 into SRCA / SRCB, Perform R0 - R1, Store result in R0 */ + /* e.g. R0 -= 1 */ + jump_offset=igt_batch_used(batch); + OUT_BATCH(MI_MATH(4)); + OUT_BATCH(ALU_LOAD(ALU_SRCA_ENCODING, ALU_R0_ENCODING)); + OUT_BATCH(ALU_LOAD(ALU_SRCB_ENCODING, ALU_R1_ENCODING)); + OUT_BATCH(ALU_SUB); + OUT_BATCH(ALU_STORE(ALU_R0_ENCODING, ALU_ACCU_ENCODING)); + /* Copy R0 to dest + * On Gen8 MI_CONDITIONAL_BATCH_BUFFER_END BSD ring Compare address + * points to 2 Dwords, a mask (DW0) and data (DW1) which are ANDed + * together. + * On Gen9+, and the other rings on Gen8 Compare address points to + * just Data (DW0). For simplicity always copy R0 LSB to DW0 and DW1. 
+ */ + OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2)); + OUT_BATCH(mmio_base + ALU_GPU_R0_LSB_offset); + OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); + OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2)); + OUT_BATCH(mmio_base + ALU_GPU_R0_LSB_offset); + OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(1)); + /* Repeat until R0 == 0 */ + OUT_BATCH(MI_CONDITIONAL_BATCH_BUFFER_END); + OUT_BATCH(0x00000000); + OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); + OUT_BATCH(MI_BATCH_BUFFER_START | (addr_size_dw - 1)); + OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, jump_offset); + + /* Should never get here, but end if it happens */ + OUT_BATCH(MI_BATCH_BUFFER_END); + ADVANCE_BATCH(); +} + +/** + * igt_create_timestamp_bb: + * @fd: file descriptor for i915 driver instance + * @batch: Batch buffer to write to + * ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER + * dest: Buffer to use for saving the timestamps. + * load: Buffer to access. Set NULL if not required. + * write: If true and load is not NULL, will also write a timestamp to load + * buffer. If false and load is not NULL, will read from load buffer into dest. + * Intended for dependency checking. + * + * This creates a batch buffer which writes timestamps into a buffer object. + * If 'load' is non null, data is either written to 'load' or copied from 'load' + * depending on whether 'write' is set. + * + * The dest buffer will have a number of Dwords written by the batch buffer + * when it runs. They are: + * DW0 Reported timestamp - An indication of when the batch buffer ran allowing a + * comparison between batch buffers to show execution order. + * May wrap so igt_compare_timestamps() should be used to + * compare timestamps. + * The timestamp will wrap every few minutes. 
+ * DW2 Value copied from DW0 of load if write == false + * + */ +void igt_create_timestamp_bb(int fd, struct intel_batchbuffer *batch, int ringid, + drm_intel_bo *dest, drm_intel_bo *load, bool write) +{ + int addr_size_dw; + uint32_t mmio_base; + + /* CMD parser blocks reading TIMESTAMP register on gen 7.5 */ + igt_require(intel_gen(intel_get_drm_devid(fd)) >= 8); + + addr_size_dw = bb_address_size_dw(fd); + mmio_base = get_mmio_base(ringid); + igt_assert(batch); + + BEGIN_BATCH(3, 1); + /* store current reported timestamp in DW0 */ + OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2)); + OUT_BATCH(mmio_base + TIMESTAMP_offset); + OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(0)); + + ADVANCE_BATCH(); + + if(load != NULL) { + if(write) { + BEGIN_BATCH(3, 1); + OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2)); + OUT_BATCH(mmio_base + TIMESTAMP_offset); + OUT_RELOC(load, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(0)); + ADVANCE_BATCH(); + } + else { + BEGIN_BATCH(3, 2); + OUT_BATCH(MI_COPY_MEM_MEM); + OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(2)); + OUT_RELOC(load, I915_GEM_DOMAIN_INSTRUCTION, 0, DWORDS_TO_BYTES(0)); + ADVANCE_BATCH(); + } + } + + BEGIN_BATCH(1, 0); + OUT_BATCH(MI_BATCH_BUFFER_END); + ADVANCE_BATCH(); +} + +/** + * igt_create_noop_bb: + * @batch: Batch buffer to write to + * noops: Number of MI_NOOP instructions to add to the batch buffer. + * + * This creates a batch buffer with a specified number of MI_NOOP instructions. + */ +void igt_create_noop_bb(struct intel_batchbuffer *batch, int noops) +{ + int loop; + + igt_assert(batch); + + BEGIN_BATCH(noops + 1, 0); + for(loop = 0; loop < noops; loop++) + OUT_BATCH(MI_NOOP); + OUT_BATCH(MI_BATCH_BUFFER_END); + ADVANCE_BATCH(); + +} + +/* Store calibrated values so they only need calculating once. 
+ * Use intel_execution_engines array as list of supported rings + */ +static uint32_t *calibrated_ring_value = NULL; + +/** + * igt_calibrate_delay_bb: + * @fd: file descriptor for i915 driver instance + * @bufmgr: Buffer manager to be used for creation of batch buffers + * @ringid: Ring to calibrate. e.g. I915_EXEC_RENDER + * + * This calculates the value of loops that would need to be passed to + * igt_create_delay_bb() to create a delay of about 1 second on the specified + * ring. + * + * Returns: + * uint32_t to be passed to igt_create_delay_bb(). + */ +/* 0x100000 will run for about 0.6 - 0.8 seconds (dependent on ring) on BXT HW */ +#define CAL_SEED (0x100000) +uint32_t igt_calibrate_delay_bb(int fd, drm_intel_bufmgr *bufmgr, int ringid) +{ + uint32_t buf[2]; + struct intel_batchbuffer *bb; + struct timespec start, end; + uint64_t duration; + uint64_t calibrated; + drm_intel_bo *target_bo; + int ring_index=0; + +/* igt_assert(ringid < 8); + if(calibrated_ring_value[ringid] != 0) + return calibrated_ring_value[ringid];*/ + + if(calibrated_ring_value == NULL) { + int count; + for(count = 0; intel_execution_engines[count].name != NULL; count++) {} + calibrated_ring_value = calloc(count, sizeof(uint32_t)); + igt_assert(calibrated_ring_value); + } + + /* Check if there is already a calibration value for this ring */ + while(intel_execution_engines[ring_index].name != NULL) { + if((intel_execution_engines[ring_index].exec_id | + intel_execution_engines[ring_index].flags) == ringid) { + if(calibrated_ring_value[ring_index] != 0) { + return calibrated_ring_value[ring_index]; + } + break; + } + ring_index++; + } + + target_bo = drm_intel_bo_alloc(bufmgr, "target bo", BATCH_SZ, BATCH_SZ); + igt_assert(target_bo); + + /* Put some non zero values in the target bo */ + { + uint32_t data=0xffffffff; + drm_intel_bo_subdata(target_bo, 0, 4, &data); + } + + bb = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd)); + igt_assert(bb); + igt_create_delay_bb(fd, bb, ringid, 
CAL_SEED, target_bo); + + gem_quiescent_gpu(fd); + clock_gettime(CLOCK_MONOTONIC, &start); + intel_batchbuffer_flush_on_ring(bb, ringid); + /* This will not return until the bo has finished executing */ + drm_intel_bo_wait_rendering(target_bo); + clock_gettime(CLOCK_MONOTONIC, &end); + + drm_intel_bo_get_subdata(target_bo, 0, 4, (void*)buf); + + /* buf[0] in the target buffer should be 0 if the batch buffer completed */ + igt_assert_f(buf[0] == 0, "buf[0] expected 0x0, got 0x%x\n", buf[0]); + + duration = ((((uint64_t)end.tv_sec - (uint64_t)start.tv_sec) * SEC_TO_NSEC) + + (uint64_t)end.tv_nsec) - (uint64_t)start.tv_nsec; + + calibrated = (((uint64_t)(CAL_SEED) * SEC_TO_NSEC) / duration); + igt_debug("Uncalibrated run took %" PRIu64 ".%04" PRIu64 "s\n", + duration / SEC_TO_NSEC, + (duration % SEC_TO_NSEC) / 100000); + drm_intel_bo_unreference(target_bo); + intel_batchbuffer_free(bb); + + /* Sanity check. If duration < 100ms, something has clearly gone wrong */ + igt_assert(duration > (SEC_TO_NSEC / 10)); + + igt_assert_f(calibrated <= UINT32_MAX, "Calibrated value > UINT32_MAX\n"); + + if(intel_execution_engines[ring_index].name != NULL) + calibrated_ring_value[ring_index] = (uint32_t)calibrated; + return (uint32_t)calibrated; +} + +/** + * igt_compare_timestamps: + * @ts1: timestamp 1 + * @ts2: timestamp 2 + * + * This compares two uint32_t timestamps. To handle wrapping it assumes the + * difference between the two timestamps is less than 1/4 the max elapsed time + * represented by the counters. + * It also assumes the timestamps are samples from the same counter. + * + * Returns: + * True if ts2 > ts1, allowing for counter wrapping, false otherwise. 
+ */ + +bool igt_compare_timestamps(uint32_t ts1, uint32_t ts2) +{ + if (ts2 > ts1) + return true; + else if ((ts1 > 0x80000000) && (ts2 < 0x40000000)) + return true; /* Assuming timestamp counter wrapped */ + else + return false; +} diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h index 869747d..5b66fa3 100644 --- a/lib/intel_batchbuffer.h +++ b/lib/intel_batchbuffer.h @@ -323,4 +323,18 @@ typedef void (*igt_media_spinfunc_t)(struct intel_batchbuffer *batch, igt_media_spinfunc_t igt_get_media_spinfunc(int devid); +uint32_t igt_batch_used(struct intel_batchbuffer *batch); + +void igt_create_delay_bb(int fd, struct intel_batchbuffer *batch, + int ringid, uint32_t loops, drm_intel_bo *dest); + +void igt_create_timestamp_bb(int fd, struct intel_batchbuffer *batch, int ringid, + drm_intel_bo *dest, drm_intel_bo *load, bool write); + +void igt_create_noop_bb(struct intel_batchbuffer *batch, int noops); + +uint32_t igt_calibrate_delay_bb(int fd, drm_intel_bufmgr *bufmgr, int ringid); + +bool igt_compare_timestamps(uint32_t ts1, uint32_t ts2); + #endif -- 1.9.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx