These tests were initially reviewed/merged under the gem_softpin title. They use softpinning and userptr mechanism to share buffers between CPU and GPU. The userptr part was decoupled from them recently. Adding these tests under a different name to ensure buffered SVM usage testing. The only change made was to instantiate the drm fd in the main instead of every subtest. Cc: Michel Thierry <michel.thierry@xxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> --- tests/Makefile.sources | 1 + tests/gem_buffered_svm.c | 1051 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1052 insertions(+) create mode 100644 tests/gem_buffered_svm.c diff --git a/tests/Makefile.sources b/tests/Makefile.sources index df92586..e6ec6f8 100644 --- a/tests/Makefile.sources +++ b/tests/Makefile.sources @@ -17,6 +17,7 @@ TESTS_progs_M = \ drv_hangman \ gem_bad_reloc \ gem_basic \ + gem_buffered_svm \ gem_busy \ gem_caching \ gem_close_race \ diff --git a/tests/gem_buffered_svm.c b/tests/gem_buffered_svm.c new file mode 100644 index 0000000..90e63c4 --- /dev/null +++ b/tests/gem_buffered_svm.c @@ -0,0 +1,1051 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Vinay Belgaumkar <vinay.belgaumkar@xxxxxxxxx> + * Thomas Daniel <thomas.daniel@xxxxxxxxx> + * + */ + +#include <unistd.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <fcntl.h> +#include <inttypes.h> +#include <errno.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <sys/time.h> +#include <malloc.h> +#include "drm.h" +#include "ioctl_wrappers.h" +#include "drmtest.h" +#include "intel_chipset.h" +#include "intel_io.h" +#include "i915_drm.h" +#include <assert.h> +#include <sys/wait.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include "igt_kms.h" +#include <inttypes.h> +#include <sys/types.h> +#include <sys/stat.h> +#include "igt.h" + +#define BO_SIZE 4096 +#define MULTIPAGE_BO_SIZE 2 * BO_SIZE +#define STORE_BATCH_BUFFER_SIZE 4 +#define EXEC_OBJECT_PINNED (1<<4) +#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) +#define SHARED_BUFFER_SIZE 4096 + +typedef struct drm_i915_gem_userptr i915_gem_userptr; + +static uint32_t init_userptr(int fd, i915_gem_userptr *, void *ptr, uint64_t size); +static void *create_mem_buffer(uint64_t size); +static int gem_call_userptr_ioctl(int fd, i915_gem_userptr *userptr); +static void gem_pin_userptr_test(int fd); +static void gem_pin_bo_test(int fd); +static void gem_pin_invalid_vma_test(int fd, bool test_decouple_flags, bool test_canonical_offset); +static void gem_pin_overlap_test(int fd); +static void gem_pin_high_address_test(int fd); + +#define NO_PPGTT 0 +#define ALIASING_PPGTT 1 +#define FULL_32_BIT_PPGTT 2 +#define FULL_48_BIT_PPGTT 3 +/* uses_full_ppgtt + * Finds supported PPGTT details. + * @fd DRM fd + * @min can be + * 0 - No PPGTT + * 1 - Aliasing PPGTT + * 2 - Full PPGTT (32b) + * 3 - Full PPGTT (48b) + * RETURNS true/false if min support is present +*/ +static bool uses_full_ppgtt(int fd, int min) +{ + struct drm_i915_getparam gp; + int val = 0; + + memset(&gp, 0, sizeof(gp)); + gp.param = 18; /* HAS_ALIASING_PPGTT */ + gp.value = &val; + + if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp)) + return 0; + + errno = 0; + return val >= min; +} + +/* gem_call_userptr_ioctl + * Helper to call ioctl - TODO: move to lib + * @fd - drm fd + * @userptr - pointer to initialised userptr + * RETURNS status of ioctl call +*/ +static int gem_call_userptr_ioctl(int fd, i915_gem_userptr *userptr) +{ + int ret; + + ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_USERPTR, userptr); + + if (ret) + ret = errno; + + return ret; +} + +/* init_userptr + * Helper that inits userptr an returns handle + * @fd - drm fd + * @userptr - pointer to empty userptr + * @ptr - buffer to be shared + * @size - size of buffer + * @ro - read only flag + * RETURNS handle to shared buffer +*/ +static uint32_t init_userptr(int fd, i915_gem_userptr *userptr, void *ptr, + uint64_t size) +{ + int ret; + + memset((void*)userptr, 0, sizeof(i915_gem_userptr)); + + userptr->user_ptr = (unsigned long)ptr; /* Need the cast to overcome compiler warning */ + userptr->user_size = size; + userptr->flags = 0; /* use synchronized operation */ + + ret = gem_call_userptr_ioctl(fd, userptr); + igt_assert_eq(ret, 0); + + return userptr->handle; +} + +/* create_mem_buffer + * Creates a 4K aligned CPU buffer + * @size - size of buffer + * RETURNS pointer to buffer of @size +*/ +static void *create_mem_buffer(uint64_t size) +{ + void *addr; + int ret; + + ret = posix_memalign(&addr, 4096, size); + igt_assert(ret == 0); + + return addr; +} + +/* setup_exec_obj + * populate exec object + * @exec - exec object + * @handle - handle to gem buffer + * @flags - any flags + * @offset - requested VMA +*/ +static void setup_exec_obj(struct drm_i915_gem_exec_object2 *exec, + uint32_t handle, uint32_t flags, + uint64_t offset) +{ + memset(exec, 0, sizeof(struct drm_i915_gem_exec_object2)); + exec->handle = handle; + exec->flags = flags; + exec->offset = offset; +} + +/* gen8_canonical_addr + * Used to convert any address into canonical form, i.e. [63:48] == [47]. + * Based on kernel's sign_extend64 implementation. + * @address - a virtual address +*/ +#define GEN8_HIGH_ADDRESS_BIT 47 +static uint64_t gen8_canonical_addr(uint64_t address) +{ + __u8 shift = 63 - GEN8_HIGH_ADDRESS_BIT; + return (__s64)(address << shift) >> shift; +} + +/* gem_store_data_svm + * populate batch buffer with MI_STORE_DWORD_IMM command + * @fd: drm file descriptor + * @cmd_buf: batch buffer + * @vaddr: destination Virtual address + * @data: data to be store at destination + * @end: whether to end batch buffer or not +*/ +static int gem_store_data_svm(int fd, uint32_t *cmd_buf, uint64_t vaddr, + uint32_t data, bool end) +{ + int i = 0; + + cmd_buf[i++] = MI_STORE_DWORD_IMM; + cmd_buf[i++] = vaddr & 0xFFFFFFFC; + cmd_buf[i++] = (vaddr >> 32) & 0xFFFF; /* bits 32:47 */ + + cmd_buf[i++] = data; + if (end) { + cmd_buf[i++] = MI_BATCH_BUFFER_END; + cmd_buf[i++] = 0; + } + + return(i * sizeof(uint32_t)); +} + +/* gem_store_data + * populate batch buffer with MI_STORE_DWORD_IMM command + * This one fills up reloc buffer as well + * @fd: drm file descriptor + * @cmd_buf: batch buffer + * @data: data to be store at destination + * @reloc - relocation entry + * @end: whether to end batch buffer or not +*/ +static int gem_store_data(int fd, uint32_t *cmd_buf, + uint32_t handle, uint32_t data, + struct drm_i915_gem_relocation_entry *reloc, + bool end) +{ + int i = 0; + + cmd_buf[i++] = MI_STORE_DWORD_IMM; + cmd_buf[i++] = 0; /* lower 31 bits of 48 bit address - 0 reloc needed */ + cmd_buf[i++] = 0; /* upper 15 bits of 48 bit address - 0 reloc needed */ + reloc->offset = 1 * sizeof(uint32_t); + reloc->delta = 0; + reloc->target_handle = handle; + reloc->read_domains = I915_GEM_DOMAIN_RENDER; + reloc->write_domain = I915_GEM_DOMAIN_RENDER; + reloc->presumed_offset = 0; + cmd_buf[i++] = data; + if (end) { + cmd_buf[i++] = MI_BATCH_BUFFER_END; + cmd_buf[i++] = 0; + } + + return (i * sizeof(uint32_t)); +} + +/* setup_execbuffer + * helper for buffer execution + * @execbuf - pointer to execbuffer + * @exec_object - pointer to exec object2 struct + * @ring - ring to be used + * @buffer_count - how manu buffers to submit + * @batch_length - length of batch buffer +*/ +static void setup_execbuffer(struct drm_i915_gem_execbuffer2 *execbuf, + struct drm_i915_gem_exec_object2 *exec_object, + int ring, int buffer_count, int batch_length) +{ + execbuf->buffers_ptr = (unsigned long)exec_object; + execbuf->buffer_count = buffer_count; + execbuf->batch_start_offset = 0; + execbuf->batch_len = batch_length; + execbuf->cliprects_ptr = 0; + execbuf->num_cliprects = 0; + execbuf->DR1 = 0; + execbuf->DR4 = 0; + execbuf->flags = ring; + i915_execbuffer2_set_context_id(*execbuf, 0); + execbuf->rsvd2 = 0; +} + +/* submit_and_sync + * Helper function for exec and sync functions + * @fd - drm fd + * @execbuf - pointer to execbuffer + * @batch_buf_handle - batch buffer handle +*/ +static void submit_and_sync(int fd, struct drm_i915_gem_execbuffer2 *execbuf, + uint32_t batch_buf_handle) +{ + gem_execbuf(fd, execbuf); + gem_sync(fd, batch_buf_handle); +} + +/* gem_userptr_sync + * helper for syncing to CPU domain - copy/paste from userblit + * @fd - drm fd + * @handle - buffer handle to sync +*/ +static void gem_userptr_sync(int fd, uint32_t handle) +{ + gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); +} + + +/* gem_pin_userptr_test + * This test will create a shared buffer, and create a command + * for GPU to write data in it + * CPU will read and make sure expected value is obtained + * Malloc a 4K buffer + * Share buffer with with GPU by using userptr ioctl + * Create batch buffer to write DATA to first dword of buffer + * Use 0x1000 address as destination address in batch buffer + * Set EXEC_OBJECT_PINNED flag in exec object + * Set 'offset' in exec object to 0x1000 + * Submit execbuffer + * Verify value of first DWORD in shared buffer matches DATA +*/ +static void gem_pin_userptr_test(int fd) +{ + i915_gem_userptr userptr; + uint32_t *shared_buffer; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 exec_object2[2]; + uint32_t batch_buffer[STORE_BATCH_BUFFER_SIZE + 2]; + uint32_t batch_buf_handle, shared_buf_handle; + int ring, len; + const uint32_t data = 0x12345678; + uint64_t pinning_offset = 0x1000; + + igt_require(uses_full_ppgtt(fd, ALIASING_PPGTT)); + batch_buf_handle = gem_create(fd, BO_SIZE); + + /* create cpu buffer */ + shared_buffer = create_mem_buffer(BO_SIZE); + + /* share with GPU */ + shared_buf_handle = init_userptr(fd, &userptr, shared_buffer, + BO_SIZE); + + /* create command buffer with write command */ + len = gem_store_data_svm(fd, batch_buffer, pinning_offset, data, true); + gem_write(fd, batch_buf_handle, 0, batch_buffer, len); + + /* submit command buffer */ + setup_exec_obj(&exec_object2[0], shared_buf_handle, + EXEC_OBJECT_PINNED, pinning_offset); + setup_exec_obj(&exec_object2[1], batch_buf_handle, 0, 0); + + ring = I915_EXEC_RENDER; + + setup_execbuffer(&execbuf, exec_object2, ring, 2, len); + submit_and_sync(fd, &execbuf, batch_buf_handle); + gem_userptr_sync(fd, shared_buf_handle); + + /* Check if driver pinned the buffer as requested */ + igt_fail_on_f(exec_object2[0].offset != pinning_offset, + "\nFailed to pin at requested offset"); + /* check on CPU to see if value changes */ + igt_fail_on_f(shared_buffer[0] != data, + "\nCPU read does not match GPU write,\ + expected: 0x%x, got: 0x%x\n", + data, shared_buffer[0]); + + gem_close(fd, batch_buf_handle); + gem_close(fd, shared_buf_handle); + free(shared_buffer); +} + +/* gem_pin_bo + * This test will test softpinning of a gem buffer object + * Malloc a 4K buffer + * Create batch buffer to write DATA to first dword of buffer + * Use 0x1000 address as destination address in batch buffer + * Set EXEC_OBJECT_PINNED flag in exec object + * Set 'offset' in exec object to 0x1000 + * Submit execbuffer + * Verify value pinned offset matches the request +*/ +static void gem_pin_bo_test(int fd) +{ + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 exec_object2[2]; + uint32_t batch_buffer[STORE_BATCH_BUFFER_SIZE + 2]; + uint32_t batch_buf_handle, unshared_buf_handle; + struct drm_i915_gem_relocation_entry reloc[4]; + int ring, len; + uint32_t value; + const uint32_t data = 0x12345678; + uint64_t pinning_offset = 0x1000; + + igt_require(uses_full_ppgtt(fd, ALIASING_PPGTT)); + + batch_buf_handle = gem_create(fd, BO_SIZE); + + /* create gem buffer */ + unshared_buf_handle = gem_create(fd, BO_SIZE); + + /* create command buffer with write command */ + len = gem_store_data(fd, batch_buffer, unshared_buf_handle, data, + reloc, true); + gem_write(fd, batch_buf_handle, 0, batch_buffer, len); + + /* submit command buffer */ + setup_exec_obj(&exec_object2[0], unshared_buf_handle, + EXEC_OBJECT_PINNED, pinning_offset); + setup_exec_obj(&exec_object2[1], batch_buf_handle, 0, 0); + exec_object2[1].relocation_count = 1; + exec_object2[1].relocs_ptr = (unsigned long)reloc; + + ring = I915_EXEC_RENDER; + + setup_execbuffer(&execbuf, exec_object2, ring, 2, len); + submit_and_sync(fd, &execbuf, batch_buf_handle); + + /* Check if driver pinned the buffer as requested */ + igt_fail_on_f(exec_object2[0].offset != pinning_offset, + "\nFailed to pin at requested offset"); + gem_read(fd, unshared_buf_handle, 0, (void*)&value, 4); + igt_assert(value == data); + + gem_close(fd, batch_buf_handle); + gem_close(fd, unshared_buf_handle); +} + + +/* gem_multiple_process_test + * Run basic test simultaneously with multiple processes + * This will test pinning same VA separately in each process + + * fork(); + * Execute basic test in parent/child processes +*/ +#define MAX_NUM_PROCESSES 10 + +static void gem_multiple_process_test(int fd) +{ + igt_require(uses_full_ppgtt(fd, ALIASING_PPGTT)); + + igt_fork(child, MAX_NUM_PROCESSES) { + gem_pin_userptr_test(fd); + } + igt_waitchildren(); + +} + + +/* gem_repin_test + * This test tries to repin a buffer at a previously pinned vma + * from a different execbuf. + * Malloc a 4K buffer + * Share buffer with with GPU by using userptr ioctl + * Create batch buffer to write DATA to first dword of buffer + * Use 0x1000 address as destination address in batch buffer + * Set EXEC_OBJECT_PINNED flag in exec object + * Set 'offset' in exec object to 0x1000 VMA + * Submit execbuffer + * Verify value of first DWORD in shared buffer matches DATA + + * Create second shared buffer + * Follow all steps above + * Execpt, for offset, use VMA of first buffer above + * Submit execbuffer + * Verify value of first DWORD in second shared buffer matches DATA +*/ +static void gem_repin_test(int fd) +{ + i915_gem_userptr userptr; + i915_gem_userptr userptr1; + uint32_t *shared_buffer; + uint32_t *shared_buffer1; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 exec_object2[2]; + uint32_t batch_buffer[STORE_BATCH_BUFFER_SIZE + 2]; + uint32_t batch_buf_handle, shared_buf_handle, shared_buf_handle1; + int ring, len; + const uint32_t data = 0x12345678; + uint64_t pinning_offset = 0x1000; + + /* Create gem object */ + igt_require(uses_full_ppgtt(fd, ALIASING_PPGTT)); + + batch_buf_handle = gem_create(fd, BO_SIZE); + + /* create cpu buffer, set first elements to 0x0 */ + shared_buffer = create_mem_buffer(BO_SIZE); + shared_buffer1 = create_mem_buffer(BO_SIZE); + shared_buffer[0] = 0x0; + shared_buffer1[0] = 0x0; + + /* share with GPU and get handles */ + shared_buf_handle = init_userptr(fd, &userptr, shared_buffer, + BO_SIZE); + shared_buf_handle1 = init_userptr(fd, &userptr1, shared_buffer1, + BO_SIZE); + + /* create command buffer with write command */ + len = gem_store_data_svm(fd, batch_buffer, pinning_offset, data, true); + gem_write(fd, batch_buf_handle, 0, batch_buffer, len); + + /* submit command buffer */ + setup_exec_obj(&exec_object2[0], shared_buf_handle, + EXEC_OBJECT_PINNED, pinning_offset); + setup_exec_obj(&exec_object2[1], batch_buf_handle, 0, 0); + + ring = I915_EXEC_RENDER; + + setup_execbuffer(&execbuf, exec_object2, ring, 2, len); + submit_and_sync(fd, &execbuf, batch_buf_handle); + gem_userptr_sync(fd, shared_buf_handle); + + igt_assert(exec_object2[0].offset == pinning_offset); + igt_assert(*shared_buffer == data); + + /* Second buffer */ + /* create command buffer with write command */ + pinning_offset = exec_object2[0].offset; + len = gem_store_data_svm(fd, batch_buffer, pinning_offset, data, true); + gem_write(fd, batch_buf_handle, 0, batch_buffer, len); + + /* submit command buffer */ + /* Pin at shared_buffer, not shared_buffer1 */ + /* We are requesting address where another buffer was pinned previously */ + setup_exec_obj(&exec_object2[0], shared_buf_handle1, + EXEC_OBJECT_PINNED, pinning_offset); + setup_exec_obj(&exec_object2[1], batch_buf_handle, 0, 0); + + ring = I915_EXEC_RENDER; + + setup_execbuffer(&execbuf, exec_object2, ring, 2, len); + submit_and_sync(fd, &execbuf, batch_buf_handle); + gem_userptr_sync(fd, shared_buf_handle1); + + igt_assert(exec_object2[0].offset == pinning_offset); + igt_assert(*shared_buffer1 == data); + + gem_close(fd, batch_buf_handle); + gem_close(fd, shared_buf_handle); + + free(shared_buffer); + free(shared_buffer1); +} + + +/* gem_repin_overlap_test + * This test will attempt to pin two buffers at the same VMA as part of the same + execbuffer object + + * Malloc a 4K buffer + * Share buffer with with GPU by using userptr ioctl + * Create second shared buffer + * Create batch buffer to write DATA to first dword of each buffer + * Use same virtual address as destination addresses in batch buffer + * Set EXEC_OBJECT_PINNED flag in both exec objects + * Set 'offset' in both exec objects to same VMA + * Submit execbuffer + * Command should return EINVAL, since we are trying to pin to same VMA +*/ +static void gem_pin_overlap_test(int fd) +{ + i915_gem_userptr userptr; + i915_gem_userptr userptr1; + int ret; + uint32_t *shared_buffer; + uint32_t *shared_buffer1; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 exec_object2[3]; + uint32_t shared_buf_handle, shared_buf_handle1; + int ring, len; + uint64_t pinning_offset = 0x1000; + + igt_require(uses_full_ppgtt(fd, ALIASING_PPGTT)); + + shared_buffer = create_mem_buffer(BO_SIZE); + shared_buffer1 = create_mem_buffer(BO_SIZE * 2); + + /* share with GPU */ + shared_buf_handle = init_userptr(fd, &userptr, shared_buffer, + BO_SIZE); + shared_buf_handle1 = init_userptr(fd, &userptr1, shared_buffer1, + BO_SIZE * 2); + + /* submit command buffer */ + setup_exec_obj(&exec_object2[0], shared_buf_handle, + EXEC_OBJECT_PINNED, pinning_offset); + setup_exec_obj(&exec_object2[1], shared_buf_handle1, + EXEC_OBJECT_PINNED, pinning_offset); + + ring = I915_EXEC_RENDER; + + setup_execbuffer(&execbuf, exec_object2, ring, 2, len); + + ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf); + + /* expect to fail */ + igt_assert_neq(ret, 0); + igt_assert(errno == EINVAL); + + free(shared_buffer); + free(shared_buffer1); +} + +/* gem_softpin_stress_test + * Stress test which creates 10K buffers and shares with GPU + * Create 100K uint32 buffers of size 4K each + * Share with GPU using userptr ioctl + * Create batch buffer to write DATA in first element of each buffer + * Pin each buffer to varying addresses starting from 0x800000000000 going below + * (requires offsets in canonical form) + * Execute Batch Buffer on Blit ring STRESS_NUM_LOOPS times + * Validate every buffer has DATA in first element + * Rinse and Repeat on Render ring +*/ +#define STRESS_NUM_BUFFERS 100000 +#define STRESS_NUM_LOOPS 100 +#define STRESS_STORE_COMMANDS 4 * STRESS_NUM_BUFFERS +#define STRESS_START_ADDRESS 0x800000000000 +static void gem_softpin_stress_test(int fd) +{ + i915_gem_userptr userptr; + uint32_t **shared_buffer; + uint32_t *shared_handle; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 *exec_object2; + uint32_t *batch_buffer; + uint32_t batch_buf_handle; + int ring, len; + int buf, loop; + uint64_t pinning_offset = STRESS_START_ADDRESS; + + igt_require(uses_full_ppgtt(fd, FULL_48_BIT_PPGTT)); + + /* Allocate blobs for all data structures */ + shared_handle = calloc(STRESS_NUM_BUFFERS, sizeof(uint32_t)); + shared_buffer = calloc(STRESS_NUM_BUFFERS, sizeof(uint32_t *)); + exec_object2 = calloc(STRESS_NUM_BUFFERS + 1, + sizeof(struct drm_i915_gem_exec_object2)); + /* 4 dwords per buffer + 2 for the end of batchbuffer */ + batch_buffer = calloc(STRESS_STORE_COMMANDS + 2, sizeof(uint32_t)); + batch_buf_handle = gem_create(fd, (STRESS_STORE_COMMANDS + 2)*4); + + /* create command buffer with write commands */ + len = 0; + for(buf = 0; buf < STRESS_NUM_BUFFERS; buf++) { + shared_buffer[buf] = create_mem_buffer(BO_SIZE); + *shared_buffer[buf] = 0xFFFFFFFF; + + /* share with GPU */ + shared_handle[buf] = init_userptr(fd, &userptr, + shared_buffer[buf], + BO_SIZE); + + setup_exec_obj(&exec_object2[buf], shared_handle[buf], + EXEC_OBJECT_PINNED | + EXEC_OBJECT_SUPPORTS_48B_ADDRESS, + gen8_canonical_addr(pinning_offset)); + len += gem_store_data_svm(fd, batch_buffer + (len/4), + gen8_canonical_addr(pinning_offset), + buf, (buf == STRESS_NUM_BUFFERS-1)? \ + true:false); + + /* decremental 4K aligned address */ + pinning_offset -= ALIGN(BO_SIZE, 4096); + } + + /* setup command buffer */ + gem_write(fd, batch_buf_handle, 0, batch_buffer, len); + setup_exec_obj(&exec_object2[STRESS_NUM_BUFFERS], batch_buf_handle, + 0, 0); + + /* We want to run this on BLT ring if possible */ + if (HAS_BLT_RING(intel_get_drm_devid(fd))) { + ring = I915_EXEC_BLT; + + setup_execbuffer(&execbuf, exec_object2, ring, + STRESS_NUM_BUFFERS + 1, len); + + for (loop = 0; loop < STRESS_NUM_LOOPS; loop++) { + submit_and_sync(fd, &execbuf, batch_buf_handle); + /* Set pinning offset back to original value */ + pinning_offset = STRESS_START_ADDRESS; + for(buf = 0; buf < STRESS_NUM_BUFFERS; buf++) { + gem_userptr_sync(fd, shared_handle[buf]); + igt_assert(exec_object2[buf].offset == + gen8_canonical_addr(pinning_offset)); + igt_fail_on_f(*shared_buffer[buf] != buf, \ + "Mismatch in buffer %d, iteration %d: 0x%08X\n", \ + buf, loop, *shared_buffer[buf]); + pinning_offset -= ALIGN(BO_SIZE, 4096); + } + /* Reset the buffer entries for next iteration */ + for(buf = 0; buf < STRESS_NUM_BUFFERS; buf++) { + *shared_buffer[buf] = 0xFFFFFFFF; + } + } + } + + /* Now Render Ring */ + ring = I915_EXEC_RENDER; + setup_execbuffer(&execbuf, exec_object2, ring, + STRESS_NUM_BUFFERS + 1, len); + for (loop = 0; loop < STRESS_NUM_LOOPS; loop++) { + submit_and_sync(fd, &execbuf, batch_buf_handle); + pinning_offset = STRESS_START_ADDRESS; + for(buf = 0; buf < STRESS_NUM_BUFFERS; buf++) { + gem_userptr_sync(fd, shared_handle[buf]); + igt_assert(exec_object2[buf].offset == + gen8_canonical_addr(pinning_offset)); + igt_fail_on_f(*shared_buffer[buf] != buf, \ + "Mismatch in buffer %d, \ + iteration %d: 0x%08X\n", buf, loop, *shared_buffer[buf]); + pinning_offset -= ALIGN(BO_SIZE, 4096); + } + /* Reset the buffer entries for next iteration */ + for(buf = 0; buf < STRESS_NUM_BUFFERS; buf++) { + *shared_buffer[buf] = 0xFFFFFFFF; + } + } + + for(buf = 0; buf < STRESS_NUM_BUFFERS; buf++) { + gem_close(fd, shared_handle[buf]); + free(shared_buffer[buf]); + } + gem_close(fd, batch_buf_handle); + + free(shared_handle); + free(shared_buffer); + free(exec_object2); + free(batch_buffer); +} + +/* gem_write_multipage_buffer + * Create a buffer spanning multiple pages, and share with GPU. + * Write to every element of the buffer + * and verify correct contents. + + * Create 8K buffer + * Share with GPU using userptr ioctl + * Create batch buffer to write DATA in all elements of buffer + * Execute Batch Buffer + * Validate every element has DATA +*/ + +#define DWORD_SIZE sizeof(uint32_t) +#define BB_SIZE ((MULTIPAGE_BO_SIZE / DWORD_SIZE) * STORE_BATCH_BUFFER_SIZE) + 2 +#define NUM_DWORDS (MULTIPAGE_BO_SIZE/sizeof(uint32_t)) +static void gem_write_multipage_buffer_test(int fd) +{ + i915_gem_userptr userptr; + uint32_t *shared_buffer; + uint32_t shared_handle; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 exec_object2[2]; + uint32_t batch_buffer[BB_SIZE]; + uint32_t batch_buf_handle; + int ring, len, j; + uint64_t pinning_offset=0x1000; + uint64_t vaddr; + + igt_require(uses_full_ppgtt(fd, ALIASING_PPGTT)); + + batch_buf_handle = gem_create(fd, sizeof(batch_buffer)); + shared_buffer = create_mem_buffer(MULTIPAGE_BO_SIZE); + + len = 0; + memset(batch_buffer, 0, sizeof(batch_buffer)); + memset(shared_buffer, 0, MULTIPAGE_BO_SIZE); + + /* share with GPU */ + shared_handle = init_userptr(fd, &userptr, shared_buffer, + MULTIPAGE_BO_SIZE); + setup_exec_obj(&exec_object2[0], shared_handle, + EXEC_OBJECT_PINNED, pinning_offset); + + /* create command buffer with write commands */ + vaddr = pinning_offset; + for(j=0; j< NUM_DWORDS; j++) { + len += gem_store_data_svm(fd, batch_buffer + (len/4), vaddr, + j, + (j == NUM_DWORDS - 1) ? true:false); + vaddr += sizeof(shared_buffer[0]); /* 4 bytes */ + } + + gem_write(fd, batch_buf_handle, 0, batch_buffer, len); + + /* submit command buffer */ + setup_exec_obj(&exec_object2[1], batch_buf_handle, 0, 0); + + ring = I915_EXEC_RENDER; + setup_execbuffer(&execbuf, exec_object2, ring, 2, len); + submit_and_sync(fd, &execbuf, batch_buf_handle); + gem_userptr_sync(fd, shared_handle); + + igt_assert(exec_object2[0].offset == pinning_offset); + for(j = 0; j < (MULTIPAGE_BO_SIZE/sizeof(uint32_t)); j++) { + igt_fail_on_f(shared_buffer[j] != j, + "Mismatch in index %d: 0x%08X\n", j, shared_buffer[j]); + } + + gem_close(fd, batch_buf_handle); + gem_close(fd, shared_handle); + + free(shared_buffer); +} + +/* gem_pin_invalid_vma_test + * This test will request to pin a shared buffer to an invalid + * VMA > 48-bit address if system supports 48B PPGTT; it also + * will test that any attempt of using a 48-bit address requires + * the SUPPORTS_48B_ADDRESS flag, and that 48-bit address need to be + * in canonical form (bits [63:48] == [47]). + * If system supports 32B PPGTT, it will test the equivalent invalid VMA + * Create shared buffer of size 4K + * Try and Pin object to invalid address +*/ +static void gem_pin_invalid_vma_test(int fd, bool test_decouple_flags, + bool test_canonical_offset) +{ + i915_gem_userptr userptr; + int ret; + uint32_t *shared_buffer; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 exec_object2[1]; + uint32_t shared_buf_handle; + int ring; + uint64_t invalid_address_for_48b = 0x9000000000000; /* 52 bit address */ + uint64_t noncanonical_address_for_48b = 0xFF0000000000; /* 48 bit address in noncanonical form */ + uint64_t invalid_address_for_32b = 0x900000000; /* 36 bit address */ + + igt_require(uses_full_ppgtt(fd, FULL_48_BIT_PPGTT) || + uses_full_ppgtt(fd, FULL_32_BIT_PPGTT)); + + shared_buffer = create_mem_buffer(BO_SIZE); + *shared_buffer = 0xFFFFFFFF; + + /* share with GPU */ + shared_buf_handle = init_userptr(fd, &userptr, shared_buffer, BO_SIZE); + + if (uses_full_ppgtt(fd, FULL_48_BIT_PPGTT) && test_canonical_offset) { + setup_exec_obj(&exec_object2[0], shared_buf_handle, + EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS, + noncanonical_address_for_48b); + } else if (uses_full_ppgtt(fd, FULL_48_BIT_PPGTT) && !test_decouple_flags) { + setup_exec_obj(&exec_object2[0], shared_buf_handle, + EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS, + invalid_address_for_48b); + } else { + /* This also fails in 48b without 48B_ADDRESS support flag */ + setup_exec_obj(&exec_object2[0], shared_buf_handle, + EXEC_OBJECT_PINNED, invalid_address_for_32b); + } + + ring = I915_EXEC_RENDER; + + setup_execbuffer(&execbuf, exec_object2, ring, 1, 0); + + /* Expect execbuf to fail */ + ret = drmIoctl(fd, + DRM_IOCTL_I915_GEM_EXECBUFFER2, + &execbuf); + + igt_assert(errno == EINVAL); + igt_assert_neq(ret, 0); + + gem_close(fd, shared_buf_handle); + free(shared_buffer); +} + + +/* gem_pin_high_address_test + * This test will create a shared buffer, and create a command + * for GPU to write data in it. It will attempt to pin the buffer at address > 32 bits. + * CPU will read and make sure expected value is obtained + + * Malloc a 4K buffer + * Share buffer with with GPU by using userptr ioctl + * Create batch buffer to write DATA to first dword of buffer + * Use virtual address of buffer as 0x1100000000 (> 32 bit) + * Set EXEC_OBJECT_PINNED flag in exec object + * Set 'offset' in exec object to shared buffer VMA + * Submit execbuffer + * Verify value of first DWORD in shared buffer matches DATA +*/ + +static void gem_pin_high_address_test(int fd) +{ + i915_gem_userptr userptr; + uint32_t *shared_buffer; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 exec_object2[2]; + uint32_t batch_buffer[STORE_BATCH_BUFFER_SIZE + 2]; + uint32_t batch_buf_handle, shared_buf_handle; + int ring, len; + const uint32_t data = 0x12345678; + uint64_t high_address = 0x1111FFFF000; /* 44 bit address */ + + igt_require(uses_full_ppgtt(fd, FULL_48_BIT_PPGTT)); + + batch_buf_handle = gem_create(fd, BO_SIZE); + + /* create cpu buffer, set to all 0xF's */ + shared_buffer = create_mem_buffer(BO_SIZE); + *shared_buffer = 0xFFFFFFFF; + + /* share with GPU */ + shared_buf_handle = init_userptr(fd, &userptr, shared_buffer, BO_SIZE); + + /* create command buffer with write command */ + len = gem_store_data_svm(fd, batch_buffer, high_address, data, true); + gem_write(fd, batch_buf_handle, 0, batch_buffer, len); + + /* submit command buffer */ + setup_exec_obj(&exec_object2[0], shared_buf_handle, + EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS, high_address); + setup_exec_obj(&exec_object2[1], batch_buf_handle, 0, 0); + + ring = I915_EXEC_RENDER; + + setup_execbuffer(&execbuf, exec_object2, ring, 2, len); + submit_and_sync(fd, &execbuf, batch_buf_handle); + gem_userptr_sync(fd, shared_buf_handle); + + igt_assert(exec_object2[0].offset == high_address); + /* check on CPU to see if value changes */ + igt_fail_on_f(shared_buffer[0] != data, + "\nCPU read does not match GPU write, \ + expected: 0x%x, got: 0x%x\n", data, shared_buffer[0]); + + gem_close(fd, batch_buf_handle); + gem_close(fd, shared_buf_handle); + free(shared_buffer); +} + +/* gem_pin_near_48Bit_test + * This test will create a shared buffer, + * and create a command for GPU to write data in it. It will attempt + * to pin the buffer at address > 47 bits <= 48-bit. + * CPU will read and make sure expected value is obtained. + * Note that we must submit addresses in canonical form, not only + * because the addresss will be validated, but also the returned offset + * will be in this format. + + * Malloc a 4K buffer + * Share buffer with with GPU by using userptr ioctl + * Create batch buffer to write DATA to first dword of buffer + * Use virtual address of buffer as range between 47-bit and 48-bit + * Set EXEC_OBJECT_PINNED flag in exec object + * Set 'offset' in exec object to shared buffer VMA + * Submit execbuffer + * Verify value of first DWORD in shared buffer matches DATA +*/ +#define BEGIN_HIGH_ADDRESS 0x7FFFFFFFF000 +#define END_HIGH_ADDRESS 0xFFFFFFFFC000 +#define ADDRESS_INCREMENT 0x2000000000 +static void gem_pin_near_48Bit_test(int fd) +{ + i915_gem_userptr userptr; + uint32_t *shared_buffer; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 exec_object2[2]; + uint32_t batch_buffer[BO_SIZE]; + uint32_t batch_buf_handle, shared_buf_handle; + int ring, len; + const uint32_t data = 0x12345678; + uint64_t high_address, can_high_address; + + igt_require(uses_full_ppgtt(fd, FULL_48_BIT_PPGTT)); + + batch_buf_handle = gem_create(fd, BO_SIZE); + + /* create cpu buffer, set to all 0xF's */ + shared_buffer = create_mem_buffer(BO_SIZE); + *shared_buffer = 0xFFFFFFFF; + + /* share with GPU */ + shared_buf_handle = init_userptr(fd, &userptr, shared_buffer, BO_SIZE); + + for (high_address = BEGIN_HIGH_ADDRESS; high_address <= END_HIGH_ADDRESS; + high_address+=ADDRESS_INCREMENT) { + can_high_address = gen8_canonical_addr(high_address); + /* create command buffer with write command */ + len = gem_store_data_svm(fd, batch_buffer, can_high_address, + data, true); + gem_write(fd, batch_buf_handle, 0, batch_buffer, len); + /* submit command buffer */ + setup_exec_obj(&exec_object2[0], shared_buf_handle, + EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS, + can_high_address); + setup_exec_obj(&exec_object2[1], batch_buf_handle, 0, 0); + + ring = I915_EXEC_RENDER; + setup_execbuffer(&execbuf, exec_object2, ring, 2, len); + submit_and_sync(fd, &execbuf, batch_buf_handle); + gem_userptr_sync(fd, shared_buf_handle); + + igt_assert(exec_object2[0].offset == can_high_address); + /* check on CPU to see if value changes */ + igt_fail_on_f(shared_buffer[0] != data, + "\nCPU read does not match GPU write, expected: 0x%x, \ + got: 0x%x\n, 0x%"PRIx64"", data, shared_buffer[0], high_address); + } + + gem_close(fd, batch_buf_handle); + gem_close(fd, shared_buf_handle); + free(shared_buffer); +} + + +igt_main +{ + int fd = -1; + + igt_skip_on_simulation(); + + igt_fixture { + fd = drm_open_driver(DRIVER_INTEL); + igt_require(gem_has_softpin(fd)); + } + + /* All tests need PPGTT support */ + igt_subtest("gem_pin_userptr") + gem_pin_userptr_test(fd); + igt_subtest("gem_pin_bo") + gem_pin_bo_test(fd); + igt_subtest("gem_multiple_process") + gem_multiple_process_test(fd); + igt_subtest("gem_repin") + gem_repin_test(fd); + igt_subtest("gem_pin_overlap") + gem_pin_overlap_test(fd); + igt_subtest("gem_write_multipage_buffer") + gem_write_multipage_buffer_test(fd); + + /* Following tests need 32/48 Bit PPGTT support */ + igt_subtest("gem_pin_invalid_vma") + gem_pin_invalid_vma_test(fd, false, false); + + /* Following tests need 48 Bit PPGTT support */ + igt_subtest("gen_pin_noncanonical_high_address") + gem_pin_invalid_vma_test(fd, false, true); + igt_subtest("gem_pin_high_address_without_correct_flag") + gem_pin_invalid_vma_test(fd, true, false); + igt_subtest("gem_softpin_stress") + gem_softpin_stress_test(fd); + igt_subtest("gem_pin_high_address") + gem_pin_high_address_test(fd); + igt_subtest("gem_pin_near_48Bit") + gem_pin_near_48Bit_test(fd); + + igt_fixture + close(fd); + + igt_exit(); +} -- 1.9.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx