On Sat, Jan 09, 2016 at 05:01:30PM +0530, akash.goel@xxxxxxxxx wrote: > +static void* mmap_bo(int fd, uint32_t handle, uint64_t size) > +{ > + uint32_t *ptr = gem_mmap__cpu(fd, handle, 0, size, PROT_READ); > + gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); read-only mapping, but set to the cpu write domain? Seems inconsistent. > + return ptr; > +} > +/* gem_store_data_svm > + * populate batch buffer with MI_STORE_DWORD_IMM command > + * @fd: drm file descriptor > + * @cmd_buf: batch buffer > + * @dw_offset: write offset in batch buffer > + * @vaddr: destination Virtual address > + * @data: data to be store at destination > + * @end: whether to end batch buffer or not > +*/ > +static int gem_store_data_svm(int fd, uint32_t *cmd_buf, uint32_t dw_offset, > + uint64_t vaddr, uint32_t data, bool end) Urm, what? Just call this what it is, emit_store_dword(cs, offset, vaddr, value); Don't pass in bool end, since it is going to used exactly once and just confuses all the other callers. > +{ > + cmd_buf[dw_offset++] = MI_STORE_DWORD_IMM; > + cmd_buf[dw_offset++] = vaddr & 0xFFFFFFFC; > + cmd_buf[dw_offset++] = (vaddr >> 32) & 0xFFFF; /* bits 32:47 */ > + > + cmd_buf[dw_offset++] = data; Interesting use of whitespace. > + if (end) { > + cmd_buf[dw_offset++] = MI_BATCH_BUFFER_END; > + cmd_buf[dw_offset++] = 0; > + } > + > + return dw_offset; > +} > + > +/* setup_execbuffer > + * helper for buffer execution > + * @execbuf - pointer to execbuffer > + * @exec_object - pointer to exec object2 struct > + * @ring - ring to be used > + * @buffer_count - how manu buffers to submit > + * @batch_length - length of batch buffer > +*/ > +static void setup_execbuffer(struct drm_i915_gem_execbuffer2 *execbuf, > + struct drm_i915_gem_exec_object2 *exec_object, > + int ring, int buffer_count, int batch_length) > +{ How about memset(execbuf, 0, sizeof(*execbuf)); > + execbuf->buffers_ptr = (unsigned long)exec_object; > + execbuf->buffer_count = buffer_count; > + execbuf->batch_start_offset = 0; > + execbuf->batch_len = batch_length; > + execbuf->cliprects_ptr = 0; > + execbuf->num_cliprects = 0; > + execbuf->DR1 = 0; > + execbuf->DR4 = 0; > + execbuf->flags = ring; > + i915_execbuffer2_set_context_id(*execbuf, 0); > + execbuf->rsvd2 = 0; > +} > + > +/* submit_and_sync > + * Helper function for exec and sync functions > + * @fd - drm fd > + * @execbuf - pointer to execbuffer > + * @batch_buf_handle - batch buffer handle > +*/ > +static void submit_and_sync(int fd, struct drm_i915_gem_execbuffer2 *execbuf, > + uint32_t batch_buf_handle) > +{ > + gem_execbuf(fd, execbuf); > + gem_sync(fd, batch_buf_handle); The only caller of this also does its own sync. This seems irrelevant and serves a bad example. > +} > + > +struct local_i915_gem_context_trtt_param { > + uint64_t l3_table_address; > + uint32_t invd_tile_val; > + uint32_t null_tile_val; > +}; > + > +/* send_trtt_params > + * Helper function to request KMD to enable TRTT > + * @fd - drm fd > + * @ctx_id - id of the context for which TRTT is to be enabled > + * @l3_table_address - GFX address of the L3 table > +*/ > +static void send_trtt_params(int fd, uint32_t ctx_id, uint64_t l3_table_address) It is not a socket, pipe, or other transport medium. Just setup_trtt(). > +#define TABLE_SIZE 0x1000 > +#define TILE_SIZE 0x10000 > + > +#define FIRST_TILE_ADDRESS 0xF00000000000 > +#define LAST_TILE_ADDRESS 0xFFFFFFFF0000 > + > +#define BO_ALLOC_AND_SETUP(fd, bo_size, bo_handle, bo_offset, idx) \ > + bo_handle = gem_create(fd, bo_size); \ > + bo_offset = current_ppgtt_offset; \ > + setup_exec_obj(&exec_object2[idx], bo_handle, EXEC_OBJECT_PINNED, bo_offset); \ > + current_ppgtt_offset += bo_size; Function! > + > +/* basic test > + * This test will add a series of MI_STORE_ commands, first to update the > + * TR-TT table entries and then to update the data buffers using the TR-TT VA, > + * exercising the programming the table programming done previously > +*/ > +static void gem_basic_trtt_use(void) > +{ > + int fd; > + int ring, len = 0; > + uint32_t *ptr; > + struct drm_i915_gem_execbuffer2 execbuf; > + struct drm_i915_gem_exec_object2 exec_object2[8]; > + uint32_t batch_buffer[BO_SIZE]; > + > + uint32_t l3_tbl_handle, l2_tbl1_handle, l2_tbl2_handle; > + uint32_t l1_tbl1_handle, l1_tbl2_handle, batch_buf_handle; > + uint32_t buffer1_handle, buffer2_handle; > + > + uint64_t l3_tbl_offset, l2_tbl1_offset, l2_tbl2_offset; > + uint64_t l1_tbl1_offset, l1_tbl2_offset; > + uint64_t buffer1_offset, buffer2_offset; > + > + uint32_t data; > + uint64_t address, current_ppgtt_offset = 0x10000; > + > + fd = drm_open_driver(DRIVER_INTEL); > + igt_require(uses_full_ppgtt(fd, FULL_48_BIT_PPGTT)); > + igt_require(has_softpin_support(fd)); > + igt_require(has_trtt_support(fd)); > + > + /* Allocate a L3 table BO */ > + BO_ALLOC_AND_SETUP(fd, TABLE_SIZE, l3_tbl_handle, l3_tbl_offset, 0); > + > + /* Allocate two L2 table BOs */ > + BO_ALLOC_AND_SETUP(fd, TABLE_SIZE, l2_tbl1_handle, l2_tbl1_offset, 1); > + BO_ALLOC_AND_SETUP(fd, TABLE_SIZE, l2_tbl2_handle, l2_tbl2_offset, 2); > + > + /* Allocate two L1 table BOs */ > + BO_ALLOC_AND_SETUP(fd, TABLE_SIZE, l1_tbl1_handle, l1_tbl1_offset, 3); > + BO_ALLOC_AND_SETUP(fd, TABLE_SIZE, l1_tbl2_handle, l1_tbl2_offset, 4); > + > + /* Align the PPGTT offsets for the 2 data buffers to next 64 KB boundary */ > + current_ppgtt_offset = ALIGN(current_ppgtt_offset, TILE_SIZE); > + > + /* Allocate two Data buffer BOs */ > + BO_ALLOC_AND_SETUP(fd, TILE_SIZE, buffer1_handle, buffer1_offset, 5); > + BO_ALLOC_AND_SETUP(fd, TILE_SIZE, buffer2_handle, buffer2_offset, 6); > + > + /* Finally allocate Batch buffer BO */ > + batch_buf_handle = gem_create(fd, BO_SIZE); > + setup_exec_obj(&exec_object2[7], batch_buf_handle, 0, 0); Scary jump from idx to 7. Why not just pin this as well to reduce the code complexity? Afterwards setup_exec_obj() can allocate an object all by itself. > + > + /* Add commands to update the two L3 table entries to point them to the L2 tables*/ > + address = l3_tbl_offset; > + data = l2_tbl1_offset; > + len = gem_store_data_svm(fd, batch_buffer, len, address, data, false); > + > + address = l3_tbl_offset + 511*sizeof(uint64_t); > + data = l2_tbl2_offset; > + len = gem_store_data_svm(fd, batch_buffer, len, address, data, false); > + > + /* Add commands to update an entry of 2 L2 tables to point them to the L1 tables*/ > + address = l2_tbl1_offset; > + data = l1_tbl1_offset; > + len = gem_store_data_svm(fd, batch_buffer, len, address, data, false); > + > + address = l2_tbl2_offset + 511*sizeof(uint64_t); > + data = l1_tbl2_offset; > + len = gem_store_data_svm(fd, batch_buffer, len, address, data, false); > + > + /* Add commands to update an entry of 2 L1 tables to point them to the data buffers*/ > + address = l1_tbl1_offset; > + data = buffer1_offset >> 16; > + len = gem_store_data_svm(fd, batch_buffer, len, address, data, false); > + > + address = l1_tbl2_offset + 1023*sizeof(uint32_t); > + data = buffer2_offset >> 16; > + len = gem_store_data_svm(fd, batch_buffer, len, address, data, false); > + > + /* Add commands to update the 2 data buffers, using their TRTT VA */ > + data = 0x12345678; > + len = gem_store_data_svm(fd, batch_buffer, len, FIRST_TILE_ADDRESS, data, false); > + len = gem_store_data_svm(fd, batch_buffer, len, LAST_TILE_ADDRESS, data, true); > + > + gem_write(fd, batch_buf_handle, 0, batch_buffer, len*4); Or for even shorter code: batch_buffer = gem_mmap__cpu(exec_object[batch].handle); > + > + /* Request KMD to setup the TR-TT */ > + send_trtt_params(fd, 0, l3_tbl_offset); > + > + ring = I915_EXEC_RENDER; > + setup_execbuffer(&execbuf, exec_object2, ring, 8, len*4); > + > + /* submit command buffer */ > + submit_and_sync(fd, &execbuf, batch_buf_handle); > + > + /* read the 2 data buffers to check for the value written by the GPU */ > + ptr = mmap_bo(fd, buffer1_handle, TILE_SIZE); > + igt_fail_on_f(ptr[0] != data, > + "\nCPU read does not match GPU write,\ > + expected: 0x%x, got: 0x%x\n", > + data, ptr[0]); > + > + ptr = mmap_bo(fd, buffer2_handle, TILE_SIZE); > + igt_fail_on_f(ptr[0] != data, > + "\nCPU read does not match GPU write,\ > + expected: 0x%x, got: 0x%x\n", > + data, ptr[0]); > + > + gem_close(fd, l3_tbl_handle); > + gem_close(fd, l2_tbl1_handle); > + gem_close(fd, l2_tbl2_handle); > + gem_close(fd, l1_tbl1_handle); > + gem_close(fd, l1_tbl2_handle); > + gem_close(fd, buffer1_handle); > + gem_close(fd, buffer2_handle); > + gem_close(fd, batch_buf_handle); > + close(fd); > +} > + > +int main(int argc, char* argv[]) > +{ > + igt_subtest_init(argc, argv); Together these are igt_main, and then you can also drop igt_exit. > + igt_skip_on_simulation(); I think you want this on simulation as well, as least "basic". > + > + /* test needs 48 PPGTT & Soft Pin support */ > + igt_subtest("basic") { > + gem_basic_trtt_use(); > + } > + > + igt_exit(); > +} > + > -- > 1.9.2 > -- Chris Wilson, Intel Open Source Technology Centre _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx