In userspace we can track which buffer a relocation refers to in constant time. However, the kernel has to look up the per-fd gem handle for each relocation. Using the I915_EXEC_HANDLE_LUT flag lets us use the the bos validation list index instead of the gem handle in the relocation entries. This allows the kernel to look up the bo for a reloc in constant time. Signed-off-by: Kristian Høgsberg <krh@xxxxxxxxxxxxx> --- intel/intel_bufmgr_gem.c | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c index cf85bb8..8a51cea 100644 --- a/intel/intel_bufmgr_gem.c +++ b/intel/intel_bufmgr_gem.c @@ -130,6 +130,7 @@ typedef struct _drm_intel_bufmgr_gem { unsigned int bo_reuse : 1; unsigned int no_exec : 1; unsigned int has_vebox : 1; + unsigned int has_handle_lut : 1; bool fenced_relocs; char *aub_filename; @@ -399,11 +400,12 @@ drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) (drm_intel_bo_gem *) target_bo; DBG("%2d: %d (%s)@0x%08llx -> " - "%d (%s)@0x%08lx + 0x%08x\n", + "%d (#%d) (%s)@0x%08lx + 0x%08x\n", i, bo_gem->gem_handle, bo_gem->name, (unsigned long long)bo_gem->relocs[j].offset, target_gem->gem_handle, + bo_gem->relocs[j].target_handle, target_gem->name, target_bo->offset64, bo_gem->relocs[j].delta); @@ -470,7 +472,7 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; - int index; + int i, index; if (bo_gem->validate_index != -1) { if (need_fence) @@ -512,6 +514,26 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) EXEC_OBJECT_NEEDS_FENCE; } bufmgr_gem->exec_count++; + + if (bufmgr_gem->has_handle_lut) { + /* If the kernel supports I915_EXEC_HANDLE_LUT, we can + * use validate_index instead of the gem handle in + * target_handle in the reloc entry. This allows the + * kernel to do a simple constant time lookup instead + * of looking up the gem handle for each reloc. + * + * We have to fix up the relocs here, since the bo_gem + * needs to have a valid validate_index in case there + * are self-relocs in the reloc list. + */ + for (i = 0; i < bo_gem->reloc_count; i++) { + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) + bo_gem->reloc_target_info[i].bo; + + bo_gem->relocs[i].target_handle = + target_bo_gem->validate_index; + } + } } #define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ @@ -2447,6 +2469,9 @@ do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx, if (bufmgr_gem->no_exec) goto skip_execution; + if (bufmgr_gem->has_handle_lut) + execbuf.flags |= I915_EXEC_HANDLE_LUT; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf); @@ -3569,6 +3594,10 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) } } + gp.param = I915_PARAM_HAS_EXEC_HANDLE_LUT; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + bufmgr_gem->has_handle_lut = ret == 0; + /* Let's go with one relocation per every 2 dwords (but round down a bit * since a power of two will mean an extra page allocation for the reloc * buffer). -- 2.2.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx