On Thu, Sep 03, 2015 at 03:23:58PM +0100, Michel Thierry wrote: > Gen8+ supports 48-bit virtual addresses, but some objects must always be > allocated inside the 32-bit address range. > > In specific, any resource used with flat/heapless (0x00000000-0xfffff000) > General State Heap (GSH) or Instruction State Heap (ISH) must be in a > 32-bit range, because the General State Offset and Instruction State Offset > are limited to 32-bits. > > The i915 driver has been modified to provide a flag to set when the 4GB > limit is not necessary in a given bo (EXEC_OBJECT_SUPPORTS_48B_ADDRESS). > 48-bit range will only be used when explicitly requested. > > Callers to the existing drm_intel_bo_emit_reloc function should set the > use_48b_address_range flag beforehand, in order to use full ppgtt range. > > v2: Make set/clear functions nops on pre-gen8 platforms, and use them > internally in emit_reloc functions (Ben) > s/48BADDRESS/48B_ADDRESS/ (Dave) > v3: Keep set/clear functions internal, no-one needs to use them directly. > v4: Don't set 48bit-support flag in emit reloc, check for ppgtt type > before enabling set/clear function, print full offsets in debug > statements, using port of lower_32_bits and upper_32_bits from linux > kernel (Michał) > > References: http://lists.freedesktop.org/archives/intel-gfx/2015-July/072612.html > Cc: Ben Widawsky <ben@xxxxxxxxxxxx> > Cc: Michał Winiarski <michal.winiarski@xxxxxxxxx> +Kristian LGTM. Acked-by: Michał Winiarski <michal.winiarski@xxxxxxxxx> > Signed-off-by: Michel Thierry <michel.thierry@xxxxxxxxx> > --- > include/drm/i915_drm.h | 3 +- > intel/intel_bufmgr.c | 11 ++++++ > intel/intel_bufmgr.h | 1 + > intel/intel_bufmgr_gem.c | 88 +++++++++++++++++++++++++++++++++++++---------- > intel/intel_bufmgr_priv.h | 14 ++++++++ > 5 files changed, 97 insertions(+), 20 deletions(-) > > diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h > index ded43b1..426b25c 100644 > --- a/include/drm/i915_drm.h > +++ b/include/drm/i915_drm.h > @@ -680,7 +680,8 @@ struct drm_i915_gem_exec_object2 { > #define EXEC_OBJECT_NEEDS_FENCE (1<<0) > #define EXEC_OBJECT_NEEDS_GTT (1<<1) > #define EXEC_OBJECT_WRITE (1<<2) > -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1) > +#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) > +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1) > __u64 flags; > > __u64 rsvd1; > diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c > index 14ea9f9..0856e60 100644 > --- a/intel/intel_bufmgr.c > +++ b/intel/intel_bufmgr.c > @@ -293,6 +293,17 @@ drm_intel_bo_madvise(drm_intel_bo *bo, int madv) > } > > int > +drm_intel_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable) > +{ > + if (bo->bufmgr->bo_use_48b_address_range) { > + bo->bufmgr->bo_use_48b_address_range(bo, enable); > + return 0; > + } > + > + return -ENODEV; > +} > + > +int > drm_intel_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) > { > return bo->bufmgr->bo_references(bo, target_bo); > diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h > index 95eecb8..a14c78f 100644 > --- a/intel/intel_bufmgr.h > +++ b/intel/intel_bufmgr.h > @@ -164,6 +164,7 @@ int drm_intel_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, > int drm_intel_bo_flink(drm_intel_bo *bo, uint32_t * name); > int drm_intel_bo_busy(drm_intel_bo *bo); > int drm_intel_bo_madvise(drm_intel_bo *bo, int madv); > +int drm_intel_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable); > > int drm_intel_bo_disable_reuse(drm_intel_bo *bo); > int drm_intel_bo_is_reusable(drm_intel_bo *bo); > diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c > index 2723e21..09d82d2 100644 > --- a/intel/intel_bufmgr_gem.c > +++ b/intel/intel_bufmgr_gem.c > @@ -83,6 +83,22 @@ > #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) > #define MAX2(A, B) ((A) > (B) ? (A) : (B)) > > +/** > + * upper_32_bits - return bits 32-63 of a number > + * @n: the number we're accessing > + * > + * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress > + * the "right shift count >= width of type" warning when that quantity is > + * 32-bits. > + */ > +#define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16)) > + > +/** > + * lower_32_bits - return bits 0-31 of a number > + * @n: the number we're accessing > + */ > +#define lower_32_bits(n) ((__u32)(n)) > + > typedef struct _drm_intel_bo_gem drm_intel_bo_gem; > > struct drm_intel_gem_bo_bucket { > @@ -237,6 +253,15 @@ struct _drm_intel_bo_gem { > bool is_userptr; > > /** > + * Boolean of whether this buffer can be placed in the full 48-bit > + * address range on gen8+. > + * > + * By default, buffers will be keep in a 32-bit range, unless this > + * flag is explicitly set. > + */ > + bool use_48b_address_range; > + > + /** > * Size in bytes of this buffer and its relocation descendents. > * > * Used to avoid costly tree walking in > @@ -395,14 +420,16 @@ drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) > drm_intel_bo_gem *target_gem = > (drm_intel_bo_gem *) target_bo; > > - DBG("%2d: %d (%s)@0x%08llx -> " > - "%d (%s)@0x%08lx + 0x%08x\n", > + DBG("%2d: %d (%s)@0x%08x %08x -> " > + "%d (%s)@0x%08x %08x + 0x%08x\n", > i, > bo_gem->gem_handle, bo_gem->name, > - (unsigned long long)bo_gem->relocs[j].offset, > + upper_32_bits(bo_gem->relocs[j].offset), > + lower_32_bits(bo_gem->relocs[j].offset), > target_gem->gem_handle, > target_gem->name, > - target_bo->offset64, > + upper_32_bits(target_bo->offset64), > + lower_32_bits(target_bo->offset64), > bo_gem->relocs[j].delta); > } > } > @@ -468,11 +495,15 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) > drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; > drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; > int index; > + int flags = 0; > + > + if (need_fence) > + flags |= EXEC_OBJECT_NEEDS_FENCE; > + if (bo_gem->use_48b_address_range) > + flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; > > if (bo_gem->validate_index != -1) { > - if (need_fence) > - bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= > - EXEC_OBJECT_NEEDS_FENCE; > + bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= flags; > return; > } > > @@ -501,13 +532,9 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) > bufmgr_gem->exec2_objects[index].alignment = bo->align; > bufmgr_gem->exec2_objects[index].offset = 0; > bufmgr_gem->exec_bos[index] = bo; > - bufmgr_gem->exec2_objects[index].flags = 0; > + bufmgr_gem->exec2_objects[index].flags = flags; > bufmgr_gem->exec2_objects[index].rsvd1 = 0; > bufmgr_gem->exec2_objects[index].rsvd2 = 0; > - if (need_fence) { > - bufmgr_gem->exec2_objects[index].flags |= > - EXEC_OBJECT_NEEDS_FENCE; > - } > bufmgr_gem->exec_count++; > } > > @@ -780,6 +807,7 @@ retry: > bo_gem->used_as_reloc_target = false; > bo_gem->has_error = false; > bo_gem->reusable = true; > + bo_gem->use_48b_address_range = false; > > drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment); > > @@ -926,6 +954,7 @@ drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, > bo_gem->used_as_reloc_target = false; > bo_gem->has_error = false; > bo_gem->reusable = false; > + bo_gem->use_48b_address_range = false; > > drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); > > @@ -1081,6 +1110,7 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, > bo_gem->bo.handle = open_arg.handle; > bo_gem->global_name = handle; > bo_gem->reusable = false; > + bo_gem->use_48b_address_range = false; > > memclear(get_tiling); > get_tiling.handle = bo_gem->gem_handle; > @@ -1930,6 +1960,13 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, > return 0; > } > > +static void > +drm_intel_gem_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable) > +{ > + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; > + bo_gem->use_48b_address_range = enable; > +} > + > static int > drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, > drm_intel_bo *target_bo, uint32_t target_offset, > @@ -2073,10 +2110,12 @@ drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem) > > /* Update the buffer offset */ > if (bufmgr_gem->exec_objects[i].offset != bo->offset64) { > - DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", > - bo_gem->gem_handle, bo_gem->name, bo->offset64, > - (unsigned long long)bufmgr_gem->exec_objects[i]. > - offset); > + DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n", > + bo_gem->gem_handle, bo_gem->name, > + upper_32_bits(bo->offset64), > + lower_32_bits(bo->offset64), > + upper_32_bits(bufmgr_gem->exec_objects[i].offset), > + lower_32_bits(bufmgr_gem->exec_objects[i].offset)); > bo->offset64 = bufmgr_gem->exec_objects[i].offset; > bo->offset = bufmgr_gem->exec_objects[i].offset; > } > @@ -2094,9 +2133,12 @@ drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) > > /* Update the buffer offset */ > if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) { > - DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", > - bo_gem->gem_handle, bo_gem->name, bo->offset64, > - (unsigned long long)bufmgr_gem->exec2_objects[i].offset); > + DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n", > + bo_gem->gem_handle, bo_gem->name, > + upper_32_bits(bo->offset64), > + lower_32_bits(bo->offset64), > + upper_32_bits(bufmgr_gem->exec2_objects[i].offset), > + lower_32_bits(bufmgr_gem->exec2_objects[i].offset)); > bo->offset64 = bufmgr_gem->exec2_objects[i].offset; > bo->offset = bufmgr_gem->exec2_objects[i].offset; > } > @@ -2481,6 +2523,7 @@ drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int s > bo_gem->used_as_reloc_target = false; > bo_gem->has_error = false; > bo_gem->reusable = false; > + bo_gem->use_48b_address_range = false; > > DRMINITLISTHEAD(&bo_gem->vma_list); > DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); > @@ -3278,6 +3321,13 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) > } > } > > + if (bufmgr_gem->gen >= 8) { > + gp.param = I915_PARAM_HAS_ALIASING_PPGTT; > + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); > + if (ret == 0 && *gp.value == 3) > + bufmgr_gem->bufmgr.bo_use_48b_address_range = drm_intel_gem_bo_use_48b_address_range; > + } > + > /* Let's go with one relocation per every 2 dwords (but round down a bit > * since a power of two will mean an extra page allocation for the reloc > * buffer). > diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h > index 59ebd18..5c17ffb 100644 > --- a/intel/intel_bufmgr_priv.h > +++ b/intel/intel_bufmgr_priv.h > @@ -152,6 +152,20 @@ struct _drm_intel_bufmgr { > void (*destroy) (drm_intel_bufmgr *bufmgr); > > /** > + * Indicate if the buffer can be placed anywhere in the full ppgtt > + * address range (2^48). > + * > + * Any resource used with flat/heapless (0x00000000-0xfffff000) > + * General State Heap (GSH) or Intructions State Heap (ISH) must > + * be in a 32-bit range. 48-bit range will only be used when explicitly > + * requested. > + * > + * \param bo Buffer to set the use_48b_address_range flag. > + * \param enable The flag value. > + */ > + void (*bo_use_48b_address_range) (drm_intel_bo *bo, uint32_t enable); > + > + /** > * Add relocation entry in reloc_buf, which will be updated with the > * target buffer's real offset on on command submission. > * > -- > 2.5.0 > _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx