With the advent of full per-process GTT, the per-context GTT may differ in size from the global GTT reported by the get_aperture ioctl. It is also likely to be 4GiB or larger, which exposes overflow in the 32-bit arithmetic used for summing batch sizes. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- intel/intel_bufmgr_gem.c | 118 +++++++++++++++++++++++++++-------------------- 1 file changed, 69 insertions(+), 49 deletions(-) diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c index a5549a6..82673b6 100644 --- a/intel/intel_bufmgr_gem.c +++ b/intel/intel_bufmgr_gem.c @@ -249,7 +249,7 @@ struct _drm_intel_bo_gem { * Used to avoid costly tree walking in * drm_intel_bufmgr_check_aperture in the common case. */ - int reloc_tree_size; + uint64_t reloc_tree_size; /** * Number of potential fence registers required by this buffer and its @@ -261,10 +261,10 @@ struct _drm_intel_bo_gem { bool mapped_cpu_write; }; -static unsigned int +static uint64_t drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count); -static unsigned int +static uint64_t drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count); static int @@ -288,12 +288,13 @@ static inline drm_intel_bo_gem *to_bo_gem(drm_intel_bo *bo) return (drm_intel_bo_gem *)bo; } -static unsigned long -drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size, +static uint64_t +drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, + uint64_t size, uint32_t *tiling_mode) { - unsigned long min_size, max_size; - unsigned long i; + uint64_t min_size, max_size; + uint64_t i; if (*tiling_mode == I915_TILING_NONE) return size; @@ -372,7 +373,7 @@ drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem, static struct drm_intel_gem_bo_bucket * drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem, - unsigned long size) + uint64_t size) { int i; @@ -531,7 +532,7 @@ drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem, drm_intel_bo_gem 
*bo_gem, unsigned int alignment) { - unsigned int size; + uint64_t size; assert(!bo_gem->used_as_reloc_target); @@ -663,7 +664,7 @@ drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem, static drm_intel_bo * drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, const char *name, - unsigned long size, + uint64_t size, unsigned long flags, uint32_t tiling_mode, unsigned long stride, @@ -673,7 +674,7 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, drm_intel_bo_gem *bo_gem; struct drm_intel_gem_bo_bucket *bucket; bool alloc_from_cache; - unsigned long bo_size; + uint64_t bo_size; int ret; /* Round the allocated size up to a power of two number of pages. */ @@ -825,7 +826,7 @@ drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, unsigned long *pitch, unsigned long flags) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; - unsigned long size, stride; + uint64_t size, stride; uint32_t tiling; do { @@ -2247,14 +2248,14 @@ drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, ret = -errno; if (errno == ENOSPC) { DBG("Execbuffer fails to pin. " - "Estimate: %u. Actual: %u. Available: %u\n", - drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, + "Estimate: %llu. Actual: %llu. Available: %llu\n", + (long long)drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, bufmgr_gem-> exec_count), - drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, + (long long)drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, bufmgr_gem-> exec_count), - (unsigned int)bufmgr_gem->gtt_size); + (long long)bufmgr_gem->gtt_size); } } drm_intel_update_buffer_offsets(bufmgr_gem); @@ -2345,12 +2346,12 @@ do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx, ret = -errno; if (ret == -ENOSPC) { DBG("Execbuffer fails to pin. " - "Estimate: %u. Actual: %u. Available: %u\n", - drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, + "Estimate: %llu. Actual: %llu. 
Available: %llu\n", + (long long)drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, bufmgr_gem->exec_count), - drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, + (long long)drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, bufmgr_gem->exec_count), - (unsigned int) bufmgr_gem->gtt_size); + (long long)bufmgr_gem->gtt_size); } } drm_intel_update_buffer_offsets2(bufmgr_gem); @@ -2681,12 +2682,12 @@ drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr) * Return the additional aperture space required by the tree of buffer objects * rooted at bo. */ -static int +static uint64_t drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo) { drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + uint64_t total = 0; int i; - int total = 0; if (bo == NULL || bo_gem->included_in_check_aperture) return 0; @@ -2751,11 +2752,11 @@ drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo) * Return a conservative estimate for the amount of aperture required * for a collection of buffers. This may double-count some buffers. */ -static unsigned int +static uint64_t drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count) { + uint64_t total = 0; int i; - unsigned int total = 0; for (i = 0; i < count; i++) { drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; @@ -2770,11 +2771,11 @@ drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count) * This avoids double counting any buffers, at the cost of looking * at every buffer in the set. 
*/ -static unsigned int +static uint64_t drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count) { + uint64_t total = 0; int i; - unsigned int total = 0; for (i = 0; i < count; i++) { total += drm_intel_gem_bo_get_aperture_space(bo_array[i]); @@ -2820,8 +2821,8 @@ drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr; - unsigned int total = 0; - unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4; + uint64_t threshold = bufmgr_gem->gtt_size * 3 / 4; + uint64_t total = 0; int total_fences; /* Check for fence reg constraints if necessary */ @@ -2839,11 +2840,13 @@ drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count) if (total > threshold) { DBG("check_space: overflowed available aperture, " "%dkb vs %dkb\n", - total / 1024, (int)bufmgr_gem->gtt_size / 1024); + (int)(total / 1024), + (int)(bufmgr_gem->gtt_size / 1024)); return -ENOSPC; } else { - DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024, - (int)bufmgr_gem->gtt_size / 1024); + DBG("drm_check_space: total %dkb vs bufgr %dkb\n", + (int)(total / 1024), + (int)(bufmgr_gem->gtt_size / 1024)); return 0; } } @@ -3345,6 +3348,40 @@ void *drm_intel_gem_bo_map__wc(drm_intel_bo *bo) return bo_gem->wc_virtual; } +static uint64_t get_gtt_size(int fd) +{ + struct drm_i915_gem_get_aperture aperture; + struct local_i915_gem_context_param { + uint32_t context; + uint32_t size; + uint64_t param; +#define LOCAL_CONTEXT_PARAM_BAN_PERIOD 0x1 +#define LOCAL_CONTEXT_PARAM_NO_ZEROMAP 0x2 +#define LOCAL_CONTEXT_PARAM_GTT_SIZE 0x3 + uint64_t value; + } p; +#define LOCAL_I915_GEM_CONTEXT_GETPARAM 0x34 +#define LOCAL_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_CONTEXT_GETPARAM, struct local_i915_gem_context_param) + + memclear(aperture); + + memclear(p); + p.param = LOCAL_CONTEXT_PARAM_GTT_SIZE; + if (drmIoctl(fd, LOCAL_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p) == 0) 
+ aperture.aper_size = p.value; + if (aperture.aper_size == 0) + (void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); + if (aperture.aper_size == 0) { + aperture.aper_size = 128 * 1024 * 1024; + fprintf(stderr, + "Unable to query GTT size, assuming %dkB available aperture size.\n" + "May lead to reduced performance or incorrect rendering.\n", + (int)(aperture.aper_size / 1024)); + } + + return aperture.aper_size; +} + /** * Initializes the GEM buffer manager, which uses the kernel to allocate, map, * and manage map buffer objections. @@ -3355,7 +3392,6 @@ drm_intel_bufmgr * drm_intel_bufmgr_gem_init(int fd, int batch_size) { drm_intel_bufmgr_gem *bufmgr_gem; - struct drm_i915_gem_get_aperture aperture; drm_i915_getparam_t gp; int ret, tmp; bool exec2 = false; @@ -3379,23 +3415,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) goto exit; } - memclear(aperture); - ret = drmIoctl(bufmgr_gem->fd, - DRM_IOCTL_I915_GEM_GET_APERTURE, - &aperture); - - if (ret == 0) - bufmgr_gem->gtt_size = aperture.aper_available_size; - else { - fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n", - strerror(errno)); - bufmgr_gem->gtt_size = 128 * 1024 * 1024; - fprintf(stderr, "Assuming %dkB available aperture size.\n" - "May lead to reduced performance or incorrect " - "rendering.\n", - (int)bufmgr_gem->gtt_size / 1024); - } - + bufmgr_gem->gtt_size = get_gtt_size(bufmgr_gem->fd); bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem); if (IS_GEN2(bufmgr_gem->pci_device)) -- 2.6.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx