In order to handle tiled partial GTT mmappings, we need to associate the
fence with an individual vma.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  15 +-
 drivers/gpu/drm/i915/i915_drv.h            |  81 ++++--
 drivers/gpu/drm/i915/i915_gem.c            |  34 ++-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  21 +-
 drivers/gpu/drm/i915/i915_gem_fence.c      | 381 +++++++++++------------------
 drivers/gpu/drm/i915/i915_gem_gtt.c        |   7 +
 drivers/gpu/drm/i915/i915_gem_gtt.h        |   9 +
 drivers/gpu/drm/i915/i915_gem_tiling.c     |  65 +++--
 drivers/gpu/drm/i915/i915_gpu_error.c      |   2 +-
 drivers/gpu/drm/i915/intel_display.c       |  57 ++---
 drivers/gpu/drm/i915/intel_fbc.c           |  30 ++-
 drivers/gpu/drm/i915/intel_fbdev.c         |   4 +-
 drivers/gpu/drm/i915/intel_overlay.c       |   2 +-
 13 files changed, 324 insertions(+), 384 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 418b80de5246..f15ed7793969 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -133,9 +133,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	for_each_ring(ring, dev_priv, i)
 		seq_printf(m, "%x ",
 			   i915_gem_request_get_seqno(obj->last_read[i].request));
-	seq_printf(m, "] %x %x%s%s%s",
+	seq_printf(m, "] %x %s%s%s",
 		   i915_gem_request_get_seqno(obj->last_write.request),
-		   i915_gem_request_get_seqno(obj->last_fence.request),
 		   i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level),
 		   obj->dirty ? " dirty" : "",
 		   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
@@ -148,8 +147,6 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		seq_printf(m, " (pinned x %d)", pin_count);
 	if (obj->pin_display)
 		seq_printf(m, " (display)");
-	if (obj->fence_reg != I915_FENCE_REG_NONE)
-		seq_printf(m, " (fence: %d)", obj->fence_reg);
 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
 		if (!drm_mm_node_allocated(&vma->node))
 			continue;
@@ -159,6 +156,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 			   vma->node.start, vma->node.size);
 		if (vma->is_ggtt)
 			seq_printf(m, ", type: %u", vma->ggtt_view.type);
+		if (vma->fence)
+			seq_printf(m, " , fence: %d%s",
+				   vma->fence->id,
+				   vma->last_fence.request ? "*" : "");
 		seq_puts(m, ")");
 	}
 	if (obj->stolen)
@@ -948,14 +949,14 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
 
 	seq_printf(m, "Total fences = %d\n", dev_priv->num_fence_regs);
 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
-		struct drm_i915_gem_object *obj = dev_priv->fence_regs[i].obj;
+		struct i915_vma *vma = dev_priv->fence_regs[i].vma;
 
 		seq_printf(m, "Fence %d, pin count = %d, object = ",
 			   i, dev_priv->fence_regs[i].pin_count);
-		if (obj == NULL)
+		if (vma == NULL)
 			seq_puts(m, "unused");
 		else
-			describe_obj(m, obj);
+			describe_obj(m, vma->obj);
 		seq_putc(m, '\n');
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cfc4430d3b50..bb0f750bb5b5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -458,15 +458,21 @@ struct intel_opregion {
 struct intel_overlay;
 struct intel_overlay_error_state;
 
-#define I915_FENCE_REG_NONE -1
-#define I915_MAX_NUM_FENCES 32
-/* 32 fences + sign bit for FENCE_REG_NONE */
-#define I915_MAX_NUM_FENCE_BITS 6
-
 struct drm_i915_fence_reg {
 	struct list_head lru_list;
-	struct drm_i915_gem_object *obj;
+	struct drm_i915_private *i915;
+	struct i915_vma *vma;
 	int pin_count;
+	int id;
+	/**
+	 * Whether the tiling parameters for the currently
+	 * associated fence register have changed. Note that
+	 * for the purposes of tracking tiling changes we also
+	 * treat the unfenced register, the register slot that
+	 * the object occupies whilst it executes a fenced
+	 * command (such as BLT on gen2/3), as a "fence".
+	 */
+	bool dirty;
 };
 
 struct sdvo_device_mapping {
@@ -2053,13 +2059,6 @@ struct drm_i915_gem_object {
 	unsigned int dirty:1;
 
 	/**
-	 * Fence register bits (if any) for this object. Will be set
-	 * as needed when mapped into the GTT.
-	 * Protected by dev->struct_mutex.
-	 */
-	signed int fence_reg:I915_MAX_NUM_FENCE_BITS;
-
-	/**
 	 * Advice: are the backing pages purgeable?
 	 */
 	unsigned int madv:2;
@@ -2068,14 +2067,6 @@ struct drm_i915_gem_object {
 	 * Current tiling mode for the object.
 	 */
 	unsigned int tiling_mode:2;
-	/**
-	 * Whether the tiling parameters for the currently associated fence
-	 * register have changed. Note that for the purposes of tracking
-	 * tiling changes we also treat the unfenced register, the register
-	 * slot that the object occupies whilst it executes a fenced
-	 * command (such as BLT on gen2/3), as a "fence".
-	 */
-	unsigned int fence_dirty:1;
 
 	/**
 	 * Whether the current gtt mapping needs to be mappable (and isn't just
@@ -2118,7 +2109,6 @@ struct drm_i915_gem_object {
 	 */
 	struct i915_gem_active last_read[I915_NUM_RINGS];
 	struct i915_gem_active last_write;
-	struct i915_gem_active last_fence;
 
 	/** Current tiling stride for the object, if it's tiled. */
 	uint32_t stride;
@@ -2945,11 +2935,50 @@ i915_gem_object_ggtt_offset(struct drm_i915_gem_object *o,
 }
 
 /* i915_gem_fence.c */
-int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj);
-int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
+int __must_check i915_vma_get_fence(struct i915_vma *vma);
+int __must_check i915_vma_put_fence(struct i915_vma *vma);
 
-bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj);
-void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj);
+/**
+ * i915_vma_pin_fence - pin fencing state
+ * @vma: vma to pin fencing for
+ *
+ * This pins the fencing state (whether tiled or untiled) to make sure the
+ * vma (and its object) is ready to be used as a scanout target. Fencing
+ * status must be synchronized first by calling i915_vma_get_fence().
+ *
+ * The resulting fence pin reference must be released again with
+ * i915_vma_unpin_fence().
+ *
+ * Returns:
+ *
+ * True if the vma has a fence, false otherwise.
+ */
+static inline bool
+i915_vma_pin_fence(struct i915_vma *vma)
+{
+	if (vma->fence) {
+		vma->fence->pin_count++;
+		return true;
+	} else
+		return false;
+}
+
+/**
+ * i915_vma_unpin_fence - unpin fencing state
+ * @vma: vma to unpin fencing for
+ *
+ * This releases the fence pin reference acquired through
+ * i915_vma_pin_fence. It will handle both objects with and without an
+ * attached fence correctly; callers do not need to distinguish this.
+ */
+static inline void
+i915_vma_unpin_fence(struct i915_vma *vma)
+{
+	if (vma->fence) {
+		GEM_BUG_ON(vma->fence->pin_count <= 0);
+		vma->fence->pin_count--;
+	}
+}
 
 void i915_gem_restore_fences(struct drm_device *dev);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5bb21b20c36a..70397c1022d1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -863,11 +863,11 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
 		goto out;
 	}
 
-	ret = i915_gem_object_set_to_gtt_domain(obj, true);
+	ret = i915_vma_put_fence(vma);
 	if (ret)
 		goto out_unpin;
 
-	ret = i915_gem_object_put_fence(obj);
+	ret = i915_gem_object_set_to_gtt_domain(obj, true);
 	if (ret)
 		goto out_unpin;
 
@@ -1507,7 +1507,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	if (ret)
 		goto unpin;
 
-	ret = i915_gem_object_get_fence(obj);
+	ret = i915_vma_get_fence(ggtt);
 	if (ret)
 		goto unpin;
 
@@ -2112,12 +2112,6 @@ void *i915_gem_object_pin_vmap(struct drm_i915_gem_object *obj)
 }
 
 static void
-i915_gem_object_retire__fence(struct i915_gem_active *active,
-			      struct drm_i915_gem_request *req)
-{
-}
-
-static void
 i915_gem_object_retire__write(struct i915_gem_active *active,
 			      struct drm_i915_gem_request *request)
 {
@@ -2646,6 +2640,7 @@ static void i915_vma_destroy(struct i915_vma *vma)
 	GEM_BUG_ON(vma->node.allocated);
 	GEM_BUG_ON(vma->active);
 	GEM_BUG_ON(!vma->closed);
+	GEM_BUG_ON(vma->fence);
 
 	list_del(&vma->vm_link);
 	if (!vma->is_ggtt)
@@ -2695,7 +2690,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 		i915_gem_object_finish_gtt(obj);
 
 		/* release the fence reg _after_ flushing */
-		ret = i915_gem_object_put_fence(obj);
+		ret = i915_vma_put_fence(vma);
 		if (ret)
 			return ret;
 
@@ -3163,9 +3158,11 @@ restart:
 		 * dropped the fence as all snoopable access is
 		 * supposed to be linear.
 		 */
-		ret = i915_gem_object_put_fence(obj);
-		if (ret)
-			return ret;
+		list_for_each_entry(vma, &obj->vma_list, obj_link) {
+			ret = i915_vma_put_fence(vma);
+			if (ret)
+				return ret;
+		}
 	} else {
 		/* We either have incoherent backing store and
 		 * so no GTT access or the architecture is fully
@@ -3722,15 +3719,12 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 				    i915_gem_object_retire__read);
 	init_request_active(&obj->last_write,
 			    i915_gem_object_retire__write);
-	init_request_active(&obj->last_fence,
-			    i915_gem_object_retire__fence);
 	INIT_LIST_HEAD(&obj->obj_exec_link);
 	INIT_LIST_HEAD(&obj->vma_list);
 	INIT_LIST_HEAD(&obj->batch_pool_link);
 
 	obj->ops = ops;
 
-	obj->fence_reg = I915_FENCE_REG_NONE;
 	obj->madv = I915_MADV_WILLNEED;
 
 	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
@@ -4241,8 +4235,6 @@ i915_gem_load(struct drm_device *dev)
 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
 	for (i = 0; i < I915_NUM_RINGS; i++)
 		init_ring_lists(&dev_priv->ring[i]);
-	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
-		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
 	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
 			  i915_gem_retire_work_handler);
 	INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
@@ -4266,6 +4258,12 @@ i915_gem_load(struct drm_device *dev)
 
 	/* Initialize fence registers to zero */
 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
+	for (i = 0; i < dev_priv->num_fence_regs; i++) {
+		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
+		fence->i915 = dev_priv;
+		fence->id = i;
+		list_add_tail(&fence->lru_list, &dev_priv->mm.fence_list);
+	}
 	i915_gem_restore_fences(dev);
 
 	i915_gem_detect_bit_6_swizzle(dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d13b7e507b3d..691da0085ff4 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -247,7 +247,6 @@ static void
 i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
 {
 	struct drm_i915_gem_exec_object2 *entry;
-	struct drm_i915_gem_object *obj = vma->obj;
 
 	if (!drm_mm_node_allocated(&vma->node))
 		return;
@@ -255,7 +254,7 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
 	entry = vma->exec_entry;
 
 	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
-		i915_gem_object_unpin_fence(obj);
+		i915_vma_unpin_fence(vma);
 
 	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
 		__i915_vma_unpin(vma);
@@ -409,11 +408,11 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		return NULL;
 
-	ret = i915_gem_object_set_to_gtt_domain(obj, true);
+	ret = i915_vma_put_fence(vma);
 	if (ret)
 		return ERR_PTR(ret);
 
-	ret = i915_gem_object_put_fence(obj);
+	ret = i915_gem_object_set_to_gtt_domain(obj, true);
 	if (ret)
 		return ERR_PTR(ret);
 
@@ -746,11 +745,11 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 		entry->flags |= __EXEC_OBJECT_HAS_PIN;
 
 	if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
-		ret = i915_gem_object_get_fence(obj);
+		ret = i915_vma_get_fence(vma);
 		if (ret)
 			return ret;
 
-		if (i915_gem_object_pin_fence(obj))
+		if (i915_vma_pin_fence(vma))
 			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
 	}
 
@@ -1227,14 +1226,8 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
 	}
 
-	if (flags & EXEC_OBJECT_NEEDS_FENCE) {
-		i915_gem_request_mark_active(req, &obj->last_fence);
-		if (flags & __EXEC_OBJECT_HAS_FENCE) {
-			struct drm_i915_private *dev_priv = req->i915;
-			list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
-				       &dev_priv->mm.fence_list);
-		}
-	}
+	if (flags & EXEC_OBJECT_NEEDS_FENCE)
+		i915_gem_request_mark_active(req, &vma->last_fence);
 
 	vma->active |= 1 << engine;
 	i915_gem_request_mark_active(req, &vma->last_read[engine]);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index e0f5fba22931..073601ec227a 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -55,67 +55,66 @@
  * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
  */
 
-static void i965_write_fence_reg(struct drm_device *dev, int reg,
-				 struct drm_i915_gem_object *obj)
+static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
+				 struct i915_vma *vma)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	i915_reg_t fence_reg_lo, fence_reg_hi;
 	int fence_pitch_shift;
+	u64 val;
 
-	if (INTEL_INFO(dev)->gen >= 6) {
-		fence_reg_lo = FENCE_REG_GEN6_LO(reg);
-		fence_reg_hi = FENCE_REG_GEN6_HI(reg);
+	if (INTEL_INFO(fence->i915)->gen >= 6) {
+		fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
+		fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
 		fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
+
 	} else {
-		fence_reg_lo = FENCE_REG_965_LO(reg);
-		fence_reg_hi = FENCE_REG_965_HI(reg);
+		fence_reg_lo = FENCE_REG_965_LO(fence->id);
+		fence_reg_hi = FENCE_REG_965_HI(fence->id);
 		fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
 	}
 
-	/* To w/a incoherency with non-atomic 64-bit register updates,
-	 * we split the 64-bit update into two 32-bit writes. In order
-	 * for a partial fence not to be evaluated between writes, we
-	 * precede the update with write to turn off the fence register,
-	 * and only enable the fence as the last step.
-	 *
-	 * For extra levels of paranoia, we make sure each step lands
-	 * before applying the next step.
-	 */
-	I915_WRITE(fence_reg_lo, 0);
-	POSTING_READ(fence_reg_lo);
-
-	if (obj) {
-		struct i915_vma *vma = i915_gem_object_to_ggtt(obj, NULL);
-		u32 row_size = obj->stride * (obj->tiling_mode == I915_TILING_Y ? 32 : 8);
+	if (vma) {
+		u32 stride = vma->obj->stride;
+		unsigned tiling_y = vma->obj->tiling_mode == I915_TILING_Y;
+		u32 row_size = stride * (tiling_y ? 32 : 8);
 		u32 size = (u32)vma->node.size / row_size * row_size;
-		u64 val;
 
 		val = ((vma->node.start + size - 4096) & 0xfffff000) << 32;
 		val |= vma->node.start & 0xfffff000;
-		val |= (u64)((obj->stride / 128) - 1) << fence_pitch_shift;
-		if (obj->tiling_mode == I915_TILING_Y)
+		val |= (u64)((stride / 128) - 1) << fence_pitch_shift;
+		if (tiling_y)
 			val |= 1 << I965_FENCE_TILING_Y_SHIFT;
 		val |= I965_FENCE_REG_VALID;
+	} else
+		val = 0;
 
-		I915_WRITE(fence_reg_hi, val >> 32);
-		POSTING_READ(fence_reg_hi);
+	if (1) {
+		struct drm_i915_private *dev_priv = fence->i915;
 
-		I915_WRITE(fence_reg_lo, val);
+		/* To w/a incoherency with non-atomic 64-bit register updates,
+		 * we split the 64-bit update into two 32-bit writes. In order
+		 * for a partial fence not to be evaluated between writes, we
+		 * precede the update with write to turn off the fence register,
+		 * and only enable the fence as the last step.
+		 *
+		 * For extra levels of paranoia, we make sure each step lands
+		 * before applying the next step.
+		 */
+		I915_WRITE(fence_reg_lo, 0);
+		POSTING_READ(fence_reg_lo);
+
+		I915_WRITE(fence_reg_hi, upper_32_bits(val));
+		I915_WRITE(fence_reg_lo, lower_32_bits(val));
 		POSTING_READ(fence_reg_lo);
-	} else {
-		I915_WRITE(fence_reg_hi, 0);
-		POSTING_READ(fence_reg_hi);
 	}
 }
 
-static void i915_write_fence_reg(struct drm_device *dev, int reg,
-				 struct drm_i915_gem_object *obj)
+static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
+				 struct i915_vma *vma)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 val;
 
-	if (obj) {
-		struct i915_vma *vma = i915_gem_object_to_ggtt(obj, NULL);
+	if (vma) {
 		int pitch_val;
 		int tile_width;
 
@@ -127,17 +126,18 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
 		     vma->map_and_fenceable,
 		     (long)vma->node.size);
 
-		if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
+		if (vma->obj->tiling_mode == I915_TILING_Y &&
+		    HAS_128_BYTE_Y_TILING(fence->i915))
 			tile_width = 128;
 		else
 			tile_width = 512;
 
 		/* Note: pitch better be a power of two tile widths */
-		pitch_val = obj->stride / tile_width;
+		pitch_val = vma->obj->stride / tile_width;
 		pitch_val = ffs(pitch_val) - 1;
 
 		val = vma->node.start;
-		if (obj->tiling_mode == I915_TILING_Y)
+		if (vma->obj->tiling_mode == I915_TILING_Y)
 			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
 		val |= I915_FENCE_SIZE_BITS(vma->node.size);
 		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
@@ -145,18 +145,20 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
 	} else
 		val = 0;
 
-	I915_WRITE(FENCE_REG(reg), val);
-	POSTING_READ(FENCE_REG(reg));
+	if (1) {
+		struct drm_i915_private *dev_priv = fence->i915;
+		i915_reg_t reg = FENCE_REG(fence->id);
+
+		I915_WRITE(reg, val);
+		POSTING_READ(reg);
+	}
 }
 
-static void i830_write_fence_reg(struct drm_device *dev, int reg,
-				 struct drm_i915_gem_object *obj)
+static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
+				 struct i915_vma *vma)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	uint32_t val;
+	u32 val;
 
-	if (obj) {
-		struct i915_vma *vma = i915_gem_object_to_ggtt(obj, NULL);
+	if (vma) {
 		uint32_t pitch_val;
 
 		WARN((vma->node.start & ~I830_FENCE_START_MASK) ||
@@ -165,11 +167,11 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg,
 		     "object 0x%08lx not 512K or pot-size 0x%08lx aligned\n",
 		     (long)vma->node.start, (long)vma->node.size);
 
-		pitch_val = obj->stride / 128;
+		pitch_val = vma->obj->stride / 128;
 		pitch_val = ffs(pitch_val) - 1;
 
 		val = vma->node.start;
-		if (obj->tiling_mode == I915_TILING_Y)
+		if (vma->obj->tiling_mode == I915_TILING_Y)
 			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
 		val |= I830_FENCE_SIZE_BITS(vma->node.size);
 		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
@@ -177,87 +179,85 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg,
 	} else
 		val = 0;
 
-	I915_WRITE(FENCE_REG(reg), val);
-	POSTING_READ(FENCE_REG(reg));
+	if (1) {
+		struct drm_i915_private *dev_priv = fence->i915;
+		i915_reg_t reg = FENCE_REG(fence->id);
+
+		I915_WRITE(reg, val);
+		POSTING_READ(reg);
+	}
 }
 
-inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
+static void fence_write(struct drm_i915_fence_reg *fence,
+			struct i915_vma *vma)
 {
-	return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
-}
+	/* Previous access through the fence register is marshalled by
+	 * the mb() inside the fault handlers (i915_gem_release_mmaps)
+	 * and explicitly managed for internal users.
+	 */
 
-static void i915_gem_write_fence(struct drm_device *dev, int reg,
-				 struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	if (IS_GEN2(fence->i915))
+		i830_write_fence_reg(fence, vma);
+	else if (IS_GEN3(fence->i915))
+		i915_write_fence_reg(fence, vma);
+	else
+		i965_write_fence_reg(fence, vma);
 
-	/* Ensure that all CPU reads are completed before installing a fence
-	 * and all writes before removing the fence.
-	 */
-	if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
-		mb();
-
-	WARN(obj && (!obj->stride || !obj->tiling_mode),
-	     "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
-	     obj->stride, obj->tiling_mode);
-
-	if (IS_GEN2(dev))
-		i830_write_fence_reg(dev, reg, obj);
-	else if (IS_GEN3(dev))
-		i915_write_fence_reg(dev, reg, obj);
-	else if (INTEL_INFO(dev)->gen >= 4)
-		i965_write_fence_reg(dev, reg, obj);
-
-	/* And similarly be paranoid that no direct access to this region
-	 * is reordered to before the fence is installed.
+	/* Access through the fenced region afterwards is
+	 * ordered by the posting reads whilst writing the registers.
 	 */
-	if (i915_gem_object_needs_mb(obj))
-		mb();
-}
 
-static inline int fence_number(struct drm_i915_private *dev_priv,
-			       struct drm_i915_fence_reg *fence)
-{
-	return fence - dev_priv->fence_regs;
+	fence->dirty = false;
 }
 
-static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
-					 struct drm_i915_fence_reg *fence,
-					 bool enable)
+static int fence_update(struct drm_i915_fence_reg *fence,
+			struct i915_vma *vma)
 {
-	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-	int reg = fence_number(dev_priv, fence);
+	int ret;
 
-	i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
+	if (vma) {
+		if (!vma->map_and_fenceable)
+			return -EINVAL;
 
-	if (enable) {
-		obj->fence_reg = reg;
-		fence->obj = obj;
-		list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
-	} else {
-		obj->fence_reg = I915_FENCE_REG_NONE;
-		fence->obj = NULL;
-		list_del_init(&fence->lru_list);
+		if (WARN(!vma->obj->stride || !vma->obj->tiling_mode,
+			 "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
+			 vma->obj->stride, vma->obj->tiling_mode))
+			return -EINVAL;
+
+		ret = i915_wait_request(vma->last_fence.request);
+		if (ret)
+			return ret;
 	}
-	obj->fence_dirty = false;
-}
 
-static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
-{
-	if (obj->tiling_mode)
-		i915_gem_release_mmap(obj);
+	if (fence->vma) {
+		ret = i915_wait_request(fence->vma->last_fence.request);
+		if (ret)
+			return ret;
+	}
 
-	/* As we do not have an associated fence register, we will force
-	 * a tiling change if we ever need to acquire one.
-	 */
-	obj->fence_dirty = false;
-	obj->fence_reg = I915_FENCE_REG_NONE;
-}
+	if (fence->vma && fence->vma != vma) {
+		/* Ensure that all userspace CPU access is completed before
+		 * stealing the fence.
+		 */
+		i915_gem_release_mmap(fence->vma->obj);
 
-static int
-i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
-{
-	return i915_wait_request(obj->last_fence.request);
+		fence->vma->fence = NULL;
+		fence->vma = NULL;
+
+		list_move(&fence->lru_list, &fence->i915->mm.fence_list);
+	}
+
+	fence_write(fence, vma);
+
+	if (vma) {
+		if (fence->vma != vma) {
+			vma->fence = fence;
+			fence->vma = vma;
+		}
+
+		list_move_tail(&fence->lru_list, &fence->i915->mm.fence_list);
+	}
+
+	return 0;
 }
 
 /**
@@ -272,62 +272,32 @@ i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
  * 0 on success, negative error code on failure.
  */
 int
-i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
+i915_vma_put_fence(struct i915_vma *vma)
 {
-	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-	struct drm_i915_fence_reg *fence;
-	int ret;
+	struct drm_i915_fence_reg *fence = vma->fence;
 
-	ret = i915_gem_object_wait_fence(obj);
-	if (ret)
-		return ret;
-
-	if (obj->fence_reg == I915_FENCE_REG_NONE)
+	if (fence == NULL)
 		return 0;
 
-	fence = &dev_priv->fence_regs[obj->fence_reg];
-
 	if (WARN_ON(fence->pin_count))
 		return -EBUSY;
 
-	i915_gem_object_fence_lost(obj);
-	i915_gem_object_update_fence(obj, fence, false);
-
-	return 0;
+	return fence_update(fence, NULL);
 }
 
-static struct drm_i915_fence_reg *
-i915_find_fence_reg(struct drm_device *dev)
+static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_fence_reg *reg, *avail;
-	int i;
-
-	/* First try to find a free reg */
-	avail = NULL;
-	for (i = 0; i < dev_priv->num_fence_regs; i++) {
-		reg = &dev_priv->fence_regs[i];
-		if (!reg->obj)
-			return reg;
-
-		if (!reg->pin_count)
-			avail = reg;
-	}
-
-	if (avail == NULL)
-		goto deadlock;
+	struct drm_i915_fence_reg *fence;
 
-	/* None available, try to steal one or wait for a user to finish */
-	list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
-		if (reg->pin_count)
+	list_for_each_entry(fence, &dev_priv->mm.fence_list, lru_list) {
+		if (fence->pin_count)
 			continue;
 
-		return reg;
+		return fence;
 	}
 
-deadlock:
 	/* Wait for completion of pending flips which consume fences */
-	if (intel_has_pending_fb_unpin(dev))
+	if (intel_has_pending_fb_unpin(dev_priv->dev))
 		return ERR_PTR(-EAGAIN);
 
 	return ERR_PTR(-EDEADLK);
@@ -352,95 +322,27 @@ deadlock:
  * 0 on success, negative error code on failure.
  */
 int
-i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
+i915_vma_get_fence(struct i915_vma *vma)
 {
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	bool enable = obj->tiling_mode != I915_TILING_NONE;
-	struct drm_i915_fence_reg *reg;
-	int ret;
-
-	/* Have we updated the tiling parameters upon the object and so
-	 * will need to serialise the write to the associated fence register?
-	 */
-	if (obj->fence_dirty) {
-		ret = i915_gem_object_wait_fence(obj);
-		if (ret)
-			return ret;
-	}
+	struct drm_i915_fence_reg *fence;
+	struct i915_vma *set = vma->obj->tiling_mode ? vma : NULL;
 
 	/* Just update our place in the LRU if our fence is getting reused. */
-	if (obj->fence_reg != I915_FENCE_REG_NONE) {
-		reg = &dev_priv->fence_regs[obj->fence_reg];
-		if (!obj->fence_dirty) {
-			list_move_tail(&reg->lru_list,
-				       &dev_priv->mm.fence_list);
+	if (vma->fence) {
+		fence = vma->fence;
+		if (!fence->dirty) {
+			list_move_tail(&fence->lru_list,
+				       &fence->i915->mm.fence_list);
 			return 0;
 		}
-	} else if (enable) {
-		reg = i915_find_fence_reg(dev);
-		if (IS_ERR(reg))
-			return PTR_ERR(reg);
-
-		if (reg->obj) {
-			struct drm_i915_gem_object *old = reg->obj;
-
-			ret = i915_gem_object_wait_fence(old);
-			if (ret)
-				return ret;
-
-			i915_gem_object_fence_lost(old);
-		}
+	} else if (set) {
+		fence = fence_find(to_i915(vma->vm->dev));
+		if (IS_ERR(fence))
+			return PTR_ERR(fence);
 	} else
 		return 0;
 
-	i915_gem_object_update_fence(obj, reg, enable);
-
-	return 0;
-}
-
-/**
- * i915_gem_object_pin_fence - pin fencing state
- * @obj: object to pin fencing for
- *
- * This pins the fencing state (whether tiled or untiled) to make sure the
- * object is ready to be used as a scanout target. Fencing status must be
- * synchronize first by calling i915_gem_object_get_fence():
- *
- * The resulting fence pin reference must be released again with
- * i915_gem_object_unpin_fence().
- *
- * Returns:
- *
- * True if the object has a fence, false otherwise.
- */
-bool
-i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
-{
-	if (obj->fence_reg != I915_FENCE_REG_NONE) {
-		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-		dev_priv->fence_regs[obj->fence_reg].pin_count++;
-		return true;
-	} else
-		return false;
-}
-
-/**
- * i915_gem_object_unpin_fence - unpin fencing state
- * @obj: object to unpin fencing for
- *
- * This releases the fence pin reference acquired through
- * i915_gem_object_pin_fence. It will handle both objects with and without an
- * attached fence correctly, callers do not need to distinguish this.
- */
-void
-i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
-{
-	if (obj->fence_reg != I915_FENCE_REG_NONE) {
-		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-		WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0);
-		dev_priv->fence_regs[obj->fence_reg].pin_count--;
-	}
+	return fence_update(fence, set);
 }
 
 /**
@@ -462,12 +364,7 @@ void i915_gem_restore_fences(struct drm_device *dev)
 		 * Commit delayed tiling changes if we have an object still
 		 * attached to the fence, otherwise just clear the fence.
 		 */
-		if (reg->obj) {
-			i915_gem_object_update_fence(reg->obj, reg,
-						     reg->obj->tiling_mode);
-		} else {
-			i915_gem_write_fence(dev, i, NULL);
-		}
+		fence_write(reg, reg->vma);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 59e7b11bf0ac..3db8cdf56dcc 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3265,6 +3265,12 @@ i915_vma_retire(struct i915_gem_active *active,
 	WARN_ON(i915_vma_unbind(vma));
 }
 
+static void
+i915_vma_retire__fence(struct i915_gem_active *active,
+		       struct drm_i915_gem_request *request)
+{
+}
+
 static struct i915_vma *
 __i915_gem_vma_create(struct drm_i915_gem_object *obj,
 		      struct i915_address_space *vm,
@@ -3282,6 +3288,7 @@ __i915_gem_vma_create(struct drm_i915_gem_object *obj,
 	INIT_LIST_HEAD(&vma->exec_list);
 	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
 		init_request_active(&vma->last_read[i], i915_vma_retire);
+	init_request_active(&vma->last_fence, i915_vma_retire__fence);
 	list_add(&vma->vm_link, &vm->unbound_list);
 	vma->vm = vm;
 	vma->obj = obj;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 0e0570e13a68..c0ada0402335 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -36,7 +36,14 @@
 
 #include "i915_gem_request.h"
 
+#define I915_FENCE_REG_NONE -1
+#define I915_MAX_NUM_FENCES 32
+/* 32 fences + sign bit for FENCE_REG_NONE */
+#define I915_MAX_NUM_FENCE_BITS 6
+
+
 struct drm_i915_file_private;
+struct drm_i915_fence_reg;
 
 typedef uint32_t gen6_pte_t;
 typedef uint64_t gen8_pte_t;
@@ -181,10 +188,12 @@ struct i915_vma {
 	struct drm_mm_node node;
 	struct drm_i915_gem_object *obj;
 	struct i915_address_space *vm;
+	struct drm_i915_fence_reg *fence;
 	void *iomap;
 	u64 size;
 
 	struct i915_gem_active last_read[I915_NUM_RINGS];
+	struct i915_gem_active last_fence;
 
 	union {
 		struct {
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 7c2da8060757..57aab59c6a5c 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -113,12 +113,37 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
 	return true;
 }
 
+static bool i915_vma_fence_ok(struct i915_vma *vma, int tiling_mode)
+{
+	u32 size;
+
+	if (!vma->map_and_fenceable)
+		return true;
+
+	if (INTEL_INFO(vma->vm->dev)->gen == 3) {
+		if (vma->node.start & ~I915_FENCE_START_MASK)
+			return false;
+	} else {
+		if (vma->node.start & ~I830_FENCE_START_MASK)
+			return false;
+	}
+
+	size = i915_gem_get_gtt_size(vma->vm->dev, vma->size, tiling_mode);
+	if (vma->node.size < size)
+		return false;
+
+	if (vma->node.start & (size - 1))
+		return false;
+
+	return true;
+}
+
 /* Is the current GTT allocation valid for the change in tiling? */
 static int
 i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode)
 {
 	struct i915_vma *vma;
-	u32 size;
+	int ret;
 
 	if (tiling_mode == I915_TILING_NONE)
 		return 0;
@@ -126,32 +151,16 @@ i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode)
 	if (INTEL_INFO(obj->base.dev)->gen >= 4)
 		return 0;
 
-	vma = i915_gem_object_to_ggtt(obj, NULL);
-	if (vma == NULL)
-		return 0;
+	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+		if (i915_vma_fence_ok(vma, tiling_mode))
+			continue;
 
-	if (!vma->map_and_fenceable)
-		return 0;
-
-	if (INTEL_INFO(obj->base.dev)->gen == 3) {
-		if (vma->node.start & ~I915_FENCE_START_MASK)
-			goto bad;
-	} else {
-		if (vma->node.start & ~I830_FENCE_START_MASK)
-			goto bad;
+		ret = i915_vma_unbind(vma);
+		if (ret)
+			return ret;
 	}
 
-	size = i915_gem_get_gtt_size(obj->base.dev, vma->size, tiling_mode);
-	if (vma->node.size < size)
-		goto bad;
-
-	if (vma->node.start & (size - 1))
-		goto bad;
-
 	return 0;
-
-bad:
-	return i915_vma_unbind(vma);
 }
 
 /**
@@ -240,6 +249,8 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 		 */
 		ret = i915_gem_object_fence_ok(obj, args->tiling_mode);
 		if (ret == 0) {
+			struct i915_vma *vma;
+
 			if (obj->pages &&
 			    obj->madv == I915_MADV_WILLNEED &&
 			    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
@@ -249,10 +260,12 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 					i915_gem_object_pin_pages(obj);
 			}
 
-			obj->fence_dirty =
-				obj->last_fence.request ||
-				obj->fence_reg != I915_FENCE_REG_NONE;
+			list_for_each_entry(vma, &obj->vma_list, obj_link) {
+				if (!vma->fence)
+					continue;
 
+				vma->fence->dirty = true;
+			}
 			obj->tiling_mode = args->tiling_mode;
 			obj->stride = args->stride;
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 69ce355e00ea..e5907ac666ad 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -711,7 +711,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->gtt_offset = vma->node.start;
 	err->read_domains = obj->base.read_domains;
 	err->write_domain = obj->base.write_domain;
-	err->fence_reg = obj->fence_reg;
+	err->fence_reg = vma->fence ? vma->fence->id : -1;
 	err->tiling = obj->tiling_mode;
 	err->dirty = obj->dirty;
 	err->purgeable = obj->madv != I915_MADV_WILLNEED;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 218bfd3c99fc..13d283e4b0a3 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2333,7 +2333,6 @@ intel_pin_and_fence_fb_obj(struct drm_plane *plane,
 	struct i915_ggtt_view view;
 	struct i915_vma *vma;
 	u32 alignment;
-	int ret;
 
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
@@ -2381,43 +2380,33 @@ intel_pin_and_fence_fb_obj(struct drm_plane *plane,
 	intel_runtime_pm_get(dev_priv);
 
 	vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view);
-	if (IS_ERR(vma)) {
-		ret = PTR_ERR(vma);
-		goto err_pm;
-	}
+	if (IS_ERR(vma))
+		goto err;
 
-	/* Install a fence for tiled scan-out. Pre-i965 always needs a
-	 * fence, whereas 965+ only requires a fence if using
-	 * framebuffer compression.  For simplicity, we always install
-	 * a fence as the cost is not that onerous.
-	 */
 	if (vma->map_and_fenceable) {
-		ret = i915_gem_object_get_fence(obj);
-		if (ret == -EDEADLK) {
-			/*
-			 * -EDEADLK means there are no free fences
-			 * no pending flips.
-			 *
-			 * This is propagated to atomic, but it uses
-			 * -EDEADLK to force a locking recovery, so
-			 * change the returned error to -EBUSY.
-			 */
-			ret = -EBUSY;
-			goto err_unpin;
-		} else if (ret)
-			goto err_unpin;
-
-		i915_gem_object_pin_fence(obj);
+		/* Install a fence for tiled scan-out. Pre-i965 always needs a
+		 * fence, whereas 965+ only requires a fence if using
+		 * framebuffer compression. For simplicity, we always, when
+		 * possible, install a fence as the cost is not that onerous.
+		 *
+		 * If we fail to fence the tiled scanout, then either the
+		 * modeset will reject the change (which is highly unlikely as
+		 * the affected systems, all but one, do not have unmappable
+		 * space) or we will not be able to enable full powersaving
+		 * techniques (also likely not to apply due to various limits
+		 * FBC and the like impose on the size of the buffer, which
+		 * presumably we violated anyway with this unmappable buffer).
+		 * Anyway, it is presumably better to stumble onwards with
+		 * something and try to run the system in a "less than optimal"
+		 * mode that matches the user configuration.
+		 */
+		if (i915_vma_get_fence(vma) == 0)
+			i915_vma_pin_fence(vma);
 	}
 
+err:
 	intel_runtime_pm_put(dev_priv);
 	return vma;
-
-err_unpin:
-	i915_gem_object_unpin_from_display_plane(vma);
-err_pm:
-	intel_runtime_pm_put(dev_priv);
-	return ERR_PTR(ret);
 }
 
 static void intel_unpin_fb_obj(struct drm_framebuffer *fb,
@@ -2432,9 +2421,7 @@ static void intel_unpin_fb_obj(struct drm_framebuffer *fb,
 	intel_fill_fb_ggtt_view(&view, fb, state);
 	vma = i915_gem_object_to_ggtt(obj, &view);
 
-	if (vma->map_and_fenceable)
-		i915_gem_object_unpin_fence(obj);
-
+	i915_vma_unpin_fence(vma);
 	i915_gem_object_unpin_from_display_plane(vma);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index 8d8f1ce7f1ae..db48e3ccd7f7 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -130,11 +130,17 @@ static void i8xx_fbc_deactivate(struct drm_i915_private *dev_priv)
 	}
 }
 
+/* XXX replace me when we have VMA tracking for intel_plane_state */
+static int get_fence_id(struct drm_framebuffer *fb)
+{
+	struct i915_vma *vma = i915_gem_object_to_ggtt(intel_fb_obj(fb), NULL);
+	return vma->fence ? vma->fence->id : I915_FENCE_REG_NONE;
+}
+
 static void i8xx_fbc_activate(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
 	struct drm_framebuffer *fb = crtc->base.primary->fb;
-	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	int cfb_pitch;
 	int i;
 	u32 fbc_ctl;
@@ -173,7 +179,7 @@ static void i8xx_fbc_activate(struct intel_crtc *crtc)
 	if (IS_I945GM(dev_priv))
 		fbc_ctl |= FBC_CTL_C3_IDLE; /* 945 needs special SR handling */
 	fbc_ctl |= (cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT;
-	fbc_ctl |= obj->fence_reg;
+	fbc_ctl |= get_fence_id(fb);
 	I915_WRITE(FBC_CONTROL, fbc_ctl);
 }
 
@@ -186,7 +192,6 @@ static void g4x_fbc_activate(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
 	struct drm_framebuffer *fb = crtc->base.primary->fb;
-	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	u32 dpfc_ctl;
 
 	dev_priv->fbc.active = true;
@@ -196,7 +201,8 @@ static void g4x_fbc_activate(struct intel_crtc *crtc)
 		dpfc_ctl |= DPFC_CTL_LIMIT_2X;
 	else
 		dpfc_ctl |= DPFC_CTL_LIMIT_1X;
-	dpfc_ctl |= DPFC_CTL_FENCE_EN | obj->fence_reg;
+	dpfc_ctl |= get_fence_id(fb);
+	dpfc_ctl |= DPFC_CTL_FENCE_EN;
 
 	I915_WRITE(DPFC_FENCE_YOFF, get_crtc_fence_y_offset(crtc));
 
@@ -234,7 +240,6 @@ static void ilk_fbc_activate(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
 	struct drm_framebuffer *fb = crtc->base.primary->fb;
-	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	u32 dpfc_ctl;
 	int threshold = dev_priv->fbc.threshold;
 	unsigned int y_offset;
@@ -257,19 +262,21 @@ static void ilk_fbc_activate(struct intel_crtc *crtc)
 		dpfc_ctl |= DPFC_CTL_LIMIT_1X;
 		break;
 	}
-	dpfc_ctl |= DPFC_CTL_FENCE_EN;
 	if (IS_GEN5(dev_priv))
-		dpfc_ctl |= obj->fence_reg;
+		dpfc_ctl |= get_fence_id(fb);
+	dpfc_ctl |= DPFC_CTL_FENCE_EN;
 
 	y_offset = get_crtc_fence_y_offset(crtc);
 	I915_WRITE(ILK_DPFC_FENCE_YOFF, y_offset);
-	I915_WRITE(ILK_FBC_RT_BASE, i915_gem_object_ggtt_offset(obj, NULL) | ILK_FBC_RT_VALID);
+	I915_WRITE(ILK_FBC_RT_BASE,
+		   i915_gem_object_ggtt_offset(intel_fb_obj(fb), NULL) |
+		   ILK_FBC_RT_VALID);
 	/* enable it... */
 	I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
 
 	if (IS_GEN6(dev_priv)) {
 		I915_WRITE(SNB_DPFC_CTL_SA,
-			   SNB_CPU_FENCE_ENABLE | obj->fence_reg);
+			   SNB_CPU_FENCE_ENABLE | get_fence_id(fb));
 		I915_WRITE(DPFC_CPU_FENCE_OFFSET, y_offset);
 	}
 
@@ -299,7 +306,6 @@ static void gen7_fbc_activate(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
 	struct drm_framebuffer *fb = crtc->base.primary->fb;
-	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	u32 dpfc_ctl;
 	int threshold = dev_priv->fbc.threshold;
 
@@ -345,7 +351,7 @@ static void gen7_fbc_activate(struct intel_crtc *crtc)
 	I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
 
 	I915_WRITE(SNB_DPFC_CTL_SA,
-		   SNB_CPU_FENCE_ENABLE | obj->fence_reg);
+		   SNB_CPU_FENCE_ENABLE | get_fence_id(fb));
 	I915_WRITE(DPFC_CPU_FENCE_OFFSET, get_crtc_fence_y_offset(crtc));
 
 	intel_fbc_recompress(dev_priv);
@@ -781,7 +787,7 @@ static void __intel_fbc_update(struct intel_crtc *crtc)
 	 * by the CPU to the scanout and trigger updates to the FBC.
 	 */
 	if (obj->tiling_mode != I915_TILING_X ||
-	    obj->fence_reg == I915_FENCE_REG_NONE) {
+	    get_fence_id(fb) == I915_FENCE_REG_NONE) {
 		set_no_fbc_reason(dev_priv, "framebuffer not tiled or fenced");
 		goto out_disable;
 	}
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 8e7c341951fd..0c8de9420776 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -282,7 +282,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
 out_destroy_fbi:
 	drm_fb_helper_release_fbi(helper);
 out_unpin:
-	i915_gem_object_unpin_fence(vma->obj);
+	i915_vma_unpin_fence(vma);
 	i915_gem_object_unpin_from_display_plane(vma);
 out_unlock:
 	mutex_unlock(&dev->struct_mutex);
@@ -523,7 +523,7 @@ static void intel_fbdev_destroy(struct drm_device *dev,
 				struct intel_fbdev *ifbdev)
 {
 	if (ifbdev->vma) {
-		i915_gem_object_unpin_fence(ifbdev->vma->obj);
+		i915_vma_unpin_fence(ifbdev->vma);
 		i915_gem_object_unpin_from_display_plane(ifbdev->vma);
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index d1401f4c4762..97b75414263d 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -754,7 +754,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	ret = i915_gem_object_put_fence(new_bo);
+	ret = i915_vma_put_fence(vma);
 	if (ret)
 		goto out_unpin;
 
-- 
2.7.0.rc3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
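[Editor's note, not part of the patch: the per-vma fence API added above is
used by the execbuffer and scanout paths roughly as in the sketch below. It
is illustrative only; error handling and locking are elided, dev->struct_mutex
is assumed to be held, "vma" is assumed to be a bound, map_and_fenceable GGTT
vma, and the helper name is hypothetical.]

/* Illustrative sketch of the calling convention for the new API. */
static int use_fenced_vma(struct i915_vma *vma)
{
	int ret;

	/* Attach or refresh a fence register for this vma; this may steal
	 * an unpinned register from the LRU, waiting for idle as needed.
	 */
	ret = i915_vma_get_fence(vma);
	if (ret)
		return ret;

	/* Keep the fence from being stolen whilst it is in use. */
	if (i915_vma_pin_fence(vma)) {
		/* ... perform fenced (detiled) access via vma->node.start ... */
		i915_vma_unpin_fence(vma);
	}

	/* On unbind, i915_vma_put_fence() releases the register again,
	 * returning -EBUSY if the fence is still pinned.
	 */
	return 0;
}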