In the next patch, request tracking is made more generic and for that we need a new expanded struct and to separate out the logic changes from the mechanical churn, we split out the structure renaming into this patch. v2: Writer's block. Add some spiel about why we track requests. v3: Now i915_gem_active. v4: Now with i915_gem_active_set() for attaching to the active request. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_debugfs.c | 11 +++--- drivers/gpu/drm/i915/i915_drv.h | 9 +++-- drivers/gpu/drm/i915/i915_gem.c | 58 +++++++++++++++--------------- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 8 +++-- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +-- drivers/gpu/drm/i915/i915_gem_fence.c | 6 ++-- drivers/gpu/drm/i915/i915_gem_request.h | 41 +++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_tiling.c | 2 +- drivers/gpu/drm/i915/i915_gem_userptr.c | 2 +- drivers/gpu/drm/i915/i915_gpu_error.c | 7 ++-- drivers/gpu/drm/i915/intel_display.c | 8 ++--- 11 files changed, 98 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index d3852828878f..dd832eace487 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -155,10 +155,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) obj->base.write_domain); for_each_engine_id(engine, dev_priv, id) seq_printf(m, "%x ", - i915_gem_request_get_seqno(obj->last_read_req[id])); + i915_gem_request_get_seqno(obj->last_read[id].request)); seq_printf(m, "] %x %x%s%s%s", - i915_gem_request_get_seqno(obj->last_write_req), - i915_gem_request_get_seqno(obj->last_fenced_req), + i915_gem_request_get_seqno(obj->last_write.request), + i915_gem_request_get_seqno(obj->last_fence.request), i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level), obj->dirty ? " dirty" : "", obj->madv == I915_MADV_DONTNEED ? " purgeable" : ""); @@ -195,9 +195,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) *t = '\0'; seq_printf(m, " (%s mappable)", s); } - if (obj->last_write_req != NULL) - seq_printf(m, " (%s)", - i915_gem_request_get_engine(obj->last_write_req)->name); + if (obj->last_write.request) + seq_printf(m, " (%s)", obj->last_write.request->engine->name); if (obj->frontbuffer_bits) seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 27e1182544a2..5b096c65646c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2238,11 +2238,10 @@ struct drm_i915_gem_object { * requests on one ring where the write request is older than the * read request. This allows for the CPU to read from an active * buffer by only waiting for the write to complete. - * */ - struct drm_i915_gem_request *last_read_req[I915_NUM_ENGINES]; - struct drm_i915_gem_request *last_write_req; - /** Breadcrumb of last fenced GPU access to the buffer. */ - struct drm_i915_gem_request *last_fenced_req; + */ + struct i915_gem_active last_read[I915_NUM_ENGINES]; + struct i915_gem_active last_write; + struct i915_gem_active last_fence; /** Current tiling stride for the object, if it's tiled. */ uint32_t stride; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b6497e9961ab..5f302faf86e7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1353,23 +1353,23 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, int ret, i; if (readonly) { - if (obj->last_write_req != NULL) { - ret = i915_wait_request(obj->last_write_req); + if (obj->last_write.request) { + ret = i915_wait_request(obj->last_write.request); if (ret) return ret; - i = obj->last_write_req->engine->id; - if (obj->last_read_req[i] == obj->last_write_req) + i = obj->last_write.request->engine->id; + if (obj->last_read[i].request == obj->last_write.request) i915_gem_object_retire__read(obj, i); else i915_gem_object_retire__write(obj); } } else { for (i = 0; i < I915_NUM_ENGINES; i++) { - if (obj->last_read_req[i] == NULL) + if (!obj->last_read[i].request) continue; - ret = i915_wait_request(obj->last_read_req[i]); + ret = i915_wait_request(obj->last_read[i].request); if (ret) return ret; @@ -1397,9 +1397,9 @@ i915_gem_object_retire_request(struct drm_i915_gem_object *obj, { int ring = req->engine->id; - if (obj->last_read_req[ring] == req) + if (obj->last_read[ring].request == req) i915_gem_object_retire__read(obj, ring); - else if (obj->last_write_req == req) + else if (obj->last_write.request == req) i915_gem_object_retire__write(obj); if (!i915_reset_in_progress(&req->i915->gpu_error)) @@ -1428,7 +1428,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, if (readonly) { struct drm_i915_gem_request *req; - req = obj->last_write_req; + req = obj->last_write.request; if (req == NULL) return 0; @@ -1437,7 +1437,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, for (i = 0; i < I915_NUM_ENGINES; i++) { struct drm_i915_gem_request *req; - req = obj->last_read_req[i]; + req = obj->last_read[i].request; if (req == NULL) continue; @@ -2375,7 +2375,7 @@ void i915_vma_move_to_active(struct i915_vma *vma, obj->active |= intel_engine_flag(engine); list_move_tail(&obj->engine_list[engine->id], &engine->active_list); - i915_gem_request_assign(&obj->last_read_req[engine->id], req); + i915_gem_active_set(&obj->last_read[engine->id], req); list_move_tail(&vma->vm_link, &vma->vm->active_list); } @@ -2383,10 +2383,10 @@ void i915_vma_move_to_active(struct i915_vma *vma, static void i915_gem_object_retire__write(struct drm_i915_gem_object *obj) { - GEM_BUG_ON(obj->last_write_req == NULL); - GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write_req->engine))); + GEM_BUG_ON(!obj->last_write.request); + GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write.request->engine))); - i915_gem_request_assign(&obj->last_write_req, NULL); + i915_gem_request_assign(&obj->last_write.request, NULL); intel_fb_obj_flush(obj, true, ORIGIN_CS); } @@ -2395,13 +2395,13 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) { struct i915_vma *vma; - GEM_BUG_ON(obj->last_read_req[ring] == NULL); + GEM_BUG_ON(!obj->last_read[ring].request); GEM_BUG_ON(!(obj->active & (1 << ring))); list_del_init(&obj->engine_list[ring]); - i915_gem_request_assign(&obj->last_read_req[ring], NULL); + i915_gem_request_assign(&obj->last_read[ring].request, NULL); - if (obj->last_write_req && obj->last_write_req->engine->id == ring) + if (obj->last_write.request && obj->last_write.request->engine->id == ring) i915_gem_object_retire__write(obj); obj->active &= ~(1 << ring); @@ -2420,7 +2420,7 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) list_move_tail(&vma->vm_link, &vma->vm->inactive_list); } - i915_gem_request_assign(&obj->last_fenced_req, NULL); + i915_gem_request_assign(&obj->last_fence.request, NULL); i915_gem_object_put(obj); } @@ -2618,7 +2618,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine) struct drm_i915_gem_object, engine_list[engine->id]); - if (!list_empty(&obj->last_read_req[engine->id]->list)) + if (!list_empty(&obj->last_read[engine->id].request->list)) break; i915_gem_object_retire__read(obj, engine->id); @@ -2746,7 +2746,7 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj) for (i = 0; i < I915_NUM_ENGINES; i++) { struct drm_i915_gem_request *req; - req = obj->last_read_req[i]; + req = obj->last_read[i].request; if (req == NULL) continue; @@ -2822,10 +2822,10 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) i915_gem_object_put(obj); for (i = 0; i < I915_NUM_ENGINES; i++) { - if (obj->last_read_req[i] == NULL) + if (!obj->last_read[i].request) continue; - req[n++] = i915_gem_request_get(obj->last_read_req[i]); + req[n++] = i915_gem_request_get(obj->last_read[i].request); } mutex_unlock(&dev->struct_mutex); @@ -2916,12 +2916,12 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, n = 0; if (readonly) { - if (obj->last_write_req) - req[n++] = obj->last_write_req; + if (obj->last_write.request) + req[n++] = obj->last_write.request; } else { for (i = 0; i < I915_NUM_ENGINES; i++) - if (obj->last_read_req[i]) - req[n++] = obj->last_read_req[i]; + if (obj->last_read[i].request) + req[n++] = obj->last_read[i].request; } for (i = 0; i < n; i++) { ret = __i915_gem_object_sync(obj, to, req[i]); @@ -4022,12 +4022,12 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, for (i = 0; i < I915_NUM_ENGINES; i++) { struct drm_i915_gem_request *req; - req = obj->last_read_req[i]; + req = obj->last_read[i].request; if (req) args->busy |= 1 << (16 + req->engine->exec_id); } - if (obj->last_write_req) - args->busy |= obj->last_write_req->engine->exec_id; + if (obj->last_write.request) + args->busy |= obj->last_write.request->engine->exec_id; } unref: diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 85a5c86ba80a..aa767ca28532 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -227,13 +227,15 @@ static void export_fences(struct drm_i915_gem_object *obj, { struct reservation_object *resv = dma_buf->resv; struct drm_i915_gem_request *req; + unsigned long active; int idx; mutex_lock(&obj->base.dev->struct_mutex); mutex_lock(&resv->lock.base); - for (idx = 0; idx < ARRAY_SIZE(obj->last_read_req); idx++) { - req = obj->last_read_req[idx]; + active = obj->active; + for_each_active(active, idx) { + req = obj->last_read[idx].request; if (!req) continue; @@ -241,7 +243,7 @@ static void export_fences(struct drm_i915_gem_object *obj, reservation_object_add_shared_fence(resv, &req->fence); } - req = obj->last_write_req; + req = obj->last_write.request; if (req) reservation_object_add_excl_fence(resv, &req->fence); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 5bfb2e2f3e67..22261a511d7d 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1143,7 +1143,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, i915_vma_move_to_active(vma, req); eb_export_fence(obj, req); if (obj->base.write_domain) { - i915_gem_request_assign(&obj->last_write_req, req); + i915_gem_active_set(&obj->last_write, req); intel_fb_obj_invalidate(obj, ORIGIN_CS); @@ -1151,7 +1151,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; } if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { - i915_gem_request_assign(&obj->last_fenced_req, req); + i915_gem_active_set(&obj->last_fence, req); if (entry->flags & __EXEC_OBJECT_HAS_FENCE) { struct drm_i915_private *dev_priv = engine->i915; list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list, diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c index 251d7a95af89..9838046801bd 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/i915_gem_fence.c @@ -261,12 +261,12 @@ static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) static int i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) { - if (obj->last_fenced_req) { - int ret = i915_wait_request(obj->last_fenced_req); + if (obj->last_fence.request) { + int ret = i915_wait_request(obj->last_fence.request); if (ret) return ret; - i915_gem_request_assign(&obj->last_fenced_req, NULL); + i915_gem_request_assign(&obj->last_fence.request, NULL); } return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 928fc8baf435..ff8c54fa955f 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -247,4 +247,45 @@ static inline bool i915_spin_request(const struct drm_i915_gem_request *request, __i915_spin_request(request, state, timeout_us)); } +/* We treat requests as fences. This is not be to confused with our + * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync. + * We use the fences to synchronize access from the CPU with activity on the + * GPU, for example, we should not rewrite an object's PTE whilst the GPU + * is reading them. We also track fences at a higher level to provide + * implicit synchronisation around GEM objects, e.g. set-domain will wait + * for outstanding GPU rendering before marking the object ready for CPU + * access, or a pageflip will wait until the GPU is complete before showing + * the frame on the scanout. + * + * In order to use a fence, the object must track the fence it needs to + * serialise with. For example, GEM objects want to track both read and + * write access so that we can perform concurrent read operations between + * the CPU and GPU engines, as well as waiting for all rendering to + * complete, or waiting for the last GPU user of a "fence register". The + * object then embeds a @i915_gem_active to track the most recent (in + * retirment order) request relevant for the desired mode of access. + * The @i915_gem_active is updated with i915_gem_request_mark_active() to + * track the most recent fence request, typically this is done as part of + * i915_vma_move_to_active(). + * + * When the @i915_gem_active completes (is retired), it will + * signal its completion to the owner through a callback as well as mark + * itself as idle (i915_gem_active.request == NULL). The owner + * can then perform any action, such as delayed freeing of an active + * resource including itself. + */ +struct i915_gem_active { + struct drm_i915_gem_request *request; +}; + +static inline void +i915_gem_active_set(struct i915_gem_active *active, + struct drm_i915_gem_request *request) +{ + i915_gem_request_assign(&active->request, request); +} + +#define for_each_active(mask, idx) \ + for (; mask ? idx = ffs(mask) - 1, 1 : 0; mask &= ~(1 << idx)) + #endif /* I915_GEM_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index e83fc2d0433c..00d796da65fb 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -242,7 +242,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, } obj->fence_dirty = - obj->last_fenced_req || + obj->last_fence.request || obj->fence_reg != I915_FENCE_REG_NONE; obj->tiling_mode = args->tiling_mode; diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index e935b327f3f9..32f50a70ea42 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -74,7 +74,7 @@ static void wait_rendering(struct drm_i915_gem_object *obj) for (i = 0; i < I915_NUM_ENGINES; i++) { struct drm_i915_gem_request *req; - req = obj->last_read_req[i]; + req = obj->last_read[i].request; if (req == NULL) continue; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 2fbe81d51af1..5e12b8ee49d2 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -749,8 +749,8 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->size = obj->base.size; err->name = obj->base.name; for (i = 0; i < I915_NUM_ENGINES; i++) - err->rseqno[i] = i915_gem_request_get_seqno(obj->last_read_req[i]); - err->wseqno = i915_gem_request_get_seqno(obj->last_write_req); + err->rseqno[i] = i915_gem_request_get_seqno(obj->last_read[i].request); + err->wseqno = i915_gem_request_get_seqno(obj->last_write.request); err->gtt_offset = vma->node.start; err->read_domains = obj->base.read_domains; err->write_domain = obj->base.write_domain; @@ -762,8 +762,7 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->dirty = obj->dirty; err->purgeable = obj->madv != I915_MADV_WILLNEED; err->userptr = obj->userptr.mm != NULL; - err->ring = obj->last_write_req ? - i915_gem_request_get_engine(obj->last_write_req)->id : -1; + err->ring = obj->last_write.request ? obj->last_write.request->engine->id : -1; err->cache_level = obj->cache_level; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5f4bdc3d4972..27ac6db4e26a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11465,7 +11465,7 @@ static bool use_mmio_flip(struct intel_engine_cs *engine, if (resv && !reservation_object_test_signaled_rcu(resv, false)) return true; - return engine != i915_gem_request_get_engine(obj->last_write_req); + return engine != i915_gem_request_get_engine(obj->last_write.request); } static void skl_do_mmio_flip(struct intel_crtc *intel_crtc, @@ -11768,7 +11768,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) { engine = &dev_priv->engine[BCS]; } else if (INTEL_INFO(dev)->gen >= 7) { - engine = i915_gem_request_get_engine(obj->last_write_req); + engine = i915_gem_request_get_engine(obj->last_write.request); if (engine == NULL || engine->id != RCS) engine = &dev_priv->engine[BCS]; } else { @@ -11790,7 +11790,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func); i915_gem_request_assign(&work->flip_queued_req, - obj->last_write_req); + obj->last_write.request); schedule_work(&work->mmio_work); } else { @@ -14096,7 +14096,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, to_intel_plane_state(new_state); i915_gem_request_assign(&plane_state->wait_req, - obj->last_write_req); + obj->last_write.request); } return ret; -- 2.8.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx