--- drivers/gpu/drm/i915/i915_debugfs.c | 162 +--------------- drivers/gpu/drm/i915/i915_drv.h | 84 ++++++--- drivers/gpu/drm/i915/i915_gem.c | 150 ++++----------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +- drivers/gpu/drm/i915/i915_gem_request.c | 286 +++++++++++++++-------------- drivers/gpu/drm/i915/i915_gem_request.h | 5 + drivers/gpu/drm/i915/i915_gpu_error.c | 60 +----- drivers/gpu/drm/i915/intel_engine_cs.c | 3 +- drivers/gpu/drm/i915/intel_lrc.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 3 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 1 - drivers/gpu/drm/i915/intel_ringbuffer.h | 14 +- 12 files changed, 251 insertions(+), 522 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 0c8aeed4ca8f..6cf921ebfb60 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -366,28 +366,6 @@ static int per_file_stats(int id, void *ptr, void *data) stats.unbound); \ } while (0) -static void print_batch_pool_stats(struct seq_file *m, - struct drm_i915_private *dev_priv) -{ - struct drm_i915_gem_object *obj; - struct file_stats stats; - struct intel_engine_cs *engine; - int j; - - memset(&stats, 0, sizeof(stats)); - - for_each_engine(engine, dev_priv) { - for (j = 0; j < ARRAY_SIZE(engine->batch_pool.cache_list); j++) { - list_for_each_entry(obj, - &engine->batch_pool.cache_list[j], - batch_pool_link) - per_file_stats(0, obj, &stats); - } - } - - print_file_stats(m, "[k]batch pool", stats); -} - static int per_file_ctx_stats(int id, void *ptr, void *data) { struct i915_gem_context *ctx = ptr; @@ -545,7 +523,6 @@ static int i915_gem_object_info(struct seq_file *m, void* data) ggtt->base.total, ggtt->mappable_end - ggtt->base.start); seq_putc(m, '\n'); - print_batch_pool_stats(m, dev_priv); mutex_unlock(&dev->struct_mutex); mutex_lock(&dev->filelist_mutex); @@ -655,10 +632,9 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) if (work->flip_queued_req) { struct intel_engine_cs *engine = i915_gem_request_get_engine(work->flip_queued_req); - seq_printf(m, "Flip queued on %s at seqno %x, next seqno %x [current breadcrumb %x], completed? %d\n", + seq_printf(m, "Flip queued on %s at seqno %x, current breadcrumb %x, completed? %d\n", engine->name, i915_gem_request_get_seqno(work->flip_queued_req), - dev_priv->next_seqno, intel_engine_get_seqno(engine), i915_gem_request_completed(work->flip_queued_req)); } else @@ -688,99 +664,6 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) return 0; } -static int i915_gem_batch_pool_info(struct seq_file *m, void *data) -{ - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_gem_object *obj; - struct intel_engine_cs *engine; - int total = 0; - int ret, j; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - - for_each_engine(engine, dev_priv) { - for (j = 0; j < ARRAY_SIZE(engine->batch_pool.cache_list); j++) { - int count; - - count = 0; - list_for_each_entry(obj, - &engine->batch_pool.cache_list[j], - batch_pool_link) - count++; - seq_printf(m, "%s cache[%d]: %d objects\n", - engine->name, j, count); - - list_for_each_entry(obj, - &engine->batch_pool.cache_list[j], - batch_pool_link) { - seq_puts(m, " "); - describe_obj(m, obj); - seq_putc(m, '\n'); - } - - total += count; - } - } - - seq_printf(m, "total: %d\n", total); - - mutex_unlock(&dev->struct_mutex); - - return 0; -} - -static int i915_gem_request_info(struct seq_file *m, void *data) -{ - struct drm_info_node *node = m->private; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_engine_cs *engine; - struct drm_i915_gem_request *req; - int ret, any; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - - any = 0; - for_each_engine(engine, dev_priv) { - int count; - - count = 0; - list_for_each_entry(req, &engine->request_list, link) - count++; - if (count == 0) - continue; - - seq_printf(m, "%s requests: %d\n", engine->name, count); - list_for_each_entry(req, &engine->request_list, link) { - struct pid *pid = req->ctx->pid; - struct task_struct *task; - - rcu_read_lock(); - task = pid ? pid_task(pid, PIDTYPE_PID) : NULL; - seq_printf(m, " %x @ %d: %s [%d]\n", - req->fence.seqno, - (int) (jiffies - req->emitted_jiffies), - task ? task->comm : "<unknown>", - task ? task->pid : -1); - rcu_read_unlock(); - } - - any++; - } - mutex_unlock(&dev->struct_mutex); - - if (any == 0) - seq_puts(m, "No requests\n"); - - return 0; -} - static void i915_ring_seqno_info(struct seq_file *m, struct intel_engine_cs *engine) { @@ -1151,43 +1034,6 @@ static const struct file_operations i915_error_state_fops = { .release = i915_error_state_release, }; -static int -i915_next_seqno_get(void *data, u64 *val) -{ - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = to_i915(dev); - int ret; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - - *val = dev_priv->next_seqno; - mutex_unlock(&dev->struct_mutex); - - return 0; -} - -static int -i915_next_seqno_set(void *data, u64 val) -{ - struct drm_device *dev = data; - int ret; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - - ret = i915_gem_set_seqno(dev, val); - mutex_unlock(&dev->struct_mutex); - - return ret; -} - -DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops, - i915_next_seqno_get, i915_next_seqno_set, - "0x%llx\n"); - static int i915_frequency_info(struct seq_file *m, void *unused) { struct drm_info_node *node = m->private; @@ -2353,8 +2199,6 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) struct drm_file *file; seq_printf(m, "RPS enabled? %d\n", dev_priv->rps.enabled); - seq_printf(m, "GPU busy? %s [%x]\n", - yesno(dev_priv->gt.awake), dev_priv->gt.active_engines); seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv)); seq_printf(m, "Frequency requested %d; min hard:%d, soft:%d; max soft:%d, hard:%d\n", intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq), @@ -2693,7 +2537,6 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused) if (!HAS_RUNTIME_PM(dev_priv)) seq_puts(m, "Runtime power management not supported\n"); - seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake)); seq_printf(m, "IRQs disabled: %s\n", yesno(!intel_irqs_enabled(dev_priv))); #ifdef CONFIG_PM @@ -5284,7 +5127,6 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_gem_inactive", i915_gem_object_list_info, 0, (void *) INACTIVE_LIST}, {"i915_gem_stolen", i915_gem_stolen_list_info }, {"i915_gem_pageflip", i915_gem_pageflip_info, 0}, - {"i915_gem_request", i915_gem_request_info, 0}, {"i915_gem_seqno", i915_gem_seqno_info, 0}, {"i915_gem_fence_regs", i915_gem_fence_regs_info, 0}, {"i915_gem_interrupt", i915_interrupt_info, 0}, @@ -5292,7 +5134,6 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_gem_hws_blt", i915_hws_info, 0, (void *)BCS}, {"i915_gem_hws_bsd", i915_hws_info, 0, (void *)VCS}, {"i915_gem_hws_vebox", i915_hws_info, 0, (void *)VECS}, - {"i915_gem_batch_pool", i915_gem_batch_pool_info, 0}, {"i915_guc_info", i915_guc_info, 0}, {"i915_guc_load_status", i915_guc_load_status_info, 0}, {"i915_guc_log_dump", i915_guc_log_dump, 0}, @@ -5344,7 +5185,6 @@ static const struct i915_debugfs_files { {"i915_ring_test_irq", &i915_ring_test_irq_fops}, {"i915_gem_drop_caches", &i915_drop_caches_fops}, {"i915_error_state", &i915_error_state_fops}, - {"i915_next_seqno", &i915_next_seqno_fops}, {"i915_display_crc_ctl", &i915_display_crc_ctl_fops}, {"i915_pri_wm_latency", &i915_pri_wm_latency_fops}, {"i915_spr_wm_latency", &i915_spr_wm_latency_fops}, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 27381531ac1c..9ea15fa3b4f2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -867,6 +867,58 @@ struct i915_ctx_hang_stats { bool banned; }; +struct i915_vma; + +struct i915_timeline { + u32 id; + u32 next_seqno; + + struct mutex mutex; + struct list_head link; + struct drm_i915_private *i915; + + struct i915_timeline_engine { + u32 mask; + + struct i915_timeline *timeline; + struct list_head requests; + + u32 last_submitted_seqno; + + /* + * A pool of objects to use as shadow copies of client batch + * buffers when the command parser is enabled. Prevents the + * client from modifying the batch contents after software + * parsing. + */ + struct i915_gem_batch_pool batch_pool; + } engine[I915_NUM_ENGINES]; + + struct { + struct i915_vma *vma; + uint32_t *map; + } hws; + + /** + * Is the GPU currently considered idle, or busy executing + * userspace requests? Whilst idle, we allow runtime power + * management to power down the hardware and display clocks. + * In order to reduce the effect on performance, there + * is a slight delay before we do so. + */ + unsigned active_engines; + + /** + * We leave the user IRQ off as much as possible, + * but this means that requests will finish and never + * be retired once the system goes idle. Set a timer to + * fire periodically while the ring is running. When it + * fires, go retire requests. + */ + struct delayed_work retire_work; +}; + + /* This must match up with the value previously used for execbuf2.rsvd1. */ #define DEFAULT_CONTEXT_HANDLE 0 @@ -1791,8 +1843,8 @@ struct drm_i915_private { struct pci_dev *bridge_dev; struct i915_gem_context *kernel_context; struct intel_engine_cs engine[I915_NUM_ENGINES]; + struct i915_timeline kernel_timeline; struct i915_vma *semaphore_vma; - u32 next_seqno; struct drm_dma_handle *status_page_dmah; struct resource mch_res; @@ -2042,33 +2094,9 @@ struct drm_i915_private { struct { void (*cleanup_engine)(struct intel_engine_cs *engine); - /** - * Is the GPU currently considered idle, or busy executing - * userspace requests? Whilst idle, we allow runtime power - * management to power down the hardware and display clocks. - * In order to reduce the effect on performance, there - * is a slight delay before we do so. - */ - unsigned int active_engines; - bool awake; - - /** - * We leave the user IRQ off as much as possible, - * but this means that requests will finish and never - * be retired once the system goes idle. Set a timer to - * fire periodically while the ring is running. When it - * fires, go retire requests. - */ - struct delayed_work retire_work; + struct list_head timelines; - /** - * When we detect an idle GPU, we want to turn on - * powersaving features. So once we see that there - * are no more requests outstanding and no more - * arrive within a small period of time, we fire - * off the idle_work. - */ - struct delayed_work idle_work; + atomic_t active; } gt; /* perform PHY state sanity checks? */ @@ -3213,7 +3241,7 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno); struct drm_i915_gem_request * -i915_gem_find_active_request(struct intel_engine_cs *engine); +i915_gem_find_active_request(struct i915_timeline_engine *te); void i915_gem_retire_requests(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 666bf3aa1790..d915ab8b9d95 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2409,7 +2409,7 @@ static void i915_set_reset_status(struct i915_gem_context *ctx, } struct drm_i915_gem_request * -i915_gem_find_active_request(struct intel_engine_cs *engine) +i915_gem_find_active_request(struct i915_timeline_engine *te) { struct drm_i915_gem_request *request; @@ -2421,7 +2421,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) * extra delay for a recent interrupt is pointless. Hence, we do * not need an engine->irq_seqno_barrier() before the seqno reads. */ - list_for_each_entry(request, &engine->request_list, link) { + list_for_each_entry(request, &te->requests, link) { if (i915_gem_request_completed(request)) continue; @@ -2431,35 +2431,37 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) return NULL; } -static void i915_gem_reset_engine_status(struct intel_engine_cs *engine) +static void i915_gem_reset_timeline_status(struct i915_timeline *timeline) { - struct drm_i915_gem_request *request; + int i; - request = i915_gem_find_active_request(engine); - if (request == NULL) - return; + for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) { + struct i915_timeline_engine *te = &timeline->engine[i]; + struct drm_i915_gem_request *request; + + if (list_empty(&te->requests)) + continue; + + request = i915_gem_find_active_request(te); + if (request) { + i915_set_reset_status(request->ctx, + i915_gem_request_started(request)); + list_for_each_entry_continue(request, &te->requests, link) + i915_set_reset_status(request->ctx, false); + } - i915_set_reset_status(request->ctx, - i915_gem_request_started(request)); - list_for_each_entry_continue(request, &engine->request_list, link) - i915_set_reset_status(request->ctx, false); + request = list_last_entry(&te->requests, + struct drm_i915_gem_request, + link); + i915_gem_request_retire_upto(request); + //intel_engine_init_seqno(te, te->last_submitted_seqno); + } } static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *request; struct intel_ring *ring; - request = i915_gem_active_peek(&engine->last_request, - &engine->i915->drm.struct_mutex); - - /* Mark all pending requests as complete so that any concurrent - * (lockless) lookup doesn't try and wait upon the request as we - * reset it. - */ - if (request) - intel_engine_init_seqno(engine, request->fence.seqno); - /* * Clear the execlists queue up before freeing the requests, as those * are the ones that keep the context and ringbuffer backing objects @@ -2477,17 +2479,6 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) NULL); } - /* - * We must free the requests after all the corresponding objects have - * been moved off active lists. Which is the same order as the normal - * retire_requests function does. This is important if object hold - * implicit references on things like e.g. ppgtt address spaces through - * the request. - */ - if (request) - i915_gem_request_retire_upto(request); - GEM_BUG_ON(intel_engine_is_active(engine)); - /* Having flushed all requests from all queues, we know that all * ringbuffers must now be empty. However, since we do not reclaim * all space when retiring the request (to prevent HEADs colliding @@ -2500,94 +2491,32 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) intel_ring_update_space(ring); } - engine->i915->gt.active_engines &= ~intel_engine_flag(engine); + //engine->i915->gt.active_engines &= ~intel_engine_flag(engine); } void i915_gem_reset(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); struct intel_engine_cs *engine; + struct i915_timeline *timeline; /* * Before we free the objects from the requests, we need to inspect * them for finding the guilty party. As the requests only borrow * their reference to the objects, the inspection must be done first. */ - for_each_engine(engine, dev_priv) - i915_gem_reset_engine_status(engine); + list_for_each_entry(timeline, &dev_priv->gt.timelines, link) + i915_gem_reset_timeline_status(timeline); for_each_engine(engine, dev_priv) i915_gem_reset_engine_cleanup(engine); - mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); + //mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); i915_gem_context_reset(dev); i915_gem_restore_fences(dev); } -static void -i915_gem_retire_work_handler(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), gt.retire_work.work); - struct drm_device *dev = &dev_priv->drm; - - /* Come back later if the device is busy... */ - if (mutex_trylock(&dev->struct_mutex)) { - i915_gem_retire_requests(dev_priv); - mutex_unlock(&dev->struct_mutex); - } - - /* Keep the retire handler running until we are finally idle. - * We do not need to do this test under locking as in the worst-case - * we queue the retire worker once too often. - */ - if (READ_ONCE(dev_priv->gt.awake)) { - //i915_queue_hangcheck(dev_priv); /* XXX */ - queue_delayed_work(dev_priv->wq, - &dev_priv->gt.retire_work, - round_jiffies_up_relative(HZ)); - } -} - -static void -i915_gem_idle_work_handler(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), gt.idle_work.work); - struct drm_device *dev = &dev_priv->drm; - struct intel_engine_cs *engine; - - if (!READ_ONCE(dev_priv->gt.awake)) - return; - - if (READ_ONCE(dev_priv->gt.active_engines)) - return; - - if (!mutex_trylock(&dev->struct_mutex)) { - /* Currently busy, come back later */ - mod_delayed_work(dev_priv->wq, - &dev_priv->gt.idle_work, - msecs_to_jiffies(50)); - return; - } - - if (dev_priv->gt.active_engines) - goto out_unlock; - - for_each_engine(engine, dev_priv) - i915_gem_batch_pool_fini(&engine->batch_pool); - - GEM_BUG_ON(!dev_priv->gt.awake); - dev_priv->gt.awake = false; - - if (INTEL_GEN(dev_priv) >= 6) - gen6_rps_idle(dev_priv); - intel_runtime_pm_put(dev_priv); -out_unlock: - mutex_unlock(&dev->struct_mutex); -} - void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) { struct drm_i915_gem_object *obj = to_intel_bo(gem); @@ -4146,14 +4075,14 @@ int i915_gem_suspend(struct drm_device *dev) i915_gem_context_lost(dev_priv); mutex_unlock(&dev->struct_mutex); - cancel_delayed_work_sync(&dev_priv->gt.retire_work); - flush_delayed_work(&dev_priv->gt.idle_work); + //cancel_delayed_work_sync(&dev_priv->gt.retire_work); + //flush_delayed_work(&dev_priv->gt.idle_work); flush_work(&dev_priv->mm.free_work); /* Assert that we sucessfully flushed all the work and * reset the GPU back to its idle, low power state. */ - WARN_ON(dev_priv->gt.awake); + //WARN_ON(dev_priv->gt.awake); return 0; @@ -4362,13 +4291,6 @@ i915_gem_cleanup_engines(struct drm_device *dev) dev_priv->gt.cleanup_engine(engine); } -static void -init_engine_lists(struct intel_engine_cs *engine) -{ - /* Early initialisation so that core GEM works during engine setup */ - INIT_LIST_HEAD(&engine->request_list); -} - void i915_gem_load_init_fences(struct drm_i915_private *dev_priv) { @@ -4405,7 +4327,6 @@ void i915_gem_load_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - int i; dev_priv->objects = kmem_cache_create("i915_gem_object", @@ -4431,12 +4352,7 @@ i915_gem_load_init(struct drm_device *dev) INIT_LIST_HEAD(&dev_priv->mm.unbound_list); INIT_LIST_HEAD(&dev_priv->mm.bound_list); INIT_LIST_HEAD(&dev_priv->mm.fence_list); - for (i = 0; i < I915_NUM_ENGINES; i++) - init_engine_lists(&dev_priv->engine[i]); - INIT_DELAYED_WORK(&dev_priv->gt.retire_work, - i915_gem_retire_work_handler); - INIT_DELAYED_WORK(&dev_priv->gt.idle_work, - i915_gem_idle_work_handler); + INIT_LIST_HEAD(&dev_priv->gt.timelines); init_waitqueue_head(&dev_priv->gpu_error.wait_queue); init_waitqueue_head(&dev_priv->gpu_error.reset_queue); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index b6fe45ad03e7..df80737d7cc8 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -61,6 +61,7 @@ struct i915_execbuffer { struct drm_i915_gem_exec_object2 *exec; struct intel_engine_cs *engine; struct i915_gem_context *ctx; + struct i915_timeline_engine *timeline; struct i915_address_space *vm; struct i915_vma *batch; struct drm_i915_gem_request *request; @@ -1587,7 +1588,7 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) struct i915_vma *vma; int ret; - shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, + shadow_batch_obj = i915_gem_batch_pool_get(&eb->timeline->batch_pool, PAGE_ALIGN(eb->args->batch_len)); if (IS_ERR(shadow_batch_obj)) return ERR_CAST(shadow_batch_obj); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index cbe43db1f405..2f64bd25bc97 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -26,6 +26,125 @@ #include "i915_drv.h" +static void i915_gem_request_retire(struct drm_i915_gem_request *request); + +static bool +i915_timeline_engine_retire(struct i915_timeline_engine *te) +{ + struct drm_i915_gem_request *request, *next; + + list_for_each_entry_safe(request, next, &te->requests, link) { + if (!i915_gem_request_completed(request)) + return false; + + i915_gem_request_retire(request); + } + return true; +} + +static bool i915_timeline_retire_requests(struct i915_timeline *tl) +{ + int i; + + if (READ_ONCE(tl->active_engines) == 0) + return false; + + for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { + if (i915_timeline_engine_retire(&tl->engine[i])) + tl->active_engines &= ~(1 << i); + } + + if (tl->active_engines == 0 && + atomic_dec_and_test(&tl->i915->gt.active)) { + if (INTEL_GEN(tl->i915) >= 6) + gen6_rps_idle(tl->i915); + intel_runtime_pm_put(tl->i915); + } + + return true; +} + +void i915_gem_retire_requests(struct drm_i915_private *dev_priv) +{ + struct i915_timeline *timeline; + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + list_for_each_entry(timeline, &dev_priv->gt.timelines, link) + i915_timeline_retire_requests(timeline); +} + +static void +i915_timeline_retire_work(struct work_struct *work) +{ + struct i915_timeline *tl = + container_of(work, typeof(*tl), retire_work.work); + + /* Keep the retire handler running until we are finally idle. + * We do not need to do this test under locking as in the worst-case + * we queue the retire worker once too often. + */ + if (i915_timeline_retire_requests(tl)) + queue_delayed_work(tl->i915->wq, &tl->retire_work, + round_jiffies_up_relative(HZ)); +} + +static int i915_timeline_init_seqno(struct i915_timeline *tl, u32 seqno) +{ + struct intel_engine_cs *engine; + int ret; + + if (tl->id) + return 0; + + /* Carefully retire all requests without writing to the rings */ + for_each_engine(engine, tl->i915) { + ret = intel_engine_idle(engine); + if (ret) + return ret; + } + i915_timeline_retire_requests(tl); + + /* Finally reset hw state */ + for_each_engine(engine, tl->i915) + intel_engine_init_seqno(engine, seqno); + + return 0; +} + +static int +i915_timeline_get_seqno(struct i915_timeline *tl, u32 *seqno) +{ + /* reserve 0 for non-seqno */ + if (unlikely(tl->next_seqno == 0)) { + int ret = i915_timeline_init_seqno(tl, 0); + if (ret) + return ret; + + tl->next_seqno = 2; + } + + /* Each request uses a start / stop sequence */ + GEM_BUG_ON(tl->next_seqno & 1); + *seqno = tl->next_seqno; + tl->next_seqno += 2; + return 0; +} + +void i915_timeline_init(struct drm_i915_private *i915, + struct i915_timeline *tl) +{ + int i; + + tl->i915 = i915; + list_add(&tl->link, &i915->gt.timelines); + tl->next_seqno = 2; + + for (i = 0; i < ARRAY_SIZE(tl->engine); i++) + INIT_LIST_HEAD(&tl->engine[i].requests); + + INIT_DELAYED_WORK(&tl->retire_work, i915_timeline_retire_work); +} + static const char *i915_fence_get_driver_name(struct fence *fence) { return "i915"; @@ -179,16 +298,14 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = req->engine; + struct i915_timeline_engine *te = req->timeline; struct drm_i915_gem_request *tmp; lockdep_assert_held(&req->i915->drm.struct_mutex); GEM_BUG_ON(list_empty(&req->link)); do { - tmp = list_first_entry(&engine->request_list, - typeof(*tmp), link); - + tmp = list_first_entry(&te->requests, typeof(*tmp), link); i915_gem_request_retire(tmp); } while (tmp != req); } @@ -213,73 +330,6 @@ static int i915_gem_check_wedge(struct drm_i915_private *dev_priv) return 0; } -static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno) -{ - struct intel_engine_cs *engine; - int ret; - - /* Carefully retire all requests without writing to the rings */ - for_each_engine(engine, dev_priv) { - ret = intel_engine_idle(engine); - if (ret) - return ret; - } - i915_gem_retire_requests(dev_priv); - - /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ - if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) { - while (intel_kick_waiters(dev_priv) || - intel_kick_signalers(dev_priv)) - yield(); - } - - /* Finally reset hw state */ - for_each_engine(engine, dev_priv) - intel_engine_init_seqno(engine, seqno); - - return 0; -} - -int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - int ret; - - seqno = (seqno + 1) & ~1; - if (seqno == 0) - return -EINVAL; - - /* HWS page needs to be set less than what we - * will inject to ring - */ - ret = i915_gem_init_seqno(dev_priv, seqno - 2); - if (ret) - return ret; - - dev_priv->next_seqno = seqno; - return 0; -} - -static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno) -{ - /* reserve 0 for non-seqno */ - if (unlikely(dev_priv->next_seqno == 0)) { - int ret; - - ret = i915_gem_init_seqno(dev_priv, 0); - if (ret) - return ret; - - dev_priv->next_seqno = 2; - } - - /* Each request uses a start / stop sequence */ - GEM_BUG_ON(dev_priv->next_seqno & 1); - *seqno = dev_priv->next_seqno; - dev_priv->next_seqno += 2; - return 0; -} - static void __kfence_call submit_notify(struct kfence *fence) { struct drm_i915_gem_request *request = @@ -311,6 +361,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, { struct drm_i915_private *dev_priv = engine->i915; struct drm_i915_gem_request *req, *prev; + struct i915_timeline *tl = &dev_priv->kernel_timeline; + struct i915_timeline_engine *te = &tl->engine[engine->id]; u32 seqno; int ret; @@ -323,9 +375,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, return ERR_PTR(ret); /* Move the oldest request to the slab-cache (if not in use!) */ - if (!list_empty(&engine->request_list)) { - req = list_first_entry(&engine->request_list, - typeof(*req), link); + if (!list_empty(&te->requests)) { + req = list_first_entry(&te->requests, typeof(*req), link); if (i915_gem_request_completed(req)) i915_gem_request_retire(req); } @@ -334,7 +385,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, if (!req) return ERR_PTR(-ENOMEM); - ret = i915_gem_get_seqno(dev_priv, &seqno); + ret = i915_timeline_get_seqno(tl, &seqno); if (ret) goto err; @@ -350,6 +401,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, req->i915 = dev_priv; req->file_priv = NULL; req->engine = engine; + req->timeline = te; req->signaling.wait.tsk = NULL; req->ctx = ctx; req->previous_context = NULL; @@ -398,25 +450,28 @@ err: return ERR_PTR(ret); } -static void i915_gem_mark_busy(const struct intel_engine_cs *engine) +static void i915_gem_mark_busy(struct drm_i915_gem_request *req) { - struct drm_i915_private *dev_priv = engine->i915; + struct i915_timeline *timeline = req->timeline->timeline; - dev_priv->gt.active_engines |= intel_engine_flag(engine); - if (dev_priv->gt.awake) + if (timeline->active_engines & req->timeline->mask) return; - intel_runtime_pm_get_noresume(dev_priv); - dev_priv->gt.awake = true; - - intel_enable_gt_powersave(dev_priv); - i915_update_gfx_val(dev_priv); - if (INTEL_GEN(dev_priv) >= 6) - gen6_rps_busy(dev_priv); + if (timeline->active_engines == 0) { + queue_delayed_work(req->i915->wq, + &timeline->retire_work, + round_jiffies_up_relative(HZ)); + + if (atomic_inc_return(&req->i915->gt.active) == 1) { + intel_runtime_pm_get_noresume(req->i915); + intel_enable_gt_powersave(req->i915); + i915_update_gfx_val(req->i915); + if (INTEL_GEN(req->i915) >= 6) + gen6_rps_busy(req->i915); + } + } - queue_delayed_work(dev_priv->wq, - &dev_priv->gt.retire_work, - round_jiffies_up_relative(HZ)); + timeline->active_engines |= req->timeline->mask; } static void i915_gem_request_cancel(struct drm_i915_gem_request *request) @@ -486,7 +541,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) */ request->emitted_jiffies = jiffies; i915_gem_active_set(&engine->last_request, request); - list_add_tail(&request->link, &engine->request_list); + list_add_tail(&request->link, &request->timeline->requests); /* Record the position of the start of the breadcrumb so that * should we detect the updated seqno part-way through the @@ -505,7 +560,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) "for adding the request (%d bytes)\n", reserved_tail, ret); - i915_gem_mark_busy(engine); + i915_gem_mark_busy(request); kfence_signal(&request->submit); kfence_put(&request->submit); @@ -731,7 +786,6 @@ complete: static int wait_for_space(struct intel_ring *ring, int bytes) { - struct intel_engine_cs *engine = ring->engine; struct drm_i915_gem_request *target; int ret; @@ -739,17 +793,9 @@ static int wait_for_space(struct intel_ring *ring, int bytes) if (ring->space >= bytes) return 0; - list_for_each_entry(target, &engine->request_list, link) { + list_for_each_entry(target, &ring->timeline->requests, link) { unsigned space; - /* - * The request queue is per-engine, so can contain requests - * from multiple ring. Here, we must ignore any that - * aren't from the ring we're considering. - */ - if (target->ring != ring) - continue; - /* Would completion of this request free enough space? */ space = __intel_ring_space(target->postfix, ring->tail, ring->size); @@ -757,7 +803,7 @@ static int wait_for_space(struct intel_ring *ring, int bytes) break; } - if (WARN_ON(&target->link == &engine->request_list)) + if (WARN_ON(&target->link == &ring->timeline->requests)) return -ENOSPC; ret = i915_wait_request(target, true, NULL, NO_WAITBOOST); @@ -862,39 +908,3 @@ int i915_gem_request_align(struct drm_i915_gem_request *req) memset(out, 0, bytes); return 0; } - -static bool i915_gem_retire_requests_ring(struct intel_engine_cs *engine) -{ - struct drm_i915_gem_request *request, *next; - - list_for_each_entry_safe(request, next, &engine->request_list, link) { - if (!i915_gem_request_completed(request)) - return false; - - i915_gem_request_retire(request); - } - - return true; -} - -void i915_gem_retire_requests(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - if (dev_priv->gt.active_engines == 0) - return; - - GEM_BUG_ON(!dev_priv->gt.awake); - - for_each_engine_masked(engine, dev_priv, dev_priv->gt.active_engines) { - if (i915_gem_retire_requests_ring(engine)) - dev_priv->gt.active_engines &= ~intel_engine_flag(engine); - } - - if (dev_priv->gt.active_engines == 0) - queue_delayed_work(dev_priv->wq, - &dev_priv->gt.idle_work, - msecs_to_jiffies(100)); -} diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index bbc54bdc06f4..cb2e649dde97 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -40,6 +40,9 @@ struct intel_signal_node { struct intel_wait wait; }; +struct i915_timeline; +struct i915_timeline_engine; + /** * Request queue structure. * @@ -75,6 +78,8 @@ struct drm_i915_gem_request { struct intel_ring *ring; struct intel_signal_node signaling; + struct i915_timeline_engine *timeline; + struct kfence submit; /** HWS sequence number location */ diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 2fa56086ea6c..4dd728b7c1d0 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -982,61 +982,6 @@ static void i915_record_ring_state(struct drm_i915_private *dev_priv, } } -static void engine_record_requests(struct intel_engine_cs *engine, - struct drm_i915_gem_request *first, - struct drm_i915_error_engine *ering) -{ - struct drm_i915_gem_request *request; - int count; - - count = 0; - request = first; - list_for_each_entry_from(request, &engine->request_list, link) - count++; - - ering->requests = NULL; - kcalloc(count, sizeof(*ering->requests), - GFP_ATOMIC); - if (ering->requests == NULL) - return; - ering->num_requests = count; - - count = 0; - request = first; - list_for_each_entry_from(request, &engine->request_list, link) { - struct drm_i915_error_request *erq; - - if (count >= ering->num_requests) { - /* - * If the ring request list was changed in - * between the point where the error request - * list was created and dimensioned and this - * point then just exit early to avoid crashes. - * - * We don't need to communicate that the - * request list changed state during error - * state capture and that the error state is - * slightly incorrect as a consequence since we - * are typically only interested in the request - * list state at the point of error state - * capture, not in any changes happening during - * the capture. - */ - break; - } - - erq = &ering->requests[count++]; - erq->seqno = request->fence.seqno; - erq->jiffies = request->emitted_jiffies; - erq->head = request->head; - erq->tail = request->tail; - - rcu_read_lock(); - erq->pid = request->ctx ? pid_nr(request->ctx->pid) : 0; - rcu_read_unlock(); - } -} - static void i915_gem_record_rings(struct drm_i915_private *dev_priv, struct drm_i915_error_state *error) { @@ -1058,7 +1003,8 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, i915_record_ring_state(dev_priv, error, engine, ering); engine_record_waiters(engine, ering); - request = i915_gem_find_active_request(engine); + //request = i915_gem_find_active_request(engine); + request = NULL; if (request) { struct intel_ring *ring; struct pid *pid; @@ -1105,8 +1051,6 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, ering->cpu_ring_tail = ring->tail; ering->ringbuffer = i915_error_object_create(dev_priv, ring->vma); - - engine_record_requests(engine, request, ering); } ering->hws_page = diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index e52c7ba5f965..1cada95d190f 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -169,7 +169,6 @@ intel_engine_retire(struct i915_gem_active *active, static void intel_engine_init_requests(struct intel_engine_cs *engine) { init_request_active(&engine->last_request, intel_engine_retire); - INIT_LIST_HEAD(&engine->request_list); } /** @@ -190,7 +189,7 @@ void intel_engine_setup_common(struct intel_engine_cs *engine) engine->fence_context = fence_context_alloc(1); intel_engine_init_requests(engine); - i915_gem_batch_pool_init(engine, &engine->batch_pool); + //i915_gem_batch_pool_init(engine, &engine->batch_pool); i915_cmd_parser_init_ring(engine); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 3797fcf646a7..7c43a9175fa5 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1561,7 +1561,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) i915_cmd_parser_fini_ring(engine); i915_gem_render_state_fini(engine); - i915_gem_batch_pool_fini(&engine->batch_pool); + //i915_gem_batch_pool_fini(&engine->batch_pool); intel_engine_fini_breadcrumbs(engine); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 4a619e7a4cb0..630b116988f6 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4909,8 +4909,7 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv, /* This is intentionally racy! We peek at the state here, then * validate inside the RPS worker. */ - if (!(dev_priv->gt.awake && - dev_priv->rps.enabled && + if (!(dev_priv->rps.enabled && dev_priv->rps.cur_freq < dev_priv->rps.boost_freq)) return; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 36e061a46aa4..71c319fb1e88 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2124,7 +2124,6 @@ void intel_engine_cleanup(struct intel_engine_cs *engine) i915_gem_render_state_fini(engine); i915_cmd_parser_fini_ring(engine); - i915_gem_batch_pool_fini(&engine->batch_pool); intel_engine_fini_breadcrumbs(engine); intel_ring_context_unpin(dev_priv->kernel_context, engine); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 60cfafa12ad2..8287e9a4f67a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -68,6 +68,7 @@ struct intel_ring { void *vaddr; struct intel_engine_cs *engine; + struct i915_timeline_engine *timeline; struct list_head link; u32 head; @@ -171,13 +172,6 @@ struct intel_engine_cs { bool rpm_wakelock : 1; } breadcrumbs; - /* - * A pool of objects to use as shadow copies of client batch buffers - * when the command parser is enabled. Prevents the client from - * modifying the batch contents after software parsing. - */ - struct i915_gem_batch_pool batch_pool; - struct intel_hw_status_page status_page; struct i915_ctx_workarounds wa_ctx; @@ -282,12 +276,6 @@ struct intel_engine_cs { bool preempt_wa; u32 ctx_desc_template; - /** - * List of breadcrumbs associated with GPU requests currently - * outstanding. - */ - struct list_head request_list; - /* An RCU guarded pointer to the last request. No reference is * held to the request, users must carefully acquire a reference to * the request using i915_gem_active_get_request_rcu(), or hold the -- 2.8.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx