Currently, the list of timelines is serialised by the struct_mutex, but to alleviate difficulties with using that mutex in future, move the list management under its own dedicated mutex. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 89 +++++++++---------- drivers/gpu/drm/i915/i915_gem_object.h | 2 +- drivers/gpu/drm/i915/i915_timeline.c | 27 +++++- drivers/gpu/drm/i915/i915_timeline.h | 3 + drivers/gpu/drm/i915/i915_vma.c | 6 ++ .../gpu/drm/i915/selftests/mock_gem_device.c | 7 +- .../gpu/drm/i915/selftests/mock_timeline.c | 3 +- 8 files changed, 83 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5ef7499166b4..4a8c45949c4d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1958,6 +1958,7 @@ struct drm_i915_private { void (*resume)(struct drm_i915_private *); void (*cleanup_engine)(struct intel_engine_cs *engine); + struct mutex timeline_lock; struct list_head timelines; struct list_head active_rings; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 37270ee360c8..09c7ded8f498 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3209,33 +3209,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return ret; } -static long wait_for_timeline(struct i915_timeline *tl, - unsigned int flags, long timeout) -{ - struct i915_request *rq; - - rq = i915_gem_active_get_unlocked(&tl->last_request); - if (!rq) - return timeout; - - /* - * "Race-to-idle". - * - * Switching to the kernel context is often used a synchronous - * step prior to idling, e.g. in suspend for flushing all - * current operations to memory before sleeping. These we - * want to complete as quickly as possible to avoid prolonged - * stalls, so allow the gpu to boost to maximum clocks. - */ - if (flags & I915_WAIT_FOR_IDLE_BOOST) - gen6_rps_boost(rq, NULL); - - timeout = i915_request_wait(rq, flags, timeout); - i915_request_put(rq); - - return timeout; -} - static int wait_for_engines(struct drm_i915_private *i915) { if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { @@ -3252,6 +3225,8 @@ static int wait_for_engines(struct drm_i915_private *i915) int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags, long timeout) { + struct i915_timeline *tl; + GEM_TRACE("flags=%x (%s), timeout=%ld%s\n", flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked", timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : ""); @@ -3260,17 +3235,45 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, if (!READ_ONCE(i915->gt.awake)) return 0; + mutex_lock(&i915->gt.timeline_lock); + list_for_each_entry(tl, &i915->gt.timelines, link) { + struct i915_request *rq; + + rq = i915_gem_active_get_unlocked(&tl->last_request); + if (!rq) + continue; + + mutex_unlock(&i915->gt.timeline_lock); + + /* + * "Race-to-idle". + * + * Switching to the kernel context is often used a synchronous + * step prior to idling, e.g. in suspend for flushing all + * current operations to memory before sleeping. These we + * want to complete as quickly as possible to avoid prolonged + * stalls, so allow the gpu to boost to maximum clocks. + */ + if (flags & I915_WAIT_FOR_IDLE_BOOST) + gen6_rps_boost(rq, NULL); + + timeout = i915_request_wait(rq, flags, timeout); + i915_request_put(rq); + if (timeout < 0) + return timeout; + + mutex_lock(&i915->gt.timeline_lock); + + /* restart after dropping the lock */ + tl = list_entry(&i915->gt.timelines, typeof(*tl), link); + } + mutex_unlock(&i915->gt.timeline_lock); + if (flags & I915_WAIT_LOCKED) { - struct i915_timeline *tl; int err; lockdep_assert_held(&i915->drm.struct_mutex); - list_for_each_entry(tl, &i915->gt.timelines, link) { - timeout = wait_for_timeline(tl, flags, timeout); - if (timeout < 0) - return timeout; - } if (GEM_SHOW_DEBUG() && !timeout) { /* Presume that timeout was non-zero to begin with! */ dev_warn(&i915->drm.pdev->dev, @@ -3284,17 +3287,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, i915_retire_requests(i915); GEM_BUG_ON(i915->gt.active_requests); - } else { - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) { - struct i915_timeline *tl = &engine->timeline; - - timeout = wait_for_timeline(tl, flags, timeout); - if (timeout < 0) - return timeout; - } } return 0; @@ -4995,6 +4987,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv) dev_priv->gt.cleanup_engine = intel_engine_cleanup; } + i915_timelines_init(dev_priv); + ret = i915_gem_init_userptr(dev_priv); if (ret) return ret; @@ -5117,8 +5111,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv) err_uc_misc: intel_uc_fini_misc(dev_priv); - if (ret != -EIO) + if (ret != -EIO) { i915_gem_cleanup_userptr(dev_priv); + i915_timelines_fini(dev_priv); + } if (ret == -EIO) { mutex_lock(&dev_priv->drm.struct_mutex); @@ -5169,6 +5165,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv) intel_uc_fini_misc(dev_priv); i915_gem_cleanup_userptr(dev_priv); + i915_timelines_fini(dev_priv); i915_gem_drain_freed_objects(dev_priv); @@ -5271,7 +5268,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) if (!dev_priv->priorities) goto err_dependencies; - INIT_LIST_HEAD(&dev_priv->gt.timelines); INIT_LIST_HEAD(&dev_priv->gt.active_rings); INIT_LIST_HEAD(&dev_priv->gt.closed_vma); @@ -5315,7 +5311,6 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); WARN_ON(dev_priv->mm.object_count); - WARN_ON(!list_empty(&dev_priv->gt.timelines)); kmem_cache_destroy(dev_priv->priorities); kmem_cache_destroy(dev_priv->dependencies); diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 35de9e739104..2e881b8d9936 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -87,7 +87,7 @@ struct drm_i915_gem_object { struct { /** - * @vma.lock: protect the list/tre of vmas + * @vma.lock: protect the list/tree of vmas */ struct spinlock lock; diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index 4667cc08c416..0434b5e0d3e1 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -13,8 +13,6 @@ void i915_timeline_init(struct drm_i915_private *i915, struct i915_timeline *timeline, const char *name) { - lockdep_assert_held(&i915->drm.struct_mutex); - /* * Ideally we want a set of engines on a single leaf as we expect * to mostly be tracking synchronisation between engines. It is not @@ -23,9 +21,12 @@ void i915_timeline_init(struct drm_i915_private *i915, */ BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); + timeline->i915 = i915; timeline->name = name; + mutex_lock(&i915->gt.timeline_lock); list_add(&timeline->link, &i915->gt.timelines); + mutex_unlock(&i915->gt.timeline_lock); /* Called during early_init before we know how many engines there are */ @@ -39,6 +40,15 @@ void i915_timeline_init(struct drm_i915_private *i915, i915_syncmap_init(&timeline->sync); } +void i915_timelines_init(struct drm_i915_private *i915) +{ + mutex_init(&i915->gt.timeline_lock); + INIT_LIST_HEAD(&i915->gt.timelines); + + /* via i915_gem_wait_for_idle() */ + i915_gem_shrinker_taints_mutex(i915, &i915->gt.timeline_lock); +} + /** * i915_timelines_park - called when the driver idles * @i915: the drm_i915_private device @@ -53,8 +63,7 @@ void i915_timelines_park(struct drm_i915_private *i915) { struct i915_timeline *timeline; - lockdep_assert_held(&i915->drm.struct_mutex); - + mutex_lock(&i915->gt.timeline_lock); list_for_each_entry(timeline, &i915->gt.timelines, link) { /* * All known fences are completed so we can scrap @@ -64,6 +73,7 @@ void i915_timelines_park(struct drm_i915_private *i915) */ i915_syncmap_free(&timeline->sync); } + mutex_unlock(&i915->gt.timeline_lock); } void i915_timeline_fini(struct i915_timeline *timeline) @@ -72,7 +82,9 @@ void i915_timeline_fini(struct i915_timeline *timeline) i915_syncmap_free(&timeline->sync); + mutex_lock(&timeline->i915->gt.timeline_lock); list_del(&timeline->link); + mutex_unlock(&timeline->i915->gt.timeline_lock); } struct i915_timeline * @@ -99,6 +111,13 @@ void __i915_timeline_free(struct kref *kref) kfree(timeline); } +void i915_timelines_fini(struct drm_i915_private *i915) +{ + GEM_BUG_ON(!list_empty(&i915->gt.timelines)); + + mutex_destroy(&i915->gt.timeline_lock); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_timeline.c" #include "selftests/i915_timeline.c" diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index 38c1e15e927a..87ad2dd31c20 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -66,6 +66,7 @@ struct i915_timeline { struct list_head link; const char *name; + struct drm_i915_private *i915; struct kref kref; }; @@ -134,6 +135,8 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl, return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno); } +void i915_timelines_init(struct drm_i915_private *i915); void i915_timelines_park(struct drm_i915_private *i915); +void i915_timelines_fini(struct drm_i915_private *i915); #endif diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 3a680fe2bb8b..d83b8ad5f859 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -201,6 +201,11 @@ vma_create(struct drm_i915_gem_object *obj, rb = *p; pos = rb_entry(rb, struct i915_vma, obj_node); + /* + * If the view already exists in the tree, another thread + * already created a matching vma, so return the older instance + * and dispose of ours. + */ cmp = i915_vma_compare(pos, vm, view); if (cmp == 0) { spin_unlock(&obj->vma.lock); @@ -294,6 +299,7 @@ i915_vma_instance(struct drm_i915_gem_object *obj, vma = vma_lookup(obj, vm, view); spin_unlock(&obj->vma.lock); + /* vma_create() will resolve the race if another creates the vma */ if (unlikely(!vma)) vma = vma_create(obj, vm, view); diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index e756cbd0b1f4..66a2e47ad888 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -68,13 +68,14 @@ static void mock_device_release(struct drm_device *dev) i915_gem_contexts_fini(i915); mutex_unlock(&i915->drm.struct_mutex); + i915_timelines_fini(i915); + drain_workqueue(i915->wq); i915_gem_drain_freed_objects(i915); mutex_lock(&i915->drm.struct_mutex); mock_fini_ggtt(i915); mutex_unlock(&i915->drm.struct_mutex); - WARN_ON(!list_empty(&i915->gt.timelines)); destroy_workqueue(i915->wq); @@ -227,7 +228,8 @@ struct drm_i915_private *mock_gem_device(void) if (!i915->priorities) goto err_dependencies; - INIT_LIST_HEAD(&i915->gt.timelines); + i915_timelines_init(i915); + INIT_LIST_HEAD(&i915->gt.active_rings); INIT_LIST_HEAD(&i915->gt.closed_vma); @@ -254,6 +256,7 @@ struct drm_i915_private *mock_gem_device(void) i915_gem_contexts_fini(i915); err_unlock: mutex_unlock(&i915->drm.struct_mutex); + i915_timelines_fini(i915); kmem_cache_destroy(i915->priorities); err_dependencies: kmem_cache_destroy(i915->dependencies); diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c index dcf3b16f5a07..cf39ccd9fc05 100644 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.c +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c @@ -10,6 +10,7 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context) { + timeline->i915 = NULL; timeline->fence_context = context; spin_lock_init(&timeline->lock); @@ -24,5 +25,5 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context) void mock_timeline_fini(struct i915_timeline *timeline) { - i915_timeline_fini(timeline); + i915_syncmap_free(&timeline->sync); } -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx