From: Ben Widawsky <benjamin.widawsky@xxxxxxxxx> The last patch made PPGTT free cases correct. It left a major problem though where in many cases it was possible to have to idle the GPU in order to destroy a VM. This is really unfortunate as it is stalling the active GPU process for the dying GPU process. The workqueue grew very tricky. I left in my original wait based version as #if 0, and used Chris' recommendation with the seqno check. I haven't measure one vs the other, but I am in favor of the code as it is. I am just leaving the old code for people to observe. NOTE: I somewhat expect this patch to not get merged because of work in the pipeline. I won't argue much against that, and I'll simply say, this solves a real problem, or platforms running with PPGTT. PPGTT is not enabled by default yet, and I therefore see little harm in merging this. Because I was expecting this to not get merged, I did not spent much time investigating any corner cases, in particular, cases where I need to cleanup the wq. If this patch does head in the merge direction, I will take a closer look at that. Signed-off-by: Ben Widawsky <ben@xxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 10 +++ drivers/gpu/drm/i915/i915_gem.c | 110 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_context.c | 40 ++++++++---- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- drivers/gpu/drm/i915/i915_gem_gtt.h | 2 + 5 files changed, 150 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a5ca462..0098ff9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1118,6 +1118,15 @@ struct i915_gem_mm { struct delayed_work idle_work; /** + * PPGTT freeing often happens during interruptible times (fd close, + * execbuf, busy_ioctl). It therefore becomes difficult to clean up the + * PPGTT when the refcount reaches 0 if a signal comes in. This + * workqueue defers the cleanup of the VM to a later time, and allows + * userspace to continue on. + */ + struct delayed_work ppgtt_work; + + /** * Are we in a non-interruptible section of code like * modesetting? */ @@ -1502,6 +1511,7 @@ struct drm_i915_private { struct mutex modeset_restore_lock; struct list_head vm_list; /* Global list of all address spaces */ + struct list_head ppgtt_free_list; struct i915_gtt gtt; /* VM representing the global address space */ struct i915_gem_mm mm; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ef047bc..2df4d86 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2707,6 +2707,112 @@ i915_gem_idle_work_handler(struct work_struct *work) intel_mark_idle(dev_priv->dev); } +static void +i915_gem_ppgtt_work_handler(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), mm.ppgtt_work.work); + struct i915_address_space *vm, *v; +#if 0 + unsigned reset_counter; + int ret; +#endif + + if (!mutex_trylock(&dev_priv->dev->struct_mutex)) { + queue_delayed_work(dev_priv->wq, &dev_priv->mm.ppgtt_work, + round_jiffies_up_relative(HZ)); + return; + } + + list_for_each_entry_safe(vm, v, &dev_priv->ppgtt_free_list, global_link) { + struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); + struct i915_vma *vma = NULL, *vma_active = NULL, *vma_inactive = NULL; + + /* The following attempts to find the newest (most recently + * activated/inactivated) VMA. + */ + if (!list_empty(&ppgtt->base.active_list)) { + vma_active = list_last_entry(&ppgtt->base.active_list, + typeof(*vma), mm_list); + } + if (!list_empty(&ppgtt->base.inactive_list)) { + vma_inactive = list_last_entry(&ppgtt->base.inactive_list, + typeof(*vma), mm_list); + } + + if (vma_active) + vma = vma_active; + else if (vma_inactive) + vma = vma_inactive; + + /* Sanity check */ + if (vma_active && vma_inactive) { + if (WARN_ON(vma_active->retire_seqno <= vma_inactive->retire_seqno)) + vma = vma_inactive; + } + + if (!vma) + goto finish; + + /* Another sanity check */ + if (WARN_ON(!vma->retire_seqno)) + continue; + + /* NOTE: We could wait here for the seqno, but that makes things + * significantly more complex. */ + if (!i915_seqno_passed(vma->retire_ring->get_seqno(vma->retire_ring, true), vma->retire_seqno)) + break; + +#if 0 + reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); + mutex_unlock(&dev_priv->dev->struct_mutex); + ret = __wait_seqno(vma->retire_ring, vma->retire_seqno, + reset_counter, true, NULL, NULL); + + if (i915_mutex_lock_interruptible(dev_priv->dev)) { + queue_delayed_work(dev_priv->wq, &dev_priv->mm.ppgtt_work, + round_jiffies_up_relative(HZ)); + return; + } + + if (ret) + continue; + +#endif +finish: + /* If the object was destroyed while our VMA was dangling, the + * object free code did the synchronous cleanup for us. + * Therefore every node on this list should have a living object + * (and VMA), but let's try not to use it in case future code + * allows a VMA to outlive an object. + */ + while (!list_empty(&ppgtt->base.mm.head_node.node_list)) { + struct drm_mm_node *node; + struct i915_vma *vma; + struct drm_i915_gem_object *obj; + + node = list_first_entry(&ppgtt->base.mm.head_node.node_list, + struct drm_mm_node, + node_list); + vma = container_of(node, struct i915_vma, node); + obj = vma->obj; + + drm_mm_remove_node(node); + i915_gem_vma_destroy(vma); + i915_gem_object_unpin_pages(obj); + } + + ppgtt->base.cleanup(&ppgtt->base); + kfree(ppgtt); + } + + if (!list_empty(&dev_priv->ppgtt_free_list)) + queue_delayed_work(dev_priv->wq, &dev_priv->mm.ppgtt_work, + round_jiffies_up_relative(HZ)); + + mutex_unlock(&dev_priv->dev->struct_mutex); +} + /** * Ensures that an object will eventually get non-busy by flushing any required * write domains, emitting any outstanding lazy request and retiring and @@ -4553,6 +4659,7 @@ i915_gem_suspend(struct drm_device *dev) mutex_unlock(&dev->struct_mutex); del_timer_sync(&dev_priv->gpu_error.hangcheck_timer); + cancel_delayed_work_sync(&dev_priv->mm.ppgtt_work); cancel_delayed_work_sync(&dev_priv->mm.retire_work); cancel_delayed_work_sync(&dev_priv->mm.idle_work); @@ -4894,6 +5001,7 @@ i915_gem_load(struct drm_device *dev) NULL); INIT_LIST_HEAD(&dev_priv->vm_list); + INIT_LIST_HEAD(&dev_priv->ppgtt_free_list); i915_init_vm(dev_priv, &dev_priv->gtt.base); INIT_LIST_HEAD(&dev_priv->context_list); @@ -4908,6 +5016,8 @@ i915_gem_load(struct drm_device *dev) i915_gem_retire_work_handler); INIT_DELAYED_WORK(&dev_priv->mm.idle_work, i915_gem_idle_work_handler); + INIT_DELAYED_WORK(&dev_priv->mm.ppgtt_work, + i915_gem_ppgtt_work_handler); init_waitqueue_head(&dev_priv->gpu_error.reset_queue); /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 84c65aa..e221d5b 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -96,7 +96,7 @@ #define GEN6_CONTEXT_ALIGN (64<<10) #define GEN7_CONTEXT_ALIGN 4096 -static void do_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt) +static int do_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt) { struct drm_device *dev = ppgtt->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; @@ -131,7 +131,7 @@ static void do_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt) if (ppgtt == dev_priv->mm.aliasing_ppgtt || (list_empty(&vm->active_list) && list_empty(&vm->inactive_list))) { ppgtt->base.cleanup(&ppgtt->base); - return; + return 0; } /* @@ -153,17 +153,30 @@ static void do_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt) /* We have a problem here where VM teardown cannot be interrupted, or * else the ppgtt cleanup will fail. As an example, a precisely timed - * SIGKILL could leads to an OOPS, or worse. There are two options: - * 1. Make the eviction uninterruptible - * 2. Defer the eviction if it was interrupted. - * - * Option #1 is not the friendliest, but it's the easiest to implement, - * and least error prone. - * TODO: Implement option 2 + * SIGKILL could leads to an OOPS, or worse. The real solution is to + * properly track the VMA <-> OBJ relationship. This temporary bandaid + * will simply defer the free until we know the seqno has passed for + * this VMA. */ ret = i915_gem_evict_vm(&ppgtt->base, do_idle, !do_idle); - if (ret == -ERESTARTSYS) - ret = i915_gem_evict_vm(&ppgtt->base, do_idle, false); + if (ret == -ERESTARTSYS) { + struct drm_mm_node *entry; + /* First mark all VMAs */ + drm_mm_for_each_node(entry, &ppgtt->base.mm) { + struct i915_vma *vma = container_of(entry, struct i915_vma, node); + vma->retire_seqno = vma->obj->last_read_seqno; + vma->retire_ring = vma->obj->ring; + /* It's okay to lose the object, we just can't lose the + * VMA */ + } + + list_move_tail(&ppgtt->base.global_link, &dev_priv->ppgtt_free_list); + queue_delayed_work(dev_priv->wq, + &dev_priv->mm.ppgtt_work, + round_jiffies_up_relative(HZ)); + return -EAGAIN; + } + WARN_ON(ret); WARN_ON(!list_empty(&vm->active_list)); @@ -173,6 +186,7 @@ static void do_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt) } ppgtt->base.cleanup(&ppgtt->base); + return 0; } static void ppgtt_release(struct kref *kref) @@ -180,8 +194,8 @@ static void ppgtt_release(struct kref *kref) struct i915_hw_ppgtt *ppgtt = container_of(kref, struct i915_hw_ppgtt, ref); - do_ppgtt_cleanup(ppgtt); - kfree(ppgtt); + if (!do_ppgtt_cleanup(ppgtt)) + kfree(ppgtt); } static size_t get_context_alignment(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0b2b982..ebf81cd 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1030,7 +1030,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) container_of(vm, struct i915_hw_ppgtt, base); list_del(&vm->global_link); - drm_mm_takedown(&ppgtt->base.mm); + drm_mm_takedown(&vm->mm); drm_mm_remove_node(&ppgtt->node); gen6_ppgtt_unmap_pages(ppgtt); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 1d75801..d0dc567 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -137,6 +137,8 @@ struct i915_vma { struct hlist_node exec_node; unsigned long exec_handle; struct drm_i915_gem_exec_object2 *exec_entry; + uint32_t retire_seqno; /* Last active seqno at context desruction */ + struct intel_engine_cs *retire_ring; /* Last ring for retire_seqno */ /** * How many users have pinned this object in GTT space. The following -- 1.9.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx