Re: [PATCH v4] drm/i915: Clean up associated VMAs on context destruction

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Oct 05, 2015 at 01:26:36PM +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
> 
> Prevent leaking VMAs and PPGTT VMs when objects are imported
> via flink.
> 
> Scenario is that any VMAs created by the importer will be left
> dangling after the importer exits, or destroys the PPGTT context
> with which they are associated.
> 
> This is caused by object destruction not running when the
> importer closes the buffer object handle due the reference held
> by the exporter. This also leaks the VM since the VMA has a
> reference on it.
> 
> In practice these leaks can be observed by stopping and starting
> the X server on a kernel with fbcon compiled in. Every time
> X server exits another VMA will be leaked against the fbcon's
> frame buffer object.
> 
> Also on systems where flink buffer sharing is used extensively,
> like Android, this leak has even more serious consequences.
> 
> This version is takes a general approach from the  earlier work
> by Rafael Barbalho (drm/i915: Clean-up PPGTT on context
> destruction) and tries to incorporate the subsequent discussion
> between Chris Wilson and Daniel Vetter.
> 
> v2:
> 
> Removed immediate cleanup on object retire - it was causing a
> recursive VMA unbind via i915_gem_object_wait_rendering. And
> it is in fact not even needed since by definition context
> cleanup worker runs only after the last context reference has
> been dropped, hence all VMAs against the VM belonging to the
> context are already on the inactive list.
> 
> v3:
> 
> Previous version could deadlock since VMA unbind waits on any
> rendering on an object to complete. Objects can be busy in a
> different VM which would mean that the cleanup loop would do
> the wait with the struct mutex held.
> 
> This is an even simpler approach where we just unbind VMAs
> without waiting since we know all VMAs belonging to this VM
> are idle, and there is nothing in flight, at the point
> context destructor runs.
> 
> v4:
> 
> Double underscore prefix for __915_vma_unbind_no_wait and a
> commit message typo fix. (Michel Thierry)
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
> Testcase: igt/gem_ppgtt.c/flink-and-exit-vma-leak
> Reviewed-by: Michel Thierry <michel.thierry@xxxxxxxxx>
> Cc: Daniel Vetter <daniel.vetter@xxxxxxxx>
> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Rafael Barbalho <rafael.barbalho@xxxxxxxxx>
> Cc: Michel Thierry <michel.thierry@xxxxxxxxx>

> ---
>  drivers/gpu/drm/i915/i915_drv.h         |  5 +++++
>  drivers/gpu/drm/i915/i915_gem.c         | 20 ++++++++++++++++----
>  drivers/gpu/drm/i915/i915_gem_context.c | 24 ++++++++++++++++++++++++
>  3 files changed, 45 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 824e7245044d..58afa1bb2957 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2840,6 +2840,11 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>  int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
>  		  u32 flags);
>  int __must_check i915_vma_unbind(struct i915_vma *vma);
> +/*
> + * BEWARE: Do not use the function below unless you can _absolutely_
> + * _guarantee_ VMA in question is _not in use_ anywhere.
> + */
> +int __must_check __i915_vma_unbind_no_wait(struct i915_vma *vma);
>  int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
>  void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv);
>  void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index f0cfbb9ee12c..52642aff1dab 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3208,7 +3208,7 @@ static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
>  					    old_write_domain);
>  }
>  
> -int i915_vma_unbind(struct i915_vma *vma)
> +static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
>  {
>  	struct drm_i915_gem_object *obj = vma->obj;
>  	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> @@ -3227,9 +3227,11 @@ int i915_vma_unbind(struct i915_vma *vma)
>  
>  	BUG_ON(obj->pages == NULL);
>  
> -	ret = i915_gem_object_wait_rendering(obj, false);
> -	if (ret)
> -		return ret;
> +	if (wait) {
> +		ret = i915_gem_object_wait_rendering(obj, false);
> +		if (ret)
> +			return ret;
> +	}
>  
>  	if (i915_is_ggtt(vma->vm) &&
>  	    vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
> @@ -3274,6 +3276,16 @@ int i915_vma_unbind(struct i915_vma *vma)
>  	return 0;
>  }
>  
> +int i915_vma_unbind(struct i915_vma *vma)
> +{
> +	return __i915_vma_unbind(vma, true);
> +}
> +
> +int __i915_vma_unbind_no_wait(struct i915_vma *vma)
> +{
> +	return __i915_vma_unbind(vma, false);
> +}
> +
>  int i915_gpu_idle(struct drm_device *dev)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 74aa0c9929ba..680b4c9f6b73 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -133,6 +133,23 @@ static int get_context_size(struct drm_device *dev)
>  	return ret;
>  }
>  
> +static void i915_gem_context_clean(struct intel_context *ctx)
> +{
> +	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
> +	struct i915_vma *vma, *next;
> +
> +	if (WARN_ON_ONCE(!ppgtt))
> +		return;

[   80.242892] [drm:i915_gem_open] 
[   80.250485] ------------[ cut here ]------------
[   80.258938] WARNING: CPU: 1 PID: 277 at ../drivers/gpu/drm/i915/i915_gem_context.c:141 i915_gem_context_free+0xef/0x27b [i915]()
[   80.275344] WARN_ON_ONCE(!ppgtt)
[   80.278816] Modules linked in: i915 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm intel_gtt agpgart netconsole psmouse iTCO_wdt atkbd libps2 mmc_block coretemp hwmon intel_rapl punit_atom_debug efivars pcspkr r8169 lpc_ich mii i2c_i801 processor_thermal_device intel_soc_dts_iosf iosf_mbi i8042 serio snd_soc_rt5670 snd_soc_rl6231 snd_intel_sst_acpi snd_intel_sst_core snd_soc_sst_mfld_platform snd_soc_core i2c_hid snd_compress snd_pcm snd_timer snd i2c_designware_platform i2c_designware_core soundcore spi_pxa2xx_platform mousedev pwm_lpss_platform pwm_lpss sdhci_acpi sdhci mmc_core evdev int3403_thermal int3400_thermal acpi_thermal_rel int340x_thermal_zone hid_generic usbhid hid sch_fq_codel ip_tables x_tables ipv6 autofs4
[   80.375327] CPU: 0 PID: 277 Comm: Xorg Tainted: G     U          4.3.0-rc4-bsw+ #2490
[   80.387946] Hardware name: Intel Corporation CHERRYVIEW C0 PLATFORM/Braswell CRB, BIOS BRAS.X64.B085.R00.1509110553 09/11/2015
[   80.405845]  0000000000000000 ffff880176fa3bf0 ffffffff8128d59e ffff880176fa3c38
[   80.418365]  ffff880176fa3c28 ffffffff810680cb ffffffffa0374395 ffff88007a8cd500
[   80.431055]  0000000000000000 0000000000000000 ffff880176fa3cf8 ffff880176fa3c90
[   80.443838] Call Trace:
[   80.450725]  [<ffffffff8128d59e>] dump_stack+0x4e/0x79
[   80.460612]  [<ffffffff810680cb>] warn_slowpath_common+0x9f/0xb8
[   80.471481]  [<ffffffffa0374395>] ? i915_gem_context_free+0xef/0x27b [i915]
[   80.483498]  [<ffffffff8106812d>] warn_slowpath_fmt+0x49/0x52
[   80.494090]  [<ffffffffa0374521>] ? i915_gem_context_free+0x27b/0x27b [i915]
[   80.506165]  [<ffffffffa0374395>] i915_gem_context_free+0xef/0x27b [i915]
[   80.517937]  [<ffffffffa0374521>] ? i915_gem_context_free+0x27b/0x27b [i915]
[   80.530129]  [<ffffffffa037453b>] context_idr_cleanup+0x1a/0x1e [i915]
[   80.541520]  [<ffffffff8128e235>] idr_for_each+0xba/0xe1
[   80.551489]  [<ffffffff814e039c>] ? __mutex_unlock_slowpath+0x118/0x132
[   80.562925]  [<ffffffffa0374d87>] i915_gem_context_close+0x26/0x31 [i915]
[   80.574614]  [<ffffffffa03f50c4>] i915_driver_preclose+0x2d/0x52 [i915]
[   80.586032]  [<ffffffffa02ab657>] drm_release+0xca/0x49b [drm]
[   80.596558]  [<ffffffff8118577c>] __fput+0x100/0x1b3
[   80.606071]  [<ffffffff81185865>] ____fput+0xe/0x10
[   80.615435]  [<ffffffff81084540>] task_work_run+0x6a/0x93
[   80.625427]  [<ffffffff810015d6>] prepare_exit_to_usermode+0x9e/0xaf
[   80.636455]  [<ffffffff810017d6>] syscall_return_slowpath+0x1ef/0x264
[   80.647577]  [<ffffffff81084462>] ? task_work_add+0x44/0x53
[   80.657683]  [<ffffffff811858e3>] ? fput+0x7c/0x83
[   80.666935]  [<ffffffff810a6e52>] ? trace_hardirqs_on_caller+0x16/0x196
[   80.678213]  [<ffffffff81000c87>] ? trace_hardirqs_on_thunk+0x17/0x19
[   80.689273]  [<ffffffff814e2ab1>] int_ret_from_sys_call+0x25/0x9f

> +
> +	WARN_ON(!list_empty(&ppgtt->base.active_list));
> +
> +	list_for_each_entry_safe(vma, next, &ppgtt->base.inactive_list,
> +				 mm_list) {
> +		if (WARN_ON(__i915_vma_unbind_no_wait(vma)))
> +			break;
> +	}
> +}
> +
>  void i915_gem_context_free(struct kref *ctx_ref)
>  {
>  	struct intel_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
> @@ -142,6 +159,13 @@ void i915_gem_context_free(struct kref *ctx_ref)
>  	if (i915.enable_execlists)
>  		intel_lr_context_free(ctx);
>  
> +	/*
> +	 * This context is going away and we need to remove all VMAs still
> +	 * around. This is to handle imported shared objects for which
> +	 * destructor did not run when their handles were closed.
> +	 */
> +	i915_gem_context_clean(ctx);
> +
>  	i915_ppgtt_put(ctx->ppgtt);
>  
>  	if (ctx->legacy_hw_ctx.rcs_state)
> -- 
> 1.9.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux