On Tue, Feb 20, 2018 at 10:50:11AM +0000, Chris Wilson wrote: > Currently we make the unilateral decision inside > i915_gem_object_pin_to_display() where the VMA should resided (inside > the fence and mappable region or above?). This is not our decision to > make as it impacts on how the display engine can use the resulting > scanout object, and it would rather instruct us where to place the VMA so > that it can enable the features it wants. As such, make the pin flags an > argument to i915_gem_object_pin_to_display() and control them from > intel_pin_and_fence_fb_obj() > > Whilst taking control of the mapping for ourselves, start tracking how > we use it to avoid trying to free a fence we never claimed: > > <3>[ 227.151869] GEM_BUG_ON(vma->fence->pin_count <= 0) > <4>[ 227.152064] ------------[ cut here ]------------ > <2>[ 227.152068] kernel BUG at drivers/gpu/drm/i915/i915_vma.h:391! > <4>[ 227.152084] invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI > <0>[ 227.152092] Dumping ftrace buffer: > <0>[ 227.152099] (ftrace buffer empty) > <4>[ 227.152102] Modules linked in: i915 snd_hda_codec_analog snd_hda_codec_generic coretemp snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm lpc_ich e1000e mei_me mei prime_numbers > <4>[ 227.152131] CPU: 1 PID: 1587 Comm: kworker/u16:49 Tainted: G U 4.16.0-rc1-gbab67b2f6177-kasan_7+ #1 > <4>[ 227.152134] Hardware name: Dell Inc. OptiPlex 755 /0PU052, BIOS A08 02/19/2008 > <4>[ 227.152236] Workqueue: events_unbound intel_atomic_commit_work [i915] > <4>[ 227.152292] RIP: 0010:intel_unpin_fb_vma+0x23a/0x2a0 [i915] > <4>[ 227.152295] RSP: 0018:ffff88005aad7b68 EFLAGS: 00010286 > <4>[ 227.152300] RAX: 0000000000000026 RBX: ffff88005c359580 RCX: 0000000000000000 > <4>[ 227.152304] RDX: 0000000000000026 RSI: ffffffff8707d840 RDI: ffffed000b55af63 > <4>[ 227.152307] RBP: ffff880056817e58 R08: 0000000000000001 R09: 0000000000000000 > <4>[ 227.152311] R10: ffff88005aad7b88 R11: 0000000000000000 R12: ffff8800568184d0 > <4>[ 227.152314] R13: ffff880065b5ab08 R14: 0000000000000000 R15: dffffc0000000000 > <4>[ 227.152318] FS: 0000000000000000(0000) GS:ffff88006ac40000(0000) knlGS:0000000000000000 > <4>[ 227.152322] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > <4>[ 227.152325] CR2: 00007f5fb25550a8 CR3: 0000000068c78000 CR4: 00000000000006e0 > <4>[ 227.152328] Call Trace: > <4>[ 227.152385] intel_cleanup_plane_fb+0x6b/0xd0 [i915] > <4>[ 227.152395] drm_atomic_helper_cleanup_planes+0x166/0x280 > <4>[ 227.152452] intel_atomic_commit_tail+0x159d/0x3380 [i915] > <4>[ 227.152463] ? process_one_work+0x66e/0x1460 > <4>[ 227.152516] ? skl_update_crtcs+0x9c0/0x9c0 [i915] > <4>[ 227.152523] ? lock_acquire+0x13d/0x390 > <4>[ 227.152527] ? lock_acquire+0x13d/0x390 > <4>[ 227.152534] process_one_work+0x71a/0x1460 > <4>[ 227.152540] ? __schedule+0x815/0x1e20 > <4>[ 227.152547] ? pwq_dec_nr_in_flight+0x2b0/0x2b0 > <4>[ 227.152553] ? _raw_spin_lock_irq+0xa/0x40 > <4>[ 227.152559] worker_thread+0xdf/0xf60 > <4>[ 227.152569] ? process_one_work+0x1460/0x1460 > <4>[ 227.152573] kthread+0x2cf/0x3c0 > <4>[ 227.152578] ? _kthread_create_on_node+0xa0/0xa0 > <4>[ 227.152583] ret_from_fork+0x3a/0x50 > <4>[ 227.152591] Code: c6 00 11 86 c0 48 c7 c7 e0 bd 85 c0 e8 60 e7 a9 c4 0f ff e9 1f fe ff ff 48 c7 c6 40 10 86 c0 48 c7 c7 e0 ca 85 c0 e8 2b 95 bd c4 <0f> 0b 48 89 ef e8 4c 44 e8 c4 e9 ef fd ff ff e8 42 44 e8 c4 e9 > <1>[ 227.152720] RIP: intel_unpin_fb_vma+0x23a/0x2a0 [i915] RSP: ffff88005aad7b68 > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> > Cc: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx > --- > drivers/gpu/drm/i915/i915_drv.h | 3 ++- > drivers/gpu/drm/i915/i915_gem.c | 26 ++++++------------ > drivers/gpu/drm/i915/intel_atomic_plane.c | 1 + > drivers/gpu/drm/i915/intel_display.c | 45 ++++++++++++++++++++++++------- > drivers/gpu/drm/i915/intel_drv.h | 9 +++++-- > drivers/gpu/drm/i915/intel_fbdev.c | 10 ++++--- > drivers/gpu/drm/i915/intel_overlay.c | 3 ++- > 7 files changed, 62 insertions(+), 35 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 76bfe909168c..20575b3ee406 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -3413,7 +3413,8 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write); > struct i915_vma * __must_check > i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, > u32 alignment, > - const struct i915_ggtt_view *view); > + const struct i915_ggtt_view *view, > + unsigned int flags); > void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma); > int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, > int align); > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 1854a69bc354..bdc2069d5433 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -4078,7 +4078,8 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, > struct i915_vma * > i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, > u32 alignment, > - const struct i915_ggtt_view *view) > + const struct i915_ggtt_view *view, > + unsigned int flags) > { > struct i915_vma *vma; > int ret; > @@ -4115,25 +4116,14 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, > * try to preserve the existing ABI). > */ > vma = ERR_PTR(-ENOSPC); > - if (!view || view->type == I915_GGTT_VIEW_NORMAL) > + if ((flags & PIN_MAPPABLE) == 0 && > + (!view || view->type == I915_GGTT_VIEW_NORMAL)) > vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, > - PIN_MAPPABLE | PIN_NONBLOCK); > - if (IS_ERR(vma)) { > - struct drm_i915_private *i915 = to_i915(obj->base.dev); > - unsigned int flags; > - > - /* Valleyview is definitely limited to scanning out the first > - * 512MiB. Lets presume this behaviour was inherited from the > - * g4x display engine and that all earlier gen are similarly > - * limited. Testing suggests that it is a little more > - * complicated than this. For example, Cherryview appears quite > - * happy to scanout from anywhere within its global aperture. > - */ > - flags = 0; > - if (HAS_GMCH_DISPLAY(i915)) > - flags = PIN_MAPPABLE; > + flags | > + PIN_MAPPABLE | > + PIN_NONBLOCK); > + if (IS_ERR(vma)) > vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); > - } > if (IS_ERR(vma)) > goto err_unpin_global; > > diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c > index 0ee32275994a..7481ce85746b 100644 > --- a/drivers/gpu/drm/i915/intel_atomic_plane.c > +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c > @@ -85,6 +85,7 @@ intel_plane_duplicate_state(struct drm_plane *plane) > __drm_atomic_helper_plane_duplicate_state(plane, state); > > intel_state->vma = NULL; > + intel_state->flags = 0; > > return state; > } > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c > index 280c04326f64..8075e1990157 100644 > --- a/drivers/gpu/drm/i915/intel_display.c > +++ b/drivers/gpu/drm/i915/intel_display.c > @@ -2068,7 +2068,9 @@ static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb, > } > > struct i915_vma * > -intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation) > +intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, > + unsigned int rotation, > + unsigned long *out_flags) > { > struct drm_device *dev = fb->dev; > struct drm_i915_private *dev_priv = to_i915(dev); > @@ -2076,6 +2078,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation) > struct i915_ggtt_view view; > struct i915_vma *vma; > u32 alignment; > + unsigned int pinctl; > > WARN_ON(!mutex_is_locked(&dev->struct_mutex)); > > @@ -2102,7 +2105,20 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation) > > atomic_inc(&dev_priv->gpu_error.pending_fb_pin); > > - vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view); > + pinctl = 0; > + > + /* Valleyview is definitely limited to scanning out the first > + * 512MiB. Lets presume this behaviour was inherited from the > + * g4x display engine and that all earlier gen are similarly > + * limited. Testing suggests that it is a little more > + * complicated than this. For example, Cherryview appears quite > + * happy to scanout from anywhere within its global aperture. > + */ > + if (HAS_GMCH_DISPLAY(dev_priv)) > + pinctl |= PIN_MAPPABLE; > + > + vma = i915_gem_object_pin_to_display_plane(obj, > + alignment, &view, pinctl); > if (IS_ERR(vma)) > goto err; > > @@ -2123,7 +2139,8 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation) > * something and try to run the system in a "less than optimal" > * mode that matches the user configuration. > */ > - i915_vma_pin_fence(vma); > + if (i915_vma_pin_fence(vma) == 0) > + *out_flags |= PLANE_HAS_FENCE; > } > > i915_vma_get(vma); > @@ -2134,11 +2151,12 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation) > return vma; > } > > -void intel_unpin_fb_vma(struct i915_vma *vma) > +void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags) > { > lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); > > - i915_vma_unpin_fence(vma); > + if (flags & PLANE_HAS_FENCE) > + i915_vma_unpin_fence(vma); > i915_gem_object_unpin_from_display_plane(vma); > i915_vma_put(vma); > } > @@ -2808,7 +2826,9 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, > valid_fb: > mutex_lock(&dev->struct_mutex); > intel_state->vma = > - intel_pin_and_fence_fb_obj(fb, primary->state->rotation); > + intel_pin_and_fence_fb_obj(fb, > + primary->state->rotation, > + &intel_state->flags); > mutex_unlock(&dev->struct_mutex); > if (IS_ERR(intel_state->vma)) { > DRM_ERROR("failed to pin boot fb on pipe %d: %li\n", > @@ -12713,7 +12733,9 @@ intel_prepare_plane_fb(struct drm_plane *plane, > } else { > struct i915_vma *vma; > > - vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation); > + vma = intel_pin_and_fence_fb_obj(fb, > + new_state->rotation, > + &to_intel_plane_state(new_state)->flags); > if (!IS_ERR(vma)) > to_intel_plane_state(new_state)->vma = vma; > else > @@ -12768,7 +12790,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane, > vma = fetch_and_zero(&to_intel_plane_state(old_state)->vma); > if (vma) { > mutex_lock(&plane->dev->struct_mutex); > - intel_unpin_fb_vma(vma); > + intel_unpin_fb_vma(vma, to_intel_plane_state(old_state)->flags); > mutex_unlock(&plane->dev->struct_mutex); > } > } > @@ -13129,7 +13151,9 @@ intel_legacy_cursor_update(struct drm_plane *plane, > goto out_unlock; > } > } else { > - vma = intel_pin_and_fence_fb_obj(fb, new_plane_state->rotation); > + vma = intel_pin_and_fence_fb_obj(fb, > + new_plane_state->rotation, > + &to_intel_plane_state(new_plane_state)->flags); > if (IS_ERR(vma)) { > DRM_DEBUG_KMS("failed to pin object\n"); > > @@ -13160,7 +13184,8 @@ intel_legacy_cursor_update(struct drm_plane *plane, > > old_vma = fetch_and_zero(&to_intel_plane_state(old_plane_state)->vma); > if (old_vma) > - intel_unpin_fb_vma(old_vma); > + intel_unpin_fb_vma(old_vma, > + to_intel_plane_state(old_plane_state)->flags); > > out_unlock: > mutex_unlock(&dev_priv->drm.struct_mutex); > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h > index 898064e8bea7..50874f4035cf 100644 > --- a/drivers/gpu/drm/i915/intel_drv.h > +++ b/drivers/gpu/drm/i915/intel_drv.h > @@ -204,6 +204,7 @@ struct intel_fbdev { > struct drm_fb_helper helper; > struct intel_framebuffer *fb; > struct i915_vma *vma; > + unsigned long vma_flags; > async_cookie_t cookie; > int preferred_bpp; > }; > @@ -490,6 +491,8 @@ struct intel_atomic_state { > struct intel_plane_state { > struct drm_plane_state base; > struct i915_vma *vma; > + unsigned long flags; long seems potentially wasteful when we have just the one flag. Although maybe the next thing gets aligned to 64bits anyway. Anyways, patch looks reasonable. Reviewed-by: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> fbc should probably start looking at the new flag instead of using the racy vma->fence checks it has now. > +#define PLANE_HAS_FENCE BIT(0) > > struct { > u32 offset; > @@ -1503,8 +1506,10 @@ void intel_release_load_detect_pipe(struct drm_connector *connector, > struct intel_load_detect_pipe *old, > struct drm_modeset_acquire_ctx *ctx); > struct i915_vma * > -intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation); > -void intel_unpin_fb_vma(struct i915_vma *vma); > +intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, > + unsigned int rotation, > + unsigned long *out_flags); > +void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags); > struct drm_framebuffer * > intel_framebuffer_create(struct drm_i915_gem_object *obj, > struct drm_mode_fb_cmd2 *mode_cmd); > diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c > index da48af11eb6b..3d8ce3a62743 100644 > --- a/drivers/gpu/drm/i915/intel_fbdev.c > +++ b/drivers/gpu/drm/i915/intel_fbdev.c > @@ -177,6 +177,7 @@ static int intelfb_create(struct drm_fb_helper *helper, > struct fb_info *info; > struct drm_framebuffer *fb; > struct i915_vma *vma; > + unsigned long flags = 0; > bool prealloc = false; > void __iomem *vaddr; > int ret; > @@ -211,7 +212,9 @@ static int intelfb_create(struct drm_fb_helper *helper, > * This also validates that any existing fb inherited from the > * BIOS is suitable for own access. > */ > - vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, DRM_MODE_ROTATE_0); > + vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, > + DRM_MODE_ROTATE_0, > + &flags); > if (IS_ERR(vma)) { > ret = PTR_ERR(vma); > goto out_unlock; > @@ -268,6 +271,7 @@ static int intelfb_create(struct drm_fb_helper *helper, > DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08x\n", > fb->width, fb->height, i915_ggtt_offset(vma)); > ifbdev->vma = vma; > + ifbdev->vma_flags = flags; > > intel_runtime_pm_put(dev_priv); > mutex_unlock(&dev->struct_mutex); > @@ -275,7 +279,7 @@ static int intelfb_create(struct drm_fb_helper *helper, > return 0; > > out_unpin: > - intel_unpin_fb_vma(vma); > + intel_unpin_fb_vma(vma, flags); > out_unlock: > intel_runtime_pm_put(dev_priv); > mutex_unlock(&dev->struct_mutex); > @@ -513,7 +517,7 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev) > > if (ifbdev->vma) { > mutex_lock(&ifbdev->helper.dev->struct_mutex); > - intel_unpin_fb_vma(ifbdev->vma); > + intel_unpin_fb_vma(ifbdev->vma, ifbdev->vma_flags); > mutex_unlock(&ifbdev->helper.dev->struct_mutex); > } > > diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c > index 41e9465d44a8..89f568e739ee 100644 > --- a/drivers/gpu/drm/i915/intel_overlay.c > +++ b/drivers/gpu/drm/i915/intel_overlay.c > @@ -801,7 +801,8 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, > > atomic_inc(&dev_priv->gpu_error.pending_fb_pin); > > - vma = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL); > + vma = i915_gem_object_pin_to_display_plane(new_bo, > + 0, NULL, PIN_MAPPABLE); > if (IS_ERR(vma)) { > ret = PTR_ERR(vma); > goto out_pin_section; > -- > 2.16.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Ville Syrjälä Intel OTC _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx