On Fri, 2015-11-06 at 00:57 +0200, Imre Deak wrote:
> On Thu, 2015-11-05 at 11:56 +0000, Chris Wilson wrote:
> > On Thu, Nov 05, 2015 at 01:28:32PM +0200, Imre Deak wrote:
> > > On ke, 2015-11-04 at 20:57 +0000, Chris Wilson wrote:
> > > > On Wed, Nov 04, 2015 at 09:25:32PM +0200, Imre Deak wrote:
> > > > > After Damien's D3 fix I started to get runtime suspend residency
> > > > > for the first time and that revealed a breakage on the set_caching
> > > > > IOCTL path that accesses the HW but doesn't take an RPM ref. Fix
> > > > > this up.
> > > > 
> > > > Why here (and in so many random places) and not around the PTE
> > > > write itself?
> > > 
> > > Imo we should take the RPM ref outside of any of our locks. Otherwise
> > > we need hacks like the one we currently have in the runtime suspend
> > > handler to work around lock inversions. It works now, but we couldn't
> > > use the same trick if we needed to take struct_mutex in the resume
> > > handler too, for example.
> > 
> > On the other hand, it leads to hard-to-track-down bugs and improper
> > documentation. Neither solution is perfect.
> 
> I think I understand the documentation part, but I'm not sure how that
> could be solved. Perhaps RPM-held asserts right before the point where
> the HW needs to be on?
> 
> What do you mean by hard-to-track-down bugs? Couldn't those also be
> tackled by asserts?
> 
> > Note since intel_runtime_suspend has ample barriers of its own, you
> > can avoid the struct_mutex if you have a dedicated
> > dev_priv->mm.fault_list.
> > 
> > Something along the lines of:
> 
> Ok, looks interesting. But as you said, we would then have to make
> callers of i915_gem_release_mmap() wake up the device, which isn't
> strictly necessary (and imo as such goes somewhat against the
> documentation argument).

Actually, we could also use pm_runtime_get_noresume(). But I find this
would just complicate things without a real benefit.
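
By RPM-held asserts I mean something like the minimal sketch below. The
helper name and the dev_priv->pm.suspended check are only illustrative,
we'd still have to add a real version of it:

static inline void
assert_rpm_wakelock_held(struct drm_i915_private *dev_priv)
{
	/* HW access is only valid with the device out of D3, so shout
	 * if a runtime suspend has already run (or may be running). */
	WARN_ONCE(dev_priv->pm.suspended,
		  "Accessing the HW with the device runtime suspended\n");
}

The set_caching path, for instance, could call this right before writing
the PTEs.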
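
And the pm_runtime_get_noresume() idea, again only as a rough sketch
(an intel_runtime_pm_get_noresume() wrapper doesn't exist today):

static void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv)
{
	/* Take an RPM usage count reference without waking the device:
	 * this only blocks a future runtime suspend. If the device is
	 * already suspended its mmaps have been released, so
	 * i915_gem_release_mmap() would simply turn into a no-op. */
	pm_runtime_get_noresume(&dev_priv->dev->pdev->dev);
}

A caller of i915_gem_release_mmap() would then do:

	intel_runtime_pm_get_noresume(dev_priv);
	i915_gem_release_mmap(obj);
	intel_runtime_pm_put(dev_priv);

instead of a full intel_runtime_pm_get(), avoiding the wake-up.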
> --Imre
> 
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> > index 86734be..fe91ce5 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -180,11 +180,11 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
> >  	}
> >  	if (obj->stolen)
> >  		seq_printf(m, " (stolen: %08llx)", obj->stolen->start);
> > -	if (obj->pin_display || obj->fault_mappable) {
> > +	if (obj->pin_display || !list_empty(&obj->fault_link)) {
> >  		char s[3], *t = s;
> >  		if (obj->pin_display)
> >  			*t++ = 'p';
> > -		if (obj->fault_mappable)
> > +		if (!list_empty(&obj->fault_link))
> >  			*t++ = 'f';
> >  		*t = '\0';
> >  		seq_printf(m, " (%s mappable)", s);
> > @@ -474,7 +474,7 @@ static int i915_gem_object_info(struct seq_file *m, void* data)
> > 
> >  	size = count = mappable_size = mappable_count = 0;
> >  	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
> > -		if (obj->fault_mappable) {
> > +		if (!list_empty(&obj->fault_link)) {
> >  			size += i915_gem_obj_ggtt_size(obj);
> >  			++count;
> >  		}
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> > index 1d88745..179594e 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -1447,28 +1447,10 @@ static int intel_runtime_suspend(struct device *device)
> >  	DRM_DEBUG_KMS("Suspending device\n");
> > 
> >  	/*
> > -	 * We could deadlock here in case another thread holding struct_mutex
> > -	 * calls RPM suspend concurrently, since the RPM suspend will wait
> > -	 * first for this RPM suspend to finish. In this case the concurrent
> > -	 * RPM resume will be followed by its RPM suspend counterpart. Still
> > -	 * for consistency return -EAGAIN, which will reschedule this suspend.
> > -	 */
> > -	if (!mutex_trylock(&dev->struct_mutex)) {
> > -		DRM_DEBUG_KMS("device lock contention, deffering suspend\n");
> > -		/*
> > -		 * Bump the expiration timestamp, otherwise the suspend won't
> > -		 * be rescheduled.
> > -		 */
> > -		pm_runtime_mark_last_busy(device);
> > -
> > -		return -EAGAIN;
> > -	}
> > -	/*
> >  	 * We are safe here against re-faults, since the fault handler takes
> >  	 * an RPM reference.
> >  	 */
> >  	i915_gem_release_all_mmaps(dev_priv);
> > -	mutex_unlock(&dev->struct_mutex);
> > 
> >  	intel_suspend_gt_powersave(dev);
> >  	intel_runtime_pm_disable_interrupts(dev_priv);
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 55611d8..5e4904a 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -1268,6 +1268,8 @@ struct i915_gem_mm {
> >  	 */
> >  	struct list_head unbound_list;
> > 
> > +	struct list_head fault_list;
> > +
> >  	/** Usable portion of the GTT for GEM */
> >  	unsigned long stolen_base; /* limited to low memory (32-bit) */
> > 
> > @@ -2025,6 +2027,8 @@ struct drm_i915_gem_object {
> > 
> >  	struct list_head batch_pool_link;
> > 
> > +	struct list_head fault_link;
> > +
> >  	/**
> >  	 * This is set if the object is on the active lists (has pending
> >  	 * rendering and so a non-zero seqno), and is not set if it is on
> > @@ -2069,13 +2073,6 @@ struct drm_i915_gem_object {
> >  	 */
> >  	unsigned int map_and_fenceable:1;
> > 
> > -	/**
> > -	 * Whether the current gtt mapping needs to be mappable (and isn't just
> > -	 * mappable by accident). Track pin and fault separate for a more
> > -	 * accurate mappable working set.
> > -	 */
> > -	unsigned int fault_mappable:1;
> > -
> >  	/*
> >  	 * Is the object to be mapped as read-only to the GPU
> >  	 * Only honoured if hardware has relevant pte bit
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 407b6b3..a90d1d8 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -1814,9 +1814,10 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> >  				break;
> >  		}
> > 
> > -		obj->fault_mappable = true;
> > +		if (list_empty(&obj->fault_link))
> > +			list_add(&obj->fault_link, &dev_priv->mm.fault_list);
> >  	} else {
> > -		if (!obj->fault_mappable) {
> > +		if (list_empty(&obj->fault_link)) {
> >  			unsigned long size = min_t(unsigned long,
> >  						   vma->vm_end - vma->vm_start,
> >  						   obj->base.size);
> > @@ -1830,7 +1831,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> >  					break;
> >  			}
> > 
> > -			obj->fault_mappable = true;
> > +			list_add(&obj->fault_link, &dev_priv->mm.fault_list);
> >  		} else
> >  			ret = vm_insert_pfn(vma,
> >  					    (unsigned long)vmf->virtual_address,
> > @@ -1903,20 +1904,20 @@ out:
> >  void
> >  i915_gem_release_mmap(struct drm_i915_gem_object *obj)
> >  {
> > -	if (!obj->fault_mappable)
> > +	if (list_empty(&obj->fault_link))
> >  		return;
> > 
> >  	drm_vma_node_unmap(&obj->base.vma_node,
> >  			   obj->base.dev->anon_inode->i_mapping);
> > -	obj->fault_mappable = false;
> > +	list_del_init(&obj->fault_link);
> >  }
> > 
> >  void
> >  i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
> >  {
> > -	struct drm_i915_gem_object *obj;
> > +	struct drm_i915_gem_object *obj, *n;
> > 
> > -	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
> > +	list_for_each_entry_safe(obj, n, &dev_priv->mm.fault_list, fault_link)
> >  		i915_gem_release_mmap(obj);
> >  }
> > 
> > @@ -4224,6 +4229,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
> >  	INIT_LIST_HEAD(&obj->obj_exec_link);
> >  	INIT_LIST_HEAD(&obj->vma_list);
> >  	INIT_LIST_HEAD(&obj->batch_pool_link);
> > +	INIT_LIST_HEAD(&obj->fault_link);
> > 
> >  	obj->ops = ops;
> > 
> > @@ -4858,6 +4864,7 @@ i915_gem_load(struct drm_device *dev)
> >  	INIT_LIST_HEAD(&dev_priv->context_list);
> >  	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
> >  	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
> > +	INIT_LIST_HEAD(&dev_priv->mm.fault_list);
> >  	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
> >  	for (i = 0; i < I915_NUM_RINGS; i++)
> >  		init_ring_lists(&dev_priv->ring[i]);
> > 
> > 
> > The tricky part is reviewing the i915_gem_release_mmap() callers to
> > ensure that they have the right barrier. If you make
> > i915_gem_release_mmap() assert that the caller holds an rpm ref, and
> > then make i915_gem_release_all_mmaps() unlink the node directly, that
> > should do the trick.
> > -Chris
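
For my own clarity, the end state of the above suggestion would look
roughly like the sketch below, on top of your diff.
assert_rpm_wakelock_held() is the same hypothetical helper as in my
earlier sketch:

void
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
	/* Callers must hold an RPM reference: that is what serialises
	 * us against intel_runtime_suspend() now that struct_mutex is
	 * no longer taken there. */
	assert_rpm_wakelock_held(to_i915(obj->base.dev));

	if (list_empty(&obj->fault_link))
		return;

	drm_vma_node_unmap(&obj->base.vma_node,
			   obj->base.dev->anon_inode->i_mapping);
	list_del_init(&obj->fault_link);
}

void
i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
{
	struct drm_i915_gem_object *obj, *n;

	/* Runs only from the runtime suspend path, where new faults are
	 * already excluded by the RPM ref the fault handler takes, so
	 * unlink the nodes directly instead of going through the assert
	 * in i915_gem_release_mmap(). */
	list_for_each_entry_safe(obj, n, &dev_priv->mm.fault_list, fault_link) {
		drm_vma_node_unmap(&obj->base.vma_node,
				   obj->base.dev->anon_inode->i_mapping);
		list_del_init(&obj->fault_link);
	}
}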
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx