On Tue, Oct 11, 2016 at 03:37:58PM +0100, Chris Wilson wrote: > We can remove the false coupling between RPM and struct mutex by the > observation that we can use the RPM wakeref as the barrier around user > mmap access. That is as we tear down the user's PTE atomically from > within rpm suspend and then to fault in new PTE requires the rpm > wakeref, means that no user access is possible through those PTE without > RPM being awake. Having made that observation, we can then remove the > presumption of having to take rpm outside of struct_mutex and so allow > fine grained acquisition of a wakeref around hw access rather than > having to remember to acquire the wakeref early on. > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Imre Deak <imre.deak@xxxxxxxxx> > Cc: Daniel Vetter <daniel@xxxxxxxx> > Cc: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_debugfs.c | 22 ---------------------- > drivers/gpu/drm/i915/i915_drv.c | 19 ------------------- > drivers/gpu/drm/i915/i915_gem.c | 14 +++++--------- > drivers/gpu/drm/i915/i915_gem_gtt.c | 17 +++++++++++++---- > drivers/gpu/drm/i915/i915_gem_tiling.c | 4 ---- > drivers/gpu/drm/i915/intel_uncore.c | 6 +++--- > 6 files changed, 21 insertions(+), 61 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index d4ecc5283c2a..d4779abd89e3 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -1400,14 +1400,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) > static int ironlake_drpc_info(struct seq_file *m) > { > struct drm_i915_private *dev_priv = node_to_i915(m->private); > - struct drm_device *dev = &dev_priv->drm; > u32 rgvmodectl, rstdbyctl; > u16 crstandvid; > - int ret; > > - ret = mutex_lock_interruptible(&dev->struct_mutex); > - if (ret) > - return ret; > intel_runtime_pm_get(dev_priv); > > rgvmodectl = I915_READ(MEMMODECTL); > @@ -1415,7 +1410,6 @@ static int ironlake_drpc_info(struct seq_file *m) > crstandvid = I915_READ16(CRSTANDVID); > > intel_runtime_pm_put(dev_priv); > - mutex_unlock(&dev->struct_mutex); > > seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN)); > seq_printf(m, "Boost freq: %d\n", > @@ -2093,12 +2087,7 @@ static const char *swizzle_string(unsigned swizzle) > static int i915_swizzle_info(struct seq_file *m, void *data) > { > struct drm_i915_private *dev_priv = node_to_i915(m->private); > - struct drm_device *dev = &dev_priv->drm; > - int ret; > > - ret = mutex_lock_interruptible(&dev->struct_mutex); > - if (ret) > - return ret; > intel_runtime_pm_get(dev_priv); > > seq_printf(m, "bit6 swizzle for X-tiling = %s\n", > @@ -2138,7 +2127,6 @@ static int i915_swizzle_info(struct seq_file *m, void *data) > seq_puts(m, "L-shaped memory detected\n"); > > intel_runtime_pm_put(dev_priv); > - mutex_unlock(&dev->struct_mutex); > > return 0; > } > @@ -4797,13 +4785,9 @@ i915_wedged_set(void *data, u64 val) > if (i915_reset_in_progress(&dev_priv->gpu_error)) > return -EAGAIN; > > - intel_runtime_pm_get(dev_priv); > - > i915_handle_error(dev_priv, val, > "Manually setting wedged to %llu", val); > > - intel_runtime_pm_put(dev_priv); > - > return 0; > } > > @@ -5038,22 +5022,16 @@ static int > i915_cache_sharing_get(void *data, u64 *val) > { > struct drm_i915_private *dev_priv = data; > - struct drm_device *dev = &dev_priv->drm; > u32 snpcr; > - int ret; > > if (!(IS_GEN6(dev_priv) || IS_GEN7(dev_priv))) > return -ENODEV; > > - ret = mutex_lock_interruptible(&dev->struct_mutex); > - if (ret) > - return ret; > intel_runtime_pm_get(dev_priv); > > snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); > > intel_runtime_pm_put(dev_priv); > - mutex_unlock(&dev->struct_mutex); > > *val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT; > > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > index 89d322215c84..31eee32fcf6d 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -2313,24 +2313,6 @@ static int intel_runtime_suspend(struct device *kdev) > > DRM_DEBUG_KMS("Suspending device\n"); > > - /* > - * We could deadlock here in case another thread holding struct_mutex > - * calls RPM suspend concurrently, since the RPM suspend will wait > - * first for this RPM suspend to finish. In this case the concurrent > - * RPM resume will be followed by its RPM suspend counterpart. Still > - * for consistency return -EAGAIN, which will reschedule this suspend. > - */ > - if (!mutex_trylock(&dev->struct_mutex)) { > - DRM_DEBUG_KMS("device lock contention, deffering suspend\n"); > - /* > - * Bump the expiration timestamp, otherwise the suspend won't > - * be rescheduled. > - */ > - pm_runtime_mark_last_busy(kdev); > - > - return -EAGAIN; > - } > - > disable_rpm_wakeref_asserts(dev_priv); > > /* > @@ -2338,7 +2320,6 @@ static int intel_runtime_suspend(struct device *kdev) > * an RPM reference. > */ > i915_gem_release_all_mmaps(dev_priv); > - mutex_unlock(&dev->struct_mutex); > > intel_guc_suspend(dev); > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 91910ffe0964..587a91af5a3f 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -1469,7 +1469,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, > */ > if (!i915_gem_object_has_struct_page(obj) || > cpu_write_needs_clflush(obj)) { > + intel_runtime_pm_get(dev_priv); > ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file); > + intel_runtime_pm_put(dev_priv); I'd move the rpm_get/put into gtt_pwrite_fast - there's only one caller, and it's in the spirit of this patch of moving the rpm get/put closer to where we really need it. > /* Note that the gtt paths might fail with non-page-backed user > * pointers (e.g. gtt mappings when moving data between > * textures). Fallback to the shmem path in that case. */ > @@ -1850,6 +1852,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf) > if (ret) > goto err_unpin; > > + assert_rpm_wakelock_held(dev_priv); > spin_lock(&dev_priv->mm.userfault_lock); > list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); > spin_unlock(&dev_priv->mm.userfault_lock); > @@ -1932,7 +1935,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) > * requirement that operations to the GGTT be made holding the RPM > * wakeref. > */ > - lockdep_assert_held(&obj->base.dev->struct_mutex); > + assert_rpm_wakelock_held(i915); > > spin_lock(&i915->mm.userfault_lock); > if (!list_empty(&obj->userfault_link)) { > @@ -3469,7 +3472,6 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, > int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, > struct drm_file *file) > { > - struct drm_i915_private *dev_priv = to_i915(dev); > struct drm_i915_gem_caching *args = data; > struct drm_i915_gem_object *obj; > enum i915_cache_level level; > @@ -3498,11 +3500,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, > return -EINVAL; > } > > - intel_runtime_pm_get(dev_priv); > - > ret = i915_mutex_lock_interruptible(dev); > if (ret) > - goto rpm_put; > + return ret; > > obj = i915_gem_object_lookup(file, args->handle); > if (!obj) { > @@ -3511,13 +3511,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, > } > > ret = i915_gem_object_set_cache_level(obj, level); > - > i915_gem_object_put(obj); > unlock: > mutex_unlock(&dev->struct_mutex); > -rpm_put: > - intel_runtime_pm_put(dev_priv); > - > return ret; > } > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index 2d846aa39ca5..c0a4ba478309 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -2596,6 +2596,7 @@ static int ggtt_bind_vma(struct i915_vma *vma, > enum i915_cache_level cache_level, > u32 flags) > { > + struct drm_i915_private *i915 = to_i915(vma->vm->dev); > struct drm_i915_gem_object *obj = vma->obj; > u32 pte_flags = 0; > int ret; > @@ -2608,8 +2609,10 @@ static int ggtt_bind_vma(struct i915_vma *vma, > if (obj->gt_ro) > pte_flags |= PTE_READ_ONLY; > > + intel_runtime_pm_get(i915); > vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start, > cache_level, pte_flags); > + intel_runtime_pm_put(i915); > > /* > * Without aliasing PPGTT there's no difference between > @@ -2625,6 +2628,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, > enum i915_cache_level cache_level, > u32 flags) > { > + struct drm_i915_private *i915 = to_i915(vma->vm->dev); > u32 pte_flags; > int ret; > > @@ -2639,14 +2643,15 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, > > > if (flags & I915_VMA_GLOBAL_BIND) { > + intel_runtime_pm_get(i915); > vma->vm->insert_entries(vma->vm, > vma->pages, vma->node.start, > cache_level, pte_flags); > + intel_runtime_pm_put(i915); > } > > if (flags & I915_VMA_LOCAL_BIND) { > - struct i915_hw_ppgtt *appgtt = > - to_i915(vma->vm->dev)->mm.aliasing_ppgtt; > + struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; > appgtt->base.insert_entries(&appgtt->base, > vma->pages, vma->node.start, > cache_level, pte_flags); > @@ -2657,13 +2662,17 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, > > static void ggtt_unbind_vma(struct i915_vma *vma) > { > - struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt; > + struct drm_i915_private *i915 = to_i915(vma->vm->dev); > + struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; > const u64 size = min(vma->size, vma->node.size); > > - if (vma->flags & I915_VMA_GLOBAL_BIND) > + if (vma->flags & I915_VMA_GLOBAL_BIND) { > + intel_runtime_pm_get(i915); > vma->vm->clear_range(vma->vm, > vma->node.start, size, > true); > + intel_runtime_pm_put(i915); > + } > > if (vma->flags & I915_VMA_LOCAL_BIND && appgtt) > appgtt->base.clear_range(&appgtt->base, > diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c > index a14b1e3d4c78..08f796a4f5f6 100644 > --- a/drivers/gpu/drm/i915/i915_gem_tiling.c > +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c > @@ -204,8 +204,6 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, > return -EINVAL; > } > > - intel_runtime_pm_get(dev_priv); > - > mutex_lock(&dev->struct_mutex); > if (obj->pin_display || obj->framebuffer_references) { > err = -EBUSY; > @@ -301,8 +299,6 @@ err: > i915_gem_object_put(obj); > mutex_unlock(&dev->struct_mutex); > > - intel_runtime_pm_put(dev_priv); > - > return err; > } > > diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c > index e2b188dcf908..e0c3bd941e38 100644 > --- a/drivers/gpu/drm/i915/intel_uncore.c > +++ b/drivers/gpu/drm/i915/intel_uncore.c > @@ -1364,7 +1364,7 @@ int i915_reg_read_ioctl(struct drm_device *dev, > struct register_whitelist const *entry = whitelist; > unsigned size; > i915_reg_t offset_ldw, offset_udw; > - int i, ret = 0; > + int i, ret; > > for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) { > if (i915_mmio_reg_offset(entry->offset_ldw) == (reg->offset & -entry->size) && > @@ -1386,6 +1386,7 @@ int i915_reg_read_ioctl(struct drm_device *dev, > > intel_runtime_pm_get(dev_priv); > > + ret = 0; > switch (size) { > case 8 | 1: > reg->val = I915_READ64_2x32(offset_ldw, offset_udw); > @@ -1404,10 +1405,9 @@ int i915_reg_read_ioctl(struct drm_device *dev, > break; > default: > ret = -EINVAL; > - goto out; > + break; > } > > -out: > intel_runtime_pm_put(dev_priv); > return ret; Spurious hunks in i915_reg_read_ioctl, please remove from the patch. With the userfault_list/lock I'm happy with this now. With the 2 nits addressed. Reviewed-by: Daniel Vetter <daniel.vetter@xxxxxxxx> > } > -- > 2.9.3 > -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx