Quoting Tvrtko Ursulin (2019-01-02 13:21:17)
>
> On 02/01/2019 09:41, Chris Wilson wrote:
>
> Give it a bit of commit text please.

A starting point to counter the pervasive struct_mutex. For the goal of
avoiding global locks (or at least blocking under them!) during user
request submission, a simple but important step is being able to manage
each client's GTT separately. To that end, we want to stop using the
struct_mutex as the guard for all things GTT/VM and switch instead to a
specific mutex inside i915_address_space.

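For reference, the pieces the new locking revolves around look roughly
like this (an abridged sketch only, not the actual definition; the real
struct i915_address_space in i915_gem_gtt.h carries many more members):

    #include <linux/list.h>
    #include <linux/mutex.h>

    struct i915_address_space {
            struct mutex mutex;             /* guards the VMA lists below */
            struct list_head bound_list;    /* VMAs holding a node in this VM */
            struct list_head unbound_list;  /* VMAs without a node */
            /* ... */
    };

With one such mutex per address space (and so per client ppGTT), list
management for one VM no longer has to serialise against every other
client behind the device-global struct_mutex.
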
> > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> > ---
> >  drivers/gpu/drm/i915/i915_gem.c                  | 14 ++++++++------
> >  drivers/gpu/drm/i915/i915_gem_evict.c            |  2 ++
> >  drivers/gpu/drm/i915/i915_gem_gtt.c              | 15 +++++++++++++--
> >  drivers/gpu/drm/i915/i915_gem_shrinker.c         |  4 ++++
> >  drivers/gpu/drm/i915/i915_gem_stolen.c           |  2 ++
> >  drivers/gpu/drm/i915/i915_vma.c                  | 11 +++++++++++
> >  drivers/gpu/drm/i915/selftests/i915_gem_evict.c  |  3 +++
> >  drivers/gpu/drm/i915/selftests/i915_gem_gtt.c    |  3 +++
> >  8 files changed, 46 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index d9e1dcee176c..66445e34c93c 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -246,18 +246,19 @@ int
> >  i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
> >                              struct drm_file *file)
> >  {
> > -        struct drm_i915_private *dev_priv = to_i915(dev);
> > -        struct i915_ggtt *ggtt = &dev_priv->ggtt;
> > +        struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
> >          struct drm_i915_gem_get_aperture *args = data;
> >          struct i915_vma *vma;
> >          u64 pinned;
> >
> > +        mutex_lock(&ggtt->vm.mutex);
> > +
> >          pinned = ggtt->vm.reserved;
> > -        mutex_lock(&dev->struct_mutex);
> >          list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
> >                  if (i915_vma_is_pinned(vma))
> >                          pinned += vma->node.size;
> > -        mutex_unlock(&dev->struct_mutex);
> > +
> > +        mutex_unlock(&ggtt->vm.mutex);
> >
> >          args->aper_size = ggtt->vm.total;
> >          args->aper_available_size = args->aper_size - pinned;
> > @@ -1680,20 +1681,21 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> >
> >  static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
> >  {
> > -        struct drm_i915_private *i915;
> > +        struct drm_i915_private *i915 = to_i915(obj->base.dev);
> >          struct list_head *list;
> >          struct i915_vma *vma;
> >
> >          GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
> >
> > +        mutex_lock(&i915->ggtt.vm.mutex);
> >          for_each_ggtt_vma(vma, obj) {
> >                  if (!drm_mm_node_allocated(&vma->node))
> >                          continue;
> >
> >                  list_move_tail(&vma->vm_link, &vma->vm->bound_list);
> >          }
> > +        mutex_unlock(&i915->ggtt.vm.mutex);
> >
> > -        i915 = to_i915(obj->base.dev);
> >          spin_lock(&i915->mm.obj_lock);
> >          list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
> >          list_move_tail(&obj->mm.link, list);
> > diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> > index 6a3608398d2a..b7c2a396a63f 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> > @@ -418,6 +418,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
> >          }
> >
> >          INIT_LIST_HEAD(&eviction_list);
> > +        mutex_lock(&vm->mutex);
> >          list_for_each_entry(vma, &vm->bound_list, vm_link) {
> >                  if (i915_vma_is_pinned(vma))
> >                          continue;
> > @@ -425,6 +426,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
> >                  __i915_vma_pin(vma);
> >                  list_add(&vma->evict_link, &eviction_list);
> >          }
> > +        mutex_unlock(&vm->mutex);
> >
> >          ret = 0;
> >          list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index e5e23c4ac769..7ae10fbb2ee0 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -1932,7 +1932,10 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
> >          vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
> >
> >          INIT_LIST_HEAD(&vma->obj_link);
> > +
> > +        mutex_lock(&vma->vm->mutex);
> >          list_add(&vma->vm_link, &vma->vm->unbound_list);
> > +        mutex_unlock(&vma->vm->mutex);
> >
> >          return vma;
> >  }
> > @@ -3504,9 +3507,10 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
> >
> >          i915_check_and_clear_faults(dev_priv);
> >
> > +        mutex_lock(&ggtt->vm.mutex);
> > +
> >          /* First fill our portion of the GTT with scratch pages */
> >          ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
> > -
> >          ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */
> >
> >          /* clflush objects bound into the GGTT and rebind them. */
> > @@ -3516,19 +3520,26 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
> >                  if (!(vma->flags & I915_VMA_GLOBAL_BIND))
> >                          continue;
> >
> > +                mutex_unlock(&ggtt->vm.mutex);
> > +
> >                  if (!i915_vma_unbind(vma))
> What is the model - traverse under the new lock but then no need to get
> a reference to operate on outside the lock?
> > -                        continue;
> > +                        goto lock;
> >
> >                  WARN_ON(i915_vma_bind(vma,
> >                                        obj ? obj->cache_level : 0,
> >                                        PIN_UPDATE));
> >                  if (obj)
> >                          WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
> > +
> > +lock:
> > +                mutex_lock(&ggtt->vm.mutex);
> >          }
> >
> >          ggtt->vm.closed = false;
> >          i915_ggtt_invalidate(dev_priv);
> >
> > +        mutex_unlock(&ggtt->vm.mutex);
> > +
> >          if (INTEL_GEN(dev_priv) >= 8) {
> >                  struct intel_ppat *ppat = &dev_priv->ppat;
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > index 95af8cc9fa85..9d04c26c04f9 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > @@ -490,6 +490,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
> >          }
> >
> >          /* We also want to clear any cached iomaps as they wrap vmap */
> > +        mutex_lock(&i915->ggtt.vm.mutex);
> >          list_for_each_entry_safe(vma, next,
> >                                   &i915->ggtt.vm.bound_list, vm_link) {
> >                  unsigned long count = vma->node.size >> PAGE_SHIFT;
> > @@ -497,9 +498,12 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
> >                  if (!vma->iomap || i915_vma_is_active(vma))
> >                          continue;
> >
> > +                mutex_unlock(&i915->ggtt.vm.mutex);
> >                  if (i915_vma_unbind(vma) == 0)
> >                          freed_pages += count;
> > +                mutex_lock(&i915->ggtt.vm.mutex);
> >          }
> > +        mutex_unlock(&i915->ggtt.vm.mutex);
> >
> >  out:
> >          shrinker_unlock(i915, unlock);
> > diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> > index 75b97d71f072..21de3a5e9910 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> > @@ -703,7 +703,9 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
> >          vma->flags |= I915_VMA_GLOBAL_BIND;
> >          __i915_vma_set_map_and_fenceable(vma);
> >
> > +        mutex_lock(&ggtt->vm.mutex);
> >          list_move_tail(&vma->vm_link, &ggtt->vm.bound_list);
> > +        mutex_unlock(&ggtt->vm.mutex);
> >
> >          spin_lock(&dev_priv->mm.obj_lock);
> >          list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list);
> > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> > index 7de28baffb8f..dcbd0d345c72 100644
> > --- a/drivers/gpu/drm/i915/i915_vma.c
> > +++ b/drivers/gpu/drm/i915/i915_vma.c
> > @@ -213,7 +213,10 @@ vma_create(struct drm_i915_gem_object *obj,
> >          }
> >          rb_link_node(&vma->obj_node, rb, p);
> >          rb_insert_color(&vma->obj_node, &obj->vma_tree);
> > +
> > +        mutex_lock(&vm->mutex);
> >          list_add(&vma->vm_link, &vm->unbound_list);
> > +        mutex_unlock(&vm->mutex);
> >
> >          return vma;
> >
> > @@ -656,7 +659,9 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> >          GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> >          GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level));
> >
> > +        mutex_lock(&vma->vm->mutex);
> >          list_move_tail(&vma->vm_link, &vma->vm->bound_list);
> > +        mutex_unlock(&vma->vm->mutex);
> >
> >          if (vma->obj) {
> >                  struct drm_i915_gem_object *obj = vma->obj;
> > @@ -689,8 +694,10 @@ i915_vma_remove(struct i915_vma *vma)
> >
> >          vma->ops->clear_pages(vma);
> >
> > +        mutex_lock(&vma->vm->mutex);
> >          drm_mm_remove_node(&vma->node);
> >          list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
> > +        mutex_unlock(&vma->vm->mutex);
> >
> >          /*
> >           * Since the unbound list is global, only move to that list if
> > @@ -802,7 +809,11 @@ static void __i915_vma_destroy(struct i915_vma *vma)
> >          GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
> >
> >          list_del(&vma->obj_link);
> > +
> > +        mutex_lock(&vma->vm->mutex);
> >          list_del(&vma->vm_link);
> > +        mutex_unlock(&vma->vm->mutex);
> > +
> >          if (vma->obj)
> >                  rb_erase(&vma->obj_node, &vma->obj->vma_tree);
> >
> > diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> > index 9d0fe8aac219..eaefba7470f7 100644
> > --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> > +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> > @@ -67,10 +67,13 @@ static int populate_ggtt(struct drm_i915_private *i915)
> >
> >  static void unpin_ggtt(struct drm_i915_private *i915)
> >  {
> > +        struct i915_ggtt *ggtt = &i915->ggtt;
> >          struct i915_vma *vma;
> >
> > +        mutex_lock(&ggtt->vm.mutex);
> >          list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link)
> >                  i915_vma_unpin(vma);
> > +        mutex_unlock(&ggtt->vm.mutex);
> >  }
> >
> >  static void cleanup_objects(struct drm_i915_private *i915)
> > diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> > index 18db008c0c62..850f9eff6029 100644
> > --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> > @@ -1237,7 +1237,10 @@ static void track_vma_bind(struct i915_vma *vma)
> >          __i915_gem_object_pin_pages(obj);
> >
> >          vma->pages = obj->mm.pages;
> > +
> > +        mutex_lock(&vma->vm->mutex);
> >          list_move_tail(&vma->vm_link, &vma->vm->bound_list);
> > +        mutex_unlock(&vma->vm->mutex);
> >  }
> >
> >  static int exercise_mock(struct drm_i915_private *i915,
> >
>
> Why a mutex? Will there be sleeping under it in subsequent patches?

Yeah, we will need to have a wait on the GPU under a mutex at some point,
and this is the most likely candidate. Those patches are a bit scarier
and at last check not yet deadlock free; I think it may need a few
different rules.
-Chris
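
P.S. A minimal sketch of the model in one place: walk the list under
vm->mutex, and drop the lock around anything that may sleep (the unbind
today, a GPU wait later) before retaking it to continue. This is just
the shrinker hunk above condensed; the helper name is made up for
illustration and it assumes the usual drivers/gpu/drm/i915 headers:

    static unsigned long drop_cached_iomaps(struct drm_i915_private *i915)
    {
            struct i915_ggtt *ggtt = &i915->ggtt;
            unsigned long freed_pages = 0;
            struct i915_vma *vma, *next;

            mutex_lock(&ggtt->vm.mutex);
            list_for_each_entry_safe(vma, next, &ggtt->vm.bound_list, vm_link) {
                    unsigned long count = vma->node.size >> PAGE_SHIFT;

                    if (!vma->iomap || i915_vma_is_active(vma))
                            continue;

                    /* unbinding may sleep: drop the VM lock around it */
                    mutex_unlock(&ggtt->vm.mutex);
                    if (i915_vma_unbind(vma) == 0)
                            freed_pages += count;
                    mutex_lock(&ggtt->vm.mutex);
            }
            mutex_unlock(&ggtt->vm.mutex);

            return freed_pages;
    }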