We currently have to special case vma->obj being NULL because of gen6 ppgtt and mock_engine. Fix gen6 ppgtt, so we may soon be able to remove a few checks. As the object only exists as a fake object pointing to ggtt, we have no backing storage, so no real object is created. It just has to look real enough. Also kill pin_mutex, it's not compatible with ww locking, and we can use the vm lock instead. Signed-off-by: Maarten Lankhorst <maarten.lankhorst@xxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/gem/i915_gem_internal.c | 44 ++++--- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 122 +++++++++++-------- drivers/gpu/drm/i915/gt/gen6_ppgtt.h | 1 - drivers/gpu/drm/i915/i915_drv.h | 4 + 4 files changed, 99 insertions(+), 72 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index a57a6b7013c2..c5150a1ee3d2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -145,24 +145,10 @@ static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { .put_pages = i915_gem_object_put_pages_internal, }; -/** - * i915_gem_object_create_internal: create an object with volatile pages - * @i915: the i915 device - * @size: the size in bytes of backing storage to allocate for the object - * - * Creates a new object that wraps some internal memory for private use. - * This object is not backed by swappable storage, and as such its contents - * are volatile and only valid whilst pinned. If the object is reaped by the - * shrinker, its pages and data will be discarded. Equally, it is not a full - * GEM object and so not valid for access from userspace. This makes it useful - * for hardware interfaces like ringbuffers (which are pinned from the time - * the request is written to the time the hardware stops accessing it), but - * not for contexts (which need to be preserved when not active for later - * reuse). Note that it is not cleared upon allocation. - */ struct drm_i915_gem_object * -i915_gem_object_create_internal(struct drm_i915_private *i915, - phys_addr_t size) +__i915_gem_object_create_internal(struct drm_i915_private *i915, + const struct drm_i915_gem_object_ops *ops, + phys_addr_t size) { static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; @@ -179,7 +165,7 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, 0); + i915_gem_object_init(obj, ops, &lock_class, 0); obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; /* @@ -199,3 +185,25 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, return obj; } + +/** + * i915_gem_object_create_internal: create an object with volatile pages + * @i915: the i915 device + * @size: the size in bytes of backing storage to allocate for the object + * + * Creates a new object that wraps some internal memory for private use. + * This object is not backed by swappable storage, and as such its contents + * are volatile and only valid whilst pinned. If the object is reaped by the + * shrinker, its pages and data will be discarded. Equally, it is not a full + * GEM object and so not valid for access from userspace. This makes it useful + * for hardware interfaces like ringbuffers (which are pinned from the time + * the request is written to the time the hardware stops accessing it), but + * not for contexts (which need to be preserved when not active for later + * reuse). Note that it is not cleared upon allocation. + */ +struct drm_i915_gem_object * +i915_gem_object_create_internal(struct drm_i915_private *i915, + phys_addr_t size) +{ + return __i915_gem_object_create_internal(i915, &i915_gem_object_internal_ops, size); +} diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index 9fdbd9d3372b..5caa1703716e 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -262,13 +262,10 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); - __i915_vma_put(ppgtt->vma); - gen6_ppgtt_free_pd(ppgtt); free_scratch(vm); mutex_destroy(&ppgtt->flush); - mutex_destroy(&ppgtt->pin_mutex); free_pd(&ppgtt->base.vm, ppgtt->base.pd); } @@ -331,37 +328,6 @@ static const struct i915_vma_ops pd_vma_ops = { .unbind_vma = pd_vma_unbind, }; -static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) -{ - struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt; - struct i915_vma *vma; - - GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); - GEM_BUG_ON(size > ggtt->vm.total); - - vma = i915_vma_alloc(); - if (!vma) - return ERR_PTR(-ENOMEM); - - i915_active_init(&vma->active, NULL, NULL, 0); - - kref_init(&vma->ref); - mutex_init(&vma->pages_mutex); - vma->vm = i915_vm_get(&ggtt->vm); - vma->ops = &pd_vma_ops; - vma->private = ppgtt; - - vma->size = size; - vma->fence_size = size; - atomic_set(&vma->flags, I915_VMA_GGTT); - vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */ - - INIT_LIST_HEAD(&vma->obj_link); - INIT_LIST_HEAD(&vma->closed_link); - - return vma; -} - int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); @@ -378,24 +344,84 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww) if (atomic_add_unless(&ppgtt->pin_count, 1, 0)) return 0; - if (mutex_lock_interruptible(&ppgtt->pin_mutex)) - return -EINTR; + /* grab the ppgtt resv to pin the object */ + err = i915_vm_lock_objects(&ppgtt->base.vm, ww); + if (err) + return err; /* * PPGTT PDEs reside in the GGTT and consists of 512 entries. The * allocator works in address space sizes, so it's multiplied by page * size. We allocate at the top of the GTT to avoid fragmentation. */ - err = 0; - if (!atomic_read(&ppgtt->pin_count)) + if (!atomic_read(&ppgtt->pin_count)) { err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH); + + GEM_BUG_ON(ppgtt->vma->fence); + clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(ppgtt->vma)); + } if (!err) atomic_inc(&ppgtt->pin_count); - mutex_unlock(&ppgtt->pin_mutex); return err; } +static int pd_dummy_obj_get_pages(struct drm_i915_gem_object *obj) +{ + obj->mm.pages = ZERO_SIZE_PTR; + return 0; +} + +static void pd_dummy_obj_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ +} + +static const struct drm_i915_gem_object_ops pd_dummy_obj_ops = { + .name = "pd_dummy_obj", + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = pd_dummy_obj_get_pages, + .put_pages = pd_dummy_obj_put_pages, +}; + +static struct i915_page_directory * +gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt) +{ + struct i915_ggtt * const ggtt = ppgtt->base.vm.gt->ggtt; + struct i915_page_directory *pd; + int err; + + pd = __alloc_pd(I915_PDES); + if (unlikely(!pd)) + return ERR_PTR(-ENOMEM); + + pd->pt.base = __i915_gem_object_create_internal(ppgtt->base.vm.gt->i915, &pd_dummy_obj_ops, I915_PDES * SZ_4K); + if (IS_ERR(pd->pt.base)) { + err = PTR_ERR(pd->pt.base); + pd->pt.base = NULL; + goto err_pd; + } + + pd->pt.base->base.resv = i915_vm_resv_get(&ppgtt->base.vm); + pd->pt.base->shares_resv_from = &ppgtt->base.vm; + + ppgtt->vma = i915_vma_instance(pd->pt.base, &ggtt->vm, NULL); + if (IS_ERR(ppgtt->vma)) { + err = PTR_ERR(ppgtt->vma); + ppgtt->vma = NULL; + goto err_pd; + } + + /* The dummy object we create is special, override ops.. */ + ppgtt->vma->ops = &pd_vma_ops; + ppgtt->vma->private = ppgtt; + return pd; + +err_pd: + free_pd(&ppgtt->base.vm, pd); + return ERR_PTR(err); +} + void gen6_ppgtt_unpin(struct i915_ppgtt *base) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); @@ -416,7 +442,6 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) return ERR_PTR(-ENOMEM); mutex_init(&ppgtt->flush); - mutex_init(&ppgtt->pin_mutex); ppgtt_init(&ppgtt->base, gt, 0); ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t)); @@ -431,19 +456,13 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma; ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode; - ppgtt->base.pd = __alloc_pd(I915_PDES); - if (!ppgtt->base.pd) { - err = -ENOMEM; - goto err_free; - } - err = gen6_ppgtt_init_scratch(ppgtt); if (err) - goto err_pd; + goto err_free; - ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE); - if (IS_ERR(ppgtt->vma)) { - err = PTR_ERR(ppgtt->vma); + ppgtt->base.pd = gen6_alloc_top_pd(ppgtt); + if (IS_ERR(ppgtt->base.pd)) { + err = PTR_ERR(ppgtt->base.pd); goto err_scratch; } @@ -451,10 +470,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) err_scratch: free_scratch(&ppgtt->base.vm); -err_pd: - free_pd(&ppgtt->base.vm, ppgtt->base.pd); err_free: - mutex_destroy(&ppgtt->pin_mutex); kfree(ppgtt); return ERR_PTR(err); } diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h index ab0eecb086dd..5e5cf2ec3309 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h @@ -19,7 +19,6 @@ struct gen6_ppgtt { u32 pp_dir; atomic_t pin_count; - struct mutex pin_mutex; bool scan_for_unused_pt; }; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 12256218634f..a8d733a7e1d9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1933,6 +1933,10 @@ int i915_gem_evict_vm(struct i915_address_space *vm); struct drm_i915_gem_object * i915_gem_object_create_internal(struct drm_i915_private *dev_priv, phys_addr_t size); +struct drm_i915_gem_object * +__i915_gem_object_create_internal(struct drm_i915_private *dev_priv, + const struct drm_i915_gem_object_ops *ops, + phys_addr_t size); /* i915_gem_tiling.c */ static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj) -- 2.33.0