Re: [PATCH v2 1/3] drm/i915: Introduce refcounted sg-tables

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 28/10/2021 08:04, Thomas Hellström wrote:
On Wed, 2021-10-27 at 19:03 +0100, Matthew Auld wrote:
On 27/10/2021 11:52, Thomas Hellström wrote:
As we start to introduce asynchronous failsafe object migration,
where we update the object state and then submit asynchronous
commands we need to record what memory resources are actually used
by various part of the command stream. Initially for three
purposes:

1) Error capture.
2) Asynchronous migration error recovery.
3) Asynchronous vma bind.

At the time where these happens, the object state may have been
updated
to be several migrations ahead and object sg-tables discarded.

In order to make it possible to keep sg-tables with memory resource
information for these operations, introduce refcounted sg-tables
that
aren't freed until the last user is done with them.

The alternative would be to reference information sitting on the
corresponding ttm_resources which typically have the same lifetime
as
these refcountes sg_tables, but that leads to other awkward
constructs:
Due to the design direction chosen for ttm resource managers that
would
lead to diamond-style inheritance, the LMEM resources may sometimes
be
prematurely freed, and finally the subclassed struct ttm_resource
would
have to bleed into the asynchronous vma bind code.

Signed-off-by: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx>
---
   drivers/gpu/drm/i915/gem/i915_gem_object.h    |   4 +-
   .../gpu/drm/i915/gem/i915_gem_object_types.h  |   3 +-
   drivers/gpu/drm/i915/gem/i915_gem_shmem.c     |  16 +-
   drivers/gpu/drm/i915/gem/i915_gem_ttm.c       | 188 +++++++++++--
-----
   drivers/gpu/drm/i915/i915_scatterlist.c       |  62 ++++--
   drivers/gpu/drm/i915/i915_scatterlist.h       |  76 ++++++-
   drivers/gpu/drm/i915/intel_region_ttm.c       |  15 +-
   drivers/gpu/drm/i915/intel_region_ttm.h       |   5 +-
   drivers/gpu/drm/i915/selftests/mock_region.c  |  12 +-
   9 files changed, 262 insertions(+), 119 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index a5479ac7a4ad..c5ab364d4311 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -624,8 +624,8 @@ struct sg_table *shmem_alloc_st(struct
drm_i915_private *i915,
                                 size_t size, struct
intel_memory_region *mr,
                                 struct address_space *mapping,
                                 unsigned int max_segment);
-void shmem_free_st(struct sg_table *st, struct address_space
*mapping,
-                  bool dirty, bool backup);
+void shmem_free_st_table(struct sg_table *st, struct address_space
*mapping,
+                        bool dirty, bool backup);

s/st_table/sg_table/?

I thought st was shorthand for sg_table? Maybe shmem_sg_free_table?

Yes the naming is indeed a bit unfortunate here. To be consistent with
the core's sg_free_table(), I changed to
shmem_sg_free_table() / shmem_sg_alloc_table.



   void __shmem_writeback(size_t size, struct address_space
*mapping);
  #ifdef CONFIG_MMU_NOTIFIER
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index a4b69a43b898..604ed5ad77f5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -544,6 +544,7 @@ struct drm_i915_gem_object {
                  */
                 struct list_head region_link;
+               struct i915_refct_sgt *rsgt;
                 struct sg_table *pages;
                 void *mapping;
@@ -597,7 +598,7 @@ struct drm_i915_gem_object {
         } mm;
        struct {
-               struct sg_table *cached_io_st;
+               struct i915_refct_sgt *cached_io_rsgt;
                 struct i915_gem_object_page_iter get_io_page;
                 struct drm_i915_gem_object *backup;
                 bool created:1;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 01f332d8dbde..67c6bee695c7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -25,8 +25,8 @@ static void check_release_pagevec(struct pagevec
*pvec)
         cond_resched();
   }
-void shmem_free_st(struct sg_table *st, struct address_space
*mapping,
-                  bool dirty, bool backup)
+void shmem_free_st_table(struct sg_table *st, struct address_space
*mapping,
+                        bool dirty, bool backup)
   {
         struct sgt_iter sgt_iter;
         struct pagevec pvec;
@@ -49,7 +49,6 @@ void shmem_free_st(struct sg_table *st, struct
address_space *mapping,
                 check_release_pagevec(&pvec);
        sg_free_table(st);
-       kfree(st);
   }
  struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
@@ -171,7 +170,8 @@ struct sg_table *shmem_alloc_st(struct
drm_i915_private *i915,
   err_sg:
         sg_mark_end(sg);
         if (sg != st->sgl) {
-               shmem_free_st(st, mapping, false, false);
+               shmem_free_st_table(st, mapping, false, false);
+               kfree(st);
         } else {
                 mapping_clear_unevictable(mapping);
                 sg_free_table(st);
@@ -254,7 +254,8 @@ static int shmem_get_pages(struct
drm_i915_gem_object *obj)
         return 0;
  err_pages:
-       shmem_free_st(st, mapping, false, false);
+       shmem_free_st_table(st, mapping, false, false);
+       kfree(st);
         /*
          * shmemfs first checks if there is enough memory to
allocate the page
          * and reports ENOSPC should there be insufficient, along
with the usual
@@ -374,8 +375,9 @@ void i915_gem_object_put_pages_shmem(struct
drm_i915_gem_object *obj, struct sg_
         if (i915_gem_object_needs_bit17_swizzle(obj))
                 i915_gem_object_save_bit_17_swizzle(obj, pages);
-       shmem_free_st(pages, file_inode(obj->base.filp)->i_mapping,
-                     obj->mm.dirty, obj->mm.madv ==
I915_MADV_WILLNEED);
+       shmem_free_st_table(pages, file_inode(obj->base.filp)-
i_mapping,
+                           obj->mm.dirty, obj->mm.madv ==
I915_MADV_WILLNEED);
+       kfree(pages);
         obj->mm.dirty = false;
   }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 4fd2edb20dd9..6826e3859e18 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -34,7 +34,7 @@
    * struct i915_ttm_tt - TTM page vector with additional private
information
    * @ttm: The base TTM page vector.
    * @dev: The struct device used for dma mapping and unmapping.
- * @cached_st: The cached scatter-gather table.
+ * @cached_rsgt: The cached scatter-gather table.
    * @is_shmem: Set if using shmem.
    * @filp: The shmem file, if using shmem backend.
    *
@@ -47,7 +47,7 @@
   struct i915_ttm_tt {
         struct ttm_tt ttm;
         struct device *dev;
-       struct sg_table *cached_st;
+       struct i915_refct_sgt cached_rsgt;
        bool is_shmem;
         struct file *filp;
@@ -221,14 +221,10 @@ static int i915_ttm_tt_shmem_populate(struct
ttm_device *bdev,
         if (IS_ERR(st))
                 return PTR_ERR(st);
-       err = dma_map_sg_attrs(i915_tt->dev,
-                              st->sgl, st->nents,
-                              DMA_BIDIRECTIONAL,
-                              DMA_ATTR_SKIP_CPU_SYNC);
-       if (err <= 0) {
-               err = -EINVAL;
+       err = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL,
+                             DMA_ATTR_SKIP_CPU_SYNC);
+       if (err)
                 goto err_free_st;
-       }
        i = 0;
         for_each_sgt_page(page, sgt_iter, st)
@@ -237,11 +233,15 @@ static int i915_ttm_tt_shmem_populate(struct
ttm_device *bdev,
         if (ttm->page_flags & TTM_TT_FLAG_SWAPPED)
                 ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED;
-       i915_tt->cached_st = st;
+       i915_tt->cached_rsgt.table = *st;
+       kfree(st);

Will it work if the above just operates on the rsgt.table?

I'll change the shmem utility here to not allocate the struct sg_table
and then we can operate on it directly.


+
         return 0;
  err_free_st:
-       shmem_free_st(st, filp->f_mapping, false, false);
+       shmem_free_st_table(st, filp->f_mapping, false, false);
+       kfree(st);
+
         return err;
   }
@@ -249,16 +249,29 @@ static void
i915_ttm_tt_shmem_unpopulate(struct ttm_tt *ttm)
   {
         struct i915_ttm_tt *i915_tt = container_of(ttm,
typeof(*i915_tt), ttm);
         bool backup = ttm->page_flags & TTM_TT_FLAG_SWAPPED;
+       struct sg_table *st = &i915_tt->cached_rsgt.table;
-       dma_unmap_sg(i915_tt->dev, i915_tt->cached_st->sgl,
-                    i915_tt->cached_st->nents,
-                    DMA_BIDIRECTIONAL);
+       if (st->sgl)

Can we ever hit this? I would have presumed not? Below also.

Yes, here's where we typically free the scatterlist.

What I meant to say: can the above ever not be true? i.e sgl == NULL



+               shmem_free_st_table(st,
+                                   file_inode(i915_tt->filp)-
i_mapping,
+                                   backup, backup);
+}
-       shmem_free_st(fetch_and_zero(&i915_tt->cached_st),
-                     file_inode(i915_tt->filp)->i_mapping,
-                     backup, backup);
+static void i915_ttm_tt_release(struct kref *ref)
+{
+       struct i915_ttm_tt *i915_tt =
+               container_of(ref, typeof(*i915_tt),
cached_rsgt.kref);
+       struct sg_table *st = &i915_tt->cached_rsgt.table;
+
+       GEM_WARN_ON(st->sgl);
+
+       kfree(i915_tt);
   }
+static const struct i915_refct_sgt_ops tt_rsgt_ops = {
+       .release = i915_ttm_tt_release
+};
+
   static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object
*bo,
                                          uint32_t page_flags)
   {
@@ -287,6 +300,9 @@ static struct ttm_tt *i915_ttm_tt_create(struct
ttm_buffer_object *bo,
         if (ret)
                 goto err_free;
+       i915_refct_sgt_init_ops(&i915_tt->cached_rsgt, bo-
base.size,
+                               &tt_rsgt_ops);
+
         i915_tt->dev = obj->base.dev->dev;
        return &i915_tt->ttm;
@@ -311,17 +327,15 @@ static int i915_ttm_tt_populate(struct
ttm_device *bdev,
   static void i915_ttm_tt_unpopulate(struct ttm_device *bdev,
struct ttm_tt *ttm)
   {
         struct i915_ttm_tt *i915_tt = container_of(ttm,
typeof(*i915_tt), ttm);
+       struct sg_table *st = &i915_tt->cached_rsgt.table;
+
+       if (st->sgl)
+               dma_unmap_sgtable(i915_tt->dev, st,
DMA_BIDIRECTIONAL, 0);
        if (i915_tt->is_shmem) {
                 i915_ttm_tt_shmem_unpopulate(ttm);
         } else {
-               if (i915_tt->cached_st) {
-                       dma_unmap_sgtable(i915_tt->dev, i915_tt-
cached_st,
-                                         DMA_BIDIRECTIONAL, 0);
-                       sg_free_table(i915_tt->cached_st);
-                       kfree(i915_tt->cached_st);
-                       i915_tt->cached_st = NULL;
-               }
+               sg_free_table(st);
                 ttm_pool_free(&bdev->pool, ttm);
         }
   }
@@ -334,7 +348,7 @@ static void i915_ttm_tt_destroy(struct
ttm_device *bdev, struct ttm_tt *ttm)
                 fput(i915_tt->filp);
        ttm_tt_fini(ttm);
-       kfree(i915_tt);
+       i915_refct_sgt_put(&i915_tt->cached_rsgt);
   }
  static bool i915_ttm_eviction_valuable(struct ttm_buffer_object
*bo,
@@ -376,12 +390,12 @@ static int i915_ttm_move_notify(struct
ttm_buffer_object *bo)
         return 0;
   }
-static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object
*obj)
+static void i915_ttm_free_cached_io_rsgt(struct
drm_i915_gem_object *obj)
   {
         struct radix_tree_iter iter;
         void __rcu **slot;
-       if (!obj->ttm.cached_io_st)
+       if (!obj->ttm.cached_io_rsgt)
                 return;
        rcu_read_lock();
@@ -389,9 +403,8 @@ static void i915_ttm_free_cached_io_st(struct
drm_i915_gem_object *obj)
                 radix_tree_delete(&obj->ttm.get_io_page.radix,
iter.index);
         rcu_read_unlock();
-       sg_free_table(obj->ttm.cached_io_st);
-       kfree(obj->ttm.cached_io_st);
-       obj->ttm.cached_io_st = NULL;
+       i915_refct_sgt_put(obj->ttm.cached_io_rsgt);
+       obj->ttm.cached_io_rsgt = NULL;
   }
  static void
@@ -477,7 +490,7 @@ static int i915_ttm_purge(struct
drm_i915_gem_object *obj)
         obj->write_domain = 0;
         obj->read_domains = 0;
         i915_ttm_adjust_gem_after_move(obj);
-       i915_ttm_free_cached_io_st(obj);
+       i915_ttm_free_cached_io_rsgt(obj);
         obj->mm.madv = __I915_MADV_PURGED;
         return 0;
   }
@@ -532,7 +545,7 @@ static void i915_ttm_swap_notify(struct
ttm_buffer_object *bo)
         int ret = i915_ttm_move_notify(bo);
        GEM_WARN_ON(ret);
-       GEM_WARN_ON(obj->ttm.cached_io_st);
+       GEM_WARN_ON(obj->ttm.cached_io_rsgt);
         if (!ret && obj->mm.madv != I915_MADV_WILLNEED)
                 i915_ttm_purge(obj);
   }
@@ -543,7 +556,7 @@ static void i915_ttm_delete_mem_notify(struct
ttm_buffer_object *bo)
        if (likely(obj)) {
                 __i915_gem_object_pages_fini(obj);
-               i915_ttm_free_cached_io_st(obj);
+               i915_ttm_free_cached_io_rsgt(obj);
         }
   }
@@ -563,40 +576,35 @@ i915_ttm_region(struct ttm_device *bdev, int
ttm_mem_type)
                                           ttm_mem_type -
I915_PL_LMEM0);
   }
-static struct sg_table *i915_ttm_tt_get_st(struct ttm_tt *ttm)
+static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt
*ttm)
   {
         struct i915_ttm_tt *i915_tt = container_of(ttm,
typeof(*i915_tt), ttm);
         struct sg_table *st;
         int ret;
-       if (i915_tt->cached_st)
-               return i915_tt->cached_st;
-
-       st = kzalloc(sizeof(*st), GFP_KERNEL);
-       if (!st)
-               return ERR_PTR(-ENOMEM);
+       if (i915_tt->cached_rsgt.table.sgl)
+               return i915_refct_sgt_get(&i915_tt->cached_rsgt);
+       st = &i915_tt->cached_rsgt.table;
         ret = sg_alloc_table_from_pages_segment(st,
                         ttm->pages, ttm->num_pages,
                         0, (unsigned long)ttm->num_pages <<
PAGE_SHIFT,
                         i915_sg_segment_size(), GFP_KERNEL);
         if (ret) {
-               kfree(st);
+               st->sgl = NULL;
                 return ERR_PTR(ret);
         }
        ret = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL,
0);
         if (ret) {
                 sg_free_table(st);
-               kfree(st);
                 return ERR_PTR(ret);
         }
-       i915_tt->cached_st = st;
-       return st;
+       return i915_refct_sgt_get(&i915_tt->cached_rsgt);
   }
-static struct sg_table *
+static struct i915_refct_sgt *
   i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
                          struct ttm_resource *res)
   {
@@ -610,7 +618,21 @@ i915_ttm_resource_get_st(struct
drm_i915_gem_object *obj,
          * the resulting st. Might make sense for GGTT.
          */
         GEM_WARN_ON(!cpu_maps_iomem(res));
-       return intel_region_ttm_resource_to_st(obj->mm.region,
res);
+       if (bo->resource == res) {
+               if (!obj->ttm.cached_io_rsgt) {
+                       struct i915_refct_sgt *rsgt;
+
+                       rsgt =
intel_region_ttm_resource_to_rsgt(obj->mm.region,
+
res);
+                       if (IS_ERR(rsgt))
+                               return rsgt;
+
+                       obj->ttm.cached_io_rsgt = rsgt;
+               }
+               return i915_refct_sgt_get(obj->ttm.cached_io_rsgt);
+       }
+
+       return intel_region_ttm_resource_to_rsgt(obj->mm.region,
res);
   }
  static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
@@ -621,10 +643,7 @@ static int i915_ttm_accel_move(struct
ttm_buffer_object *bo,
   {
         struct drm_i915_private *i915 = container_of(bo->bdev,
typeof(*i915),
                                                      bdev);
-       struct ttm_resource_manager *src_man =
-               ttm_manager_type(bo->bdev, bo->resource->mem_type);
         struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
-       struct sg_table *src_st;
         struct i915_request *rq;
         struct ttm_tt *src_ttm = bo->ttm;
         enum i915_cache_level src_level, dst_level;
@@ -650,17 +669,22 @@ static int i915_ttm_accel_move(struct
ttm_buffer_object *bo,
                 }
                 intel_engine_pm_put(i915->gt.migrate.context-
engine);
         } else {
-               src_st = src_man->use_tt ?
i915_ttm_tt_get_st(src_ttm) :
-                       obj->ttm.cached_io_st;
+               struct i915_refct_sgt *src_rsgt =
+                       i915_ttm_resource_get_st(obj, bo-
resource);
+
+               if (IS_ERR(src_rsgt))
+                       return PTR_ERR(src_rsgt);
                src_level = i915_ttm_cache_level(i915, bo-
resource, src_ttm);
                 intel_engine_pm_get(i915->gt.migrate.context-
engine);
                 ret = intel_context_migrate_copy(i915-
gt.migrate.context,
-                                                NULL, src_st->sgl,
src_level,
+                                                NULL, src_rsgt-
table.sgl,
+                                                src_level,
gpu_binds_iomem(bo->resource),
                                                  dst_st->sgl,
dst_level,
gpu_binds_iomem(dst_mem),
                                                  &rq);
+               i915_refct_sgt_put(src_rsgt);
                 if (!ret && rq) {
                         i915_request_wait(rq, 0,
MAX_SCHEDULE_TIMEOUT);
                         i915_request_put(rq);
@@ -674,13 +698,14 @@ static int i915_ttm_accel_move(struct
ttm_buffer_object *bo,
   static void __i915_ttm_move(struct ttm_buffer_object *bo, bool
clear,
                             struct ttm_resource *dst_mem,
                             struct ttm_tt *dst_ttm,
-                           struct sg_table *dst_st,
+                           struct i915_refct_sgt *dst_rsgt,
                             bool allow_accel)
   {
         int ret = -EINVAL;
        if (allow_accel)
-               ret = i915_ttm_accel_move(bo, clear, dst_mem,
dst_ttm, dst_st);
+               ret = i915_ttm_accel_move(bo, clear, dst_mem,
dst_ttm,
+                                         &dst_rsgt->table);
         if (ret) {
                 struct drm_i915_gem_object *obj =
i915_ttm_to_gem(bo);
                 struct intel_memory_region *dst_reg, *src_reg;
@@ -697,12 +722,13 @@ static void __i915_ttm_move(struct
ttm_buffer_object *bo, bool clear,
                 dst_iter = !cpu_maps_iomem(dst_mem) ?
                         ttm_kmap_iter_tt_init(&_dst_iter.tt,
dst_ttm) :
                         ttm_kmap_iter_iomap_init(&_dst_iter.io,
&dst_reg->iomap,
-                                                dst_st, dst_reg-
region.start);
+                                                &dst_rsgt->table,
+                                                dst_reg-
region.start);
                src_iter = !cpu_maps_iomem(bo->resource) ?
                         ttm_kmap_iter_tt_init(&_src_iter.tt, bo-
ttm) :
                         ttm_kmap_iter_iomap_init(&_src_iter.io,
&src_reg->iomap,
-                                                obj-
ttm.cached_io_st,
+                                                &obj-
ttm.cached_io_rsgt->table,
                                                  src_reg-
region.start);
                ttm_move_memcpy(clear, dst_mem->num_pages,
dst_iter, src_iter);
@@ -718,7 +744,7 @@ static int i915_ttm_move(struct
ttm_buffer_object *bo, bool evict,
         struct ttm_resource_manager *dst_man =
                 ttm_manager_type(bo->bdev, dst_mem->mem_type);
         struct ttm_tt *ttm = bo->ttm;
-       struct sg_table *dst_st;
+       struct i915_refct_sgt *dst_rsgt;
         bool clear;
         int ret;
@@ -744,22 +770,24 @@ static int i915_ttm_move(struct
ttm_buffer_object *bo, bool evict,
                         return ret;
         }
-       dst_st = i915_ttm_resource_get_st(obj, dst_mem);
-       if (IS_ERR(dst_st))
-               return PTR_ERR(dst_st);
+       dst_rsgt = i915_ttm_resource_get_st(obj, dst_mem);
+       if (IS_ERR(dst_rsgt))
+               return PTR_ERR(dst_rsgt);
        clear = !cpu_maps_iomem(bo->resource) && (!ttm ||
!ttm_tt_is_populated(ttm));
         if (!(clear && ttm && !(ttm->page_flags &
TTM_TT_FLAG_ZERO_ALLOC)))
-               __i915_ttm_move(bo, clear, dst_mem, bo->ttm,
dst_st, true);
+               __i915_ttm_move(bo, clear, dst_mem, bo->ttm,
dst_rsgt, true);
        ttm_bo_move_sync_cleanup(bo, dst_mem);
         i915_ttm_adjust_domains_after_move(obj);
-       i915_ttm_free_cached_io_st(obj);
+       i915_ttm_free_cached_io_rsgt(obj);
        if (gpu_binds_iomem(dst_mem) || cpu_maps_iomem(dst_mem)) {
-               obj->ttm.cached_io_st = dst_st;
-               obj->ttm.get_io_page.sg_pos = dst_st->sgl;
+               obj->ttm.cached_io_rsgt = dst_rsgt;
+               obj->ttm.get_io_page.sg_pos = dst_rsgt->table.sgl;
                 obj->ttm.get_io_page.sg_idx = 0;
+       } else {
+               i915_refct_sgt_put(dst_rsgt);
         }
        i915_ttm_adjust_lru(obj);
@@ -825,7 +853,6 @@ static int __i915_ttm_get_pages(struct
drm_i915_gem_object *obj,
                 .interruptible = true,
                 .no_wait_gpu = false,
         };
-       struct sg_table *st;
         int real_num_busy;
         int ret;
@@ -862,12 +889,16 @@ static int __i915_ttm_get_pages(struct
drm_i915_gem_object *obj,
         }
        if (!i915_gem_object_has_pages(obj)) {
-               /* Object either has a page vector or is an iomem
object */
-               st = bo->ttm ? i915_ttm_tt_get_st(bo->ttm) : obj-
ttm.cached_io_st;
-               if (IS_ERR(st))
-                       return PTR_ERR(st);
+               struct i915_refct_sgt *rsgt =
+                       i915_ttm_resource_get_st(obj, bo-
resource);
+
+               if (IS_ERR(rsgt))
+                       return PTR_ERR(rsgt);
-               __i915_gem_object_set_pages(obj, st,
i915_sg_dma_sizes(st->sgl));
+               GEM_BUG_ON(obj->mm.rsgt);
+               obj->mm.rsgt = rsgt;
+               __i915_gem_object_set_pages(obj, &rsgt->table,
+                                           i915_sg_dma_sizes(rsgt-
table.sgl));
         }
        i915_ttm_adjust_lru(obj);
@@ -941,6 +972,13 @@ static void i915_ttm_put_pages(struct
drm_i915_gem_object *obj,
          * If the object is not destroyed next, The TTM eviction
logic
          * and shrinkers will move it out if needed.
          */
+
+       if (obj->mm.rsgt) {
+               i915_refct_sgt_put(obj->mm.rsgt);
+               obj->mm.rsgt = NULL;
+       }

i915_refct_sgt_put(fetch_and_zero(&obj->mm.rsgt)) ?

+
+       i915_ttm_adjust_lru(obj);

Unrelated change, or do I need to look closer?

No that's unrelated. Probably a rebase mistake. Will remove.


   }
  static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
@@ -1278,7 +1316,7 @@ int i915_gem_obj_copy_ttm(struct
drm_i915_gem_object *dst,
         struct ttm_operation_ctx ctx = {
                 .interruptible = intr,
         };
-       struct sg_table *dst_st;
+       struct i915_refct_sgt *dst_rsgt;
         int ret;
        assert_object_held(dst);
@@ -1293,11 +1331,11 @@ int i915_gem_obj_copy_ttm(struct
drm_i915_gem_object *dst,
         if (ret)
                 return ret;
-       dst_st = gpu_binds_iomem(dst_bo->resource) ?
-               dst->ttm.cached_io_st : i915_ttm_tt_get_st(dst_bo-
ttm);
-
+       dst_rsgt = i915_ttm_resource_get_st(dst, dst_bo->resource);
         __i915_ttm_move(src_bo, false, dst_bo->resource, dst_bo-
ttm,
-                       dst_st, allow_accel);
+                       dst_rsgt, allow_accel);
+
+       i915_refct_sgt_put(dst_rsgt);
        return 0;
   }
diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c
b/drivers/gpu/drm/i915/i915_scatterlist.c
index 4a6712dca838..8a510ee5d1ad 100644
--- a/drivers/gpu/drm/i915/i915_scatterlist.c
+++ b/drivers/gpu/drm/i915/i915_scatterlist.c
@@ -41,8 +41,32 @@ bool i915_sg_trim(struct sg_table *orig_st)
         return true;
   }
+static void i915_refct_sgt_release(struct kref *ref)
+{
+       struct i915_refct_sgt *rsgt =
+               container_of(ref, typeof(*rsgt), kref);
+
+       sg_free_table(&rsgt->table);
+       kfree(rsgt);
+}
+
+static const struct i915_refct_sgt_ops rsgt_ops = {
+       .release = i915_refct_sgt_release
+};
+
+/**
+ * i915_refct_sgt_init - Initialize a struct i915_refct_sgt with
default ops
+ * @rsgt: The struct i915_refct_sgt to initialize.
+ * size: The size of the underlying memory buffer.
+ */
+void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size)
+{
+       i915_refct_sgt_init_ops(rsgt, size, &rsgt_ops);
+}
+
   /**
- * i915_sg_from_mm_node - Create an sg_table from a struct
drm_mm_node
+ * i915_rsgt_from_mm_node - Create a refcounted sg_table from a
struct
+ * drm_mm_node
    * @node: The drm_mm_node.
    * @region_start: An offset to add to the dma addresses of the sg
list.
    *
@@ -50,25 +74,28 @@ bool i915_sg_trim(struct sg_table *orig_st)
    * taking a maximum segment length into account, splitting into
segments
    * if necessary.
    *
- * Return: A pointer to a kmalloced struct sg_table on success,
negative
+ * Return: A pointer to a kmalloced struct i915_refct_sgt on
success, negative
    * error code cast to an error pointer on failure.
    */
-struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node
*node,
-                                     u64 region_start)
+struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct
drm_mm_node *node,
+                                             u64 region_start)
   {
         const u64 max_segment = SZ_1G; /* Do we have a limit on
this? */
         u64 segment_pages = max_segment >> PAGE_SHIFT;
         u64 block_size, offset, prev_end;
+       struct i915_refct_sgt *rsgt;
         struct sg_table *st;
         struct scatterlist *sg;
-       st = kmalloc(sizeof(*st), GFP_KERNEL);
-       if (!st)
+       rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
+       if (!rsgt)
                 return ERR_PTR(-ENOMEM);
+       i915_refct_sgt_init(rsgt, node->size << PAGE_SHIFT);
+       st = &rsgt->table;
         if (sg_alloc_table(st, DIV_ROUND_UP(node->size,
segment_pages),
                            GFP_KERNEL)) {
-               kfree(st);
+               i915_refct_sgt_put(rsgt);
                 return ERR_PTR(-ENOMEM);
         }
@@ -104,11 +131,11 @@ struct sg_table *i915_sg_from_mm_node(const
struct drm_mm_node *node,
         sg_mark_end(sg);
         i915_sg_trim(st);
-       return st;
+       return rsgt;
   }
  /**
- * i915_sg_from_buddy_resource - Create an sg_table from a struct
+ * i915_rsgt_from_buddy_resource - Create a refcounted sg_table
from a struct
    * i915_buddy_block list
    * @res: The struct i915_ttm_buddy_resource.
    * @region_start: An offset to add to the dma addresses of the sg
list.
@@ -117,11 +144,11 @@ struct sg_table *i915_sg_from_mm_node(const
struct drm_mm_node *node,
    * taking a maximum segment length into account, splitting into
segments
    * if necessary.
    *
- * Return: A pointer to a kmalloced struct sg_table on success,
negative
+ * Return: A pointer to a kmalloced struct i915_refct_sgts on
success, negative
    * error code cast to an error pointer on failure.
    */
-struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource
*res,
-                                            u64 region_start)
+struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct
ttm_resource *res,
+                                                    u64
region_start)
   {
         struct i915_ttm_buddy_resource *bman_res =
to_ttm_buddy_resource(res);
         const u64 size = res->num_pages << PAGE_SHIFT;
@@ -129,18 +156,21 @@ struct sg_table
*i915_sg_from_buddy_resource(struct ttm_resource *res,
         struct i915_buddy_mm *mm = bman_res->mm;
         struct list_head *blocks = &bman_res->blocks;
         struct i915_buddy_block *block;
+       struct i915_refct_sgt *rsgt;
         struct scatterlist *sg;
         struct sg_table *st;
         resource_size_t prev_end;
        GEM_BUG_ON(list_empty(blocks)); -       st = kmalloc(sizeof(*st), GFP_KERNEL);
-       if (!st)
+       rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
+       if (!rsgt)
                 return ERR_PTR(-ENOMEM);
+       i915_refct_sgt_init(rsgt, size);
+       st = &rsgt->table;
         if (sg_alloc_table(st, res->num_pages, GFP_KERNEL)) {
-               kfree(st);
+               i915_refct_sgt_put(rsgt);
                 return ERR_PTR(-ENOMEM);
         }
@@ -181,7 +211,7 @@ struct sg_table
*i915_sg_from_buddy_resource(struct ttm_resource *res,
         sg_mark_end(sg);
         i915_sg_trim(st);
-       return st;
+       return rsgt;
   }
  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h
b/drivers/gpu/drm/i915/i915_scatterlist.h
index b8bd5925b03f..321fd4a9f777 100644
--- a/drivers/gpu/drm/i915/i915_scatterlist.h
+++ b/drivers/gpu/drm/i915/i915_scatterlist.h
@@ -144,10 +144,78 @@ static inline unsigned int
i915_sg_segment_size(void)
  bool i915_sg_trim(struct sg_table *orig_st); -struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node
*node,
-                                     u64 region_start);
+/**
+ * struct i915_refct_sgt_ops - Operations structure for struct
i915_refct_sgt
+ */
+struct i915_refct_sgt_ops {
+       /**
+        * release() - Free the memory of the struct i915_refct_sgt
+        * @ref: struct kref that is embedded in the struct
i915_refct_sgt
+        */
+       void (*release)(struct kref *ref);
+};
+
+/**
+ * struct i915_refct_sgt - A refcounted scatter-gather table
+ * @kref: struct kref for refcounting
+ * @table: struct sg_table holding the scatter-gather table
itself. Note that
+ * @table->sgl = NULL can be used to determine whether a scatter-
gather table
+ * is present or not.
+ * @size: The size in bytes of the underlying memory buffer
+ * @ops: The operations structure.
+ */
+struct i915_refct_sgt {
+       struct kref kref;
+       struct sg_table table;
+       size_t size;
+       const struct i915_refct_sgt_ops *ops;
+};
+
+/**
+ * i915_refct_sgt_put - Put a refcounted sg-table
+ * @rsgt the struct i915_refct_sgt to put.
+ */
+static inline void i915_refct_sgt_put(struct i915_refct_sgt *rsgt)
+{
+       if (rsgt)
+               kref_put(&rsgt->kref, rsgt->ops->release);
+}
+
+/**
+ * i915_refct_sgt_get - Get a refcounted sg-table
+ * @rsgt the struct i915_refct_sgt to get.
+ */
+static inline struct i915_refct_sgt *
+i915_refct_sgt_get(struct i915_refct_sgt *rsgt)
+{
+       kref_get(&rsgt->kref);
+       return rsgt;
+}
+
+/**
+ * i915_refct_sgt_init_ops - Initialize a refcounted sg-list with
a custom
+ * operations structure
+ * @rsgt The struct i915_refct_sgt to initialize.
+ * @size: Size in bytes of the underlying memory buffer.
+ * @ops: A customized operations structure in case the refcounted
sg-list
+ * is embedded into another structure.
+ */
+static inline void i915_refct_sgt_init_ops(struct i915_refct_sgt
*rsgt,
+                                          size_t size,
+                                          const struct
i915_refct_sgt_ops *ops)
+{
+       kref_init(&rsgt->kref);
+       rsgt->table.sgl = NULL;
+       rsgt->size = size;
+       rsgt->ops = ops;
+}

Could maybe make this __i915_refct_sgt_init(), since it inits more
than
just the ops?



Will change.

Thanks,
Thomas





[Index of Archives]     [Linux DRI Users]     [Linux Intel Graphics]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux