Re: [PATCH v2 1/3] drm/i915: Introduce refcounted sg-tables

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 10/28/21 10:47, Matthew Auld wrote:
On 28/10/2021 08:04, Thomas Hellström wrote:
On Wed, 2021-10-27 at 19:03 +0100, Matthew Auld wrote:
On 27/10/2021 11:52, Thomas Hellström wrote:
As we start to introduce asynchronous failsafe object migration,
where we update the object state and then submit asynchronous
commands we need to record what memory resources are actually used
by various part of the command stream. Initially for three
purposes:

1) Error capture.
2) Asynchronous migration error recovery.
3) Asynchronous vma bind.

At the time where these happens, the object state may have been
updated
to be several migrations ahead and object sg-tables discarded.

In order to make it possible to keep sg-tables with memory resource
information for these operations, introduce refcounted sg-tables
that
aren't freed until the last user is done with them.

The alternative would be to reference information sitting on the
corresponding ttm_resources which typically have the same lifetime
as
these refcountes sg_tables, but that leads to other awkward
constructs:
Due to the design direction chosen for ttm resource managers that
would
lead to diamond-style inheritance, the LMEM resources may sometimes
be
prematurely freed, and finally the subclassed struct ttm_resource
would
have to bleed into the asynchronous vma bind code.

Signed-off-by: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx>
---
   drivers/gpu/drm/i915/gem/i915_gem_object.h    |   4 +-
   .../gpu/drm/i915/gem/i915_gem_object_types.h  |   3 +-
   drivers/gpu/drm/i915/gem/i915_gem_shmem.c     |  16 +-
   drivers/gpu/drm/i915/gem/i915_gem_ttm.c       | 188 +++++++++++--
-----
   drivers/gpu/drm/i915/i915_scatterlist.c       |  62 ++++--
   drivers/gpu/drm/i915/i915_scatterlist.h       |  76 ++++++-
   drivers/gpu/drm/i915/intel_region_ttm.c       |  15 +-
   drivers/gpu/drm/i915/intel_region_ttm.h       |   5 +-
   drivers/gpu/drm/i915/selftests/mock_region.c  |  12 +-
   9 files changed, 262 insertions(+), 119 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index a5479ac7a4ad..c5ab364d4311 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -624,8 +624,8 @@ struct sg_table *shmem_alloc_st(struct
drm_i915_private *i915,
                                 size_t size, struct
intel_memory_region *mr,
                                 struct address_space *mapping,
                                 unsigned int max_segment);
-void shmem_free_st(struct sg_table *st, struct address_space
*mapping,
-                  bool dirty, bool backup);
+void shmem_free_st_table(struct sg_table *st, struct address_space
*mapping,
+                        bool dirty, bool backup);

s/st_table/sg_table/?

I thought st was shorthand for sg_table? Maybe shmem_sg_free_table?

Yes the naming is indeed a bit unfortunate here. To be consistent with
the core's sg_free_table(), I changed to
shmem_sg_free_table() / shmem_sg_alloc_table.



   void __shmem_writeback(size_t size, struct address_space
*mapping);
      #ifdef CONFIG_MMU_NOTIFIER
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index a4b69a43b898..604ed5ad77f5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -544,6 +544,7 @@ struct drm_i915_gem_object {
                  */
                 struct list_head region_link;
   +               struct i915_refct_sgt *rsgt;
                 struct sg_table *pages;
                 void *mapping;
   @@ -597,7 +598,7 @@ struct drm_i915_gem_object {
         } mm;
            struct {
-               struct sg_table *cached_io_st;
+               struct i915_refct_sgt *cached_io_rsgt;
                 struct i915_gem_object_page_iter get_io_page;
                 struct drm_i915_gem_object *backup;
                 bool created:1;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 01f332d8dbde..67c6bee695c7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -25,8 +25,8 @@ static void check_release_pagevec(struct pagevec
*pvec)
         cond_resched();
   }
   -void shmem_free_st(struct sg_table *st, struct address_space
*mapping,
-                  bool dirty, bool backup)
+void shmem_free_st_table(struct sg_table *st, struct address_space
*mapping,
+                        bool dirty, bool backup)
   {
         struct sgt_iter sgt_iter;
         struct pagevec pvec;
@@ -49,7 +49,6 @@ void shmem_free_st(struct sg_table *st, struct
address_space *mapping,
                 check_release_pagevec(&pvec);
            sg_free_table(st);
-       kfree(st);
   }
      struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
@@ -171,7 +170,8 @@ struct sg_table *shmem_alloc_st(struct
drm_i915_private *i915,
   err_sg:
         sg_mark_end(sg);
         if (sg != st->sgl) {
-               shmem_free_st(st, mapping, false, false);
+               shmem_free_st_table(st, mapping, false, false);
+               kfree(st);
         } else {
                 mapping_clear_unevictable(mapping);
                 sg_free_table(st);
@@ -254,7 +254,8 @@ static int shmem_get_pages(struct
drm_i915_gem_object *obj)
         return 0;
      err_pages:
-       shmem_free_st(st, mapping, false, false);
+       shmem_free_st_table(st, mapping, false, false);
+       kfree(st);
         /*
          * shmemfs first checks if there is enough memory to
allocate the page
          * and reports ENOSPC should there be insufficient, along
with the usual
@@ -374,8 +375,9 @@ void i915_gem_object_put_pages_shmem(struct
drm_i915_gem_object *obj, struct sg_
         if (i915_gem_object_needs_bit17_swizzle(obj))
                 i915_gem_object_save_bit_17_swizzle(obj, pages);
   -       shmem_free_st(pages, file_inode(obj->base.filp)->i_mapping,
-                     obj->mm.dirty, obj->mm.madv ==
I915_MADV_WILLNEED);
+       shmem_free_st_table(pages, file_inode(obj->base.filp)-
i_mapping,
+                           obj->mm.dirty, obj->mm.madv ==
I915_MADV_WILLNEED);
+       kfree(pages);
         obj->mm.dirty = false;
   }
   diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 4fd2edb20dd9..6826e3859e18 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -34,7 +34,7 @@
    * struct i915_ttm_tt - TTM page vector with additional private
information
    * @ttm: The base TTM page vector.
    * @dev: The struct device used for dma mapping and unmapping.
- * @cached_st: The cached scatter-gather table.
+ * @cached_rsgt: The cached scatter-gather table.
    * @is_shmem: Set if using shmem.
    * @filp: The shmem file, if using shmem backend.
    *
@@ -47,7 +47,7 @@
   struct i915_ttm_tt {
         struct ttm_tt ttm;
         struct device *dev;
-       struct sg_table *cached_st;
+       struct i915_refct_sgt cached_rsgt;
            bool is_shmem;
         struct file *filp;
@@ -221,14 +221,10 @@ static int i915_ttm_tt_shmem_populate(struct
ttm_device *bdev,
         if (IS_ERR(st))
                 return PTR_ERR(st);
   -       err = dma_map_sg_attrs(i915_tt->dev,
-                              st->sgl, st->nents,
-                              DMA_BIDIRECTIONAL,
-                              DMA_ATTR_SKIP_CPU_SYNC);
-       if (err <= 0) {
-               err = -EINVAL;
+       err = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL,
+                             DMA_ATTR_SKIP_CPU_SYNC);
+       if (err)
                 goto err_free_st;
-       }
            i = 0;
         for_each_sgt_page(page, sgt_iter, st)
@@ -237,11 +233,15 @@ static int i915_ttm_tt_shmem_populate(struct
ttm_device *bdev,
         if (ttm->page_flags & TTM_TT_FLAG_SWAPPED)
                 ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED;
   -       i915_tt->cached_st = st;
+       i915_tt->cached_rsgt.table = *st;
+       kfree(st);

Will it work if the above just operates on the rsgt.table?

I'll change the shmem utility here to not allocate the struct sg_table
and then we can operate on it directly.


+
         return 0;
      err_free_st:
-       shmem_free_st(st, filp->f_mapping, false, false);
+       shmem_free_st_table(st, filp->f_mapping, false, false);
+       kfree(st);
+
         return err;
   }
   @@ -249,16 +249,29 @@ static void
i915_ttm_tt_shmem_unpopulate(struct ttm_tt *ttm)
   {
         struct i915_ttm_tt *i915_tt = container_of(ttm,
typeof(*i915_tt), ttm);
         bool backup = ttm->page_flags & TTM_TT_FLAG_SWAPPED;
+       struct sg_table *st = &i915_tt->cached_rsgt.table;
   -       dma_unmap_sg(i915_tt->dev, i915_tt->cached_st->sgl,
-                    i915_tt->cached_st->nents,
-                    DMA_BIDIRECTIONAL);
+       if (st->sgl)

Can we ever hit this? I would have presumed not? Below also.

Yes, here's where we typically free the scatterlist.

What I meant to say: can the above ever not be true? i.e sgl == NULL

Hm, Yes I think it can. If we're populating a non-shmem ttm tt, that sg-list is, IIRC allocated lazily on first use. Although I haven't analyzed in detail whether it can actually be populated and then not lazily allocated under the same lock.





[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux