Re: [PATCH v3 2/6] drm/i915: Add support for asynchronous moving fence waiting

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 11/15/21 14:13, Matthew Auld wrote:
On 15/11/2021 12:42, Thomas Hellström wrote:

On 11/15/21 13:36, Matthew Auld wrote:
On 14/11/2021 11:12, Thomas Hellström wrote:
From: Maarten Lankhorst <maarten.lankhorst@xxxxxxxxxxxxxxx>

For now, we will only allow async migration when TTM is used,
so the paths we care about are related to TTM.

The mmap path is handled by having the fence in ttm_bo->moving,
when pinning, the binding only becomes available after the moving
fence is signaled, and pinning a cpu map will only work after
the moving fence signals.

This should close all holes where userspace can read a buffer
before it's fully migrated.

v2:
- Fix a couple of SPARSE warnings
v3:
- Fix a NULL pointer dereference

Co-developed-by: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx>
Signed-off-by: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx>
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@xxxxxxxxxxxxxxx>
---
  drivers/gpu/drm/i915/display/intel_fbdev.c    |  7 ++--
  drivers/gpu/drm/i915/display/intel_overlay.c  |  2 +-
  drivers/gpu/drm/i915/gem/i915_gem_pages.c     |  6 +++
  .../i915/gem/selftests/i915_gem_coherency.c   |  4 +-
  .../drm/i915/gem/selftests/i915_gem_mman.c    | 22 ++++++-----
  drivers/gpu/drm/i915/i915_vma.c               | 39 ++++++++++++++++++-
  drivers/gpu/drm/i915/i915_vma.h               |  3 ++
  drivers/gpu/drm/i915/selftests/i915_vma.c     |  4 +-
  8 files changed, 69 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index adc3a81be9f7..5902ad0c2bd8 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -265,11 +265,12 @@ static int intelfb_create(struct drm_fb_helper *helper,
          info->fix.smem_len = vma->node.size;
      }
  -    vaddr = i915_vma_pin_iomap(vma);
+    vaddr = i915_vma_pin_iomap_unlocked(vma);
      if (IS_ERR(vaddr)) {
-        drm_err(&dev_priv->drm,
-            "Failed to remap framebuffer into virtual memory\n");
          ret = PTR_ERR(vaddr);
+        if (ret != -EINTR && ret != -ERESTARTSYS)
+            drm_err(&dev_priv->drm,
+                "Failed to remap framebuffer into virtual memory\n");
          goto out_unpin;
      }
      info->screen_base = vaddr;
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index 7e3f5c6ca484..21593f3f2664 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -1357,7 +1357,7 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys)
          overlay->flip_addr = sg_dma_address(obj->mm.pages->sgl);
      else
          overlay->flip_addr = i915_ggtt_offset(vma);
-    overlay->regs = i915_vma_pin_iomap(vma);
+    overlay->regs = i915_vma_pin_iomap_unlocked(vma);
      i915_vma_unpin(vma);
        if (IS_ERR(overlay->regs)) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index c4f684b7cc51..49c6e55c68ce 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -418,6 +418,12 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
      }
        if (!ptr) {
+        err = i915_gem_object_wait_moving_fence(obj, true);
+        if (err) {
+            ptr = ERR_PTR(err);
+            goto err_unpin;
+        }
+
          if (GEM_WARN_ON(type == I915_MAP_WC &&
                  !static_cpu_has(X86_FEATURE_PAT)))
              ptr = ERR_PTR(-ENODEV);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 13b088cc787e..067c512961ba 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -101,7 +101,7 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
        intel_gt_pm_get(vma->vm->gt);
  -    map = i915_vma_pin_iomap(vma);
+    map = i915_vma_pin_iomap_unlocked(vma);
      i915_vma_unpin(vma);
      if (IS_ERR(map)) {
          err = PTR_ERR(map);
@@ -134,7 +134,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
        intel_gt_pm_get(vma->vm->gt);
  -    map = i915_vma_pin_iomap(vma);
+    map = i915_vma_pin_iomap_unlocked(vma);
      i915_vma_unpin(vma);
      if (IS_ERR(map)) {
          err = PTR_ERR(map);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 6d30cdfa80f3..5d54181c2145 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -125,12 +125,13 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
      n = page - view.partial.offset;
      GEM_BUG_ON(n >= view.partial.size);
  -    io = i915_vma_pin_iomap(vma);
+    io = i915_vma_pin_iomap_unlocked(vma);
      i915_vma_unpin(vma);
      if (IS_ERR(io)) {
-        pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
-               page, (int)PTR_ERR(io));
          err = PTR_ERR(io);
+        if (err != -EINTR && err != -ERESTARTSYS)
+            pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
+                   page, err);
          goto out;
      }
  @@ -219,12 +220,15 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj,
          n = page - view.partial.offset;
          GEM_BUG_ON(n >= view.partial.size);
  -        io = i915_vma_pin_iomap(vma);
+        io = i915_vma_pin_iomap_unlocked(vma);
          i915_vma_unpin(vma);
          if (IS_ERR(io)) {
-            pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
-                   page, (int)PTR_ERR(io));
-            return PTR_ERR(io);
+            int err = PTR_ERR(io);
+
+            if (err != -EINTR && err != -ERESTARTSYS)
+                pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
+                       page, err);
+            return err;
          }
            iowrite32(page, io + n * PAGE_SIZE / sizeof(*io));
@@ -773,7 +777,7 @@ static int gtt_set(struct drm_i915_gem_object *obj)
          return PTR_ERR(vma);
        intel_gt_pm_get(vma->vm->gt);
-    map = i915_vma_pin_iomap(vma);
+    map = i915_vma_pin_iomap_unlocked(vma);
      i915_vma_unpin(vma);
      if (IS_ERR(map)) {
          err = PTR_ERR(map);
@@ -799,7 +803,7 @@ static int gtt_check(struct drm_i915_gem_object *obj)
          return PTR_ERR(vma);
        intel_gt_pm_get(vma->vm->gt);
-    map = i915_vma_pin_iomap(vma);
+    map = i915_vma_pin_iomap_unlocked(vma);
      i915_vma_unpin(vma);
      if (IS_ERR(map)) {
          err = PTR_ERR(map);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 8781c4f61952..069f22b3cd48 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -431,6 +431,13 @@ int i915_vma_bind(struct i915_vma *vma,
              work->pinned = i915_gem_object_get(vma->obj);
          }
      } else {
+        if (vma->obj) {
+            int ret;
+
+            ret = i915_gem_object_wait_moving_fence(vma->obj, true);
+            if (ret)
+                return ret;
+        }
          vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
      }
  @@ -455,6 +462,10 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
        ptr = READ_ONCE(vma->iomap);
      if (ptr == NULL) {
+        err = i915_gem_object_wait_moving_fence(vma->obj, true);
+        if (err)
+            goto err;
+
          /*
           * TODO: consider just using i915_gem_object_pin_map() for lmem            * instead, which already supports mapping non-contiguous chunks @@ -496,6 +507,25 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
      return IO_ERR_PTR(err);
  }
  +void __iomem *i915_vma_pin_iomap_unlocked(struct i915_vma *vma)
+{
+    struct i915_gem_ww_ctx ww;
+    void __iomem *map;
+    int err;
+
+    for_i915_gem_ww(&ww, err, true) {
+        err = i915_gem_object_lock(vma->obj, &ww);
+        if (err)
+            continue;
+
+        map = i915_vma_pin_iomap(vma);
+    }
+    if (err)
+        map = IO_ERR_PTR(err);
+
+    return map;
+}

What is the reason for this change? Is this strictly related to this series/commit?

Yes, it's because pulling out the moving fence requires the dma_resv lock.

Ok, I was thinking that vma_pin_iomap is only ever called on an already bound GGTT vma, for which we do a syncronous wait_for_bind, but maybe that's not always true?

Hmm, Good point. We should probably replace that vma_pin_iomap stuff with an assert that the binding fence is indeed signaled and error free? Because if binding succeeded, no need to check the moving fence.

/Thomas



Reviewed-by: Matthew Auld <matthew.auld@xxxxxxxxx>


/Thomas





[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux