Re: [Intel-gfx] [PATCH v8 22/22] drm/i915/vm_bind: Support capture of persistent mappings

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 01/12/2022 18:43, Niranjana Vishwanathapura wrote:
On Thu, Dec 01, 2022 at 07:27:31AM -0800, Niranjana Vishwanathapura wrote:
On Thu, Dec 01, 2022 at 10:49:15AM +0000, Matthew Auld wrote:
On 29/11/2022 07:26, Niranjana Vishwanathapura wrote:
Support dump capture of persistent mappings upon user request.

Signed-off-by: Brian Welty <brian.welty@xxxxxxxxx>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@xxxxxxxxx>
---
.../drm/i915/gem/i915_gem_vm_bind_object.c    | 11 +++++++++++
drivers/gpu/drm/i915/gt/intel_gtt.c           |  3 +++
drivers/gpu/drm/i915/gt/intel_gtt.h           |  5 +++++
drivers/gpu/drm/i915/i915_gpu_error.c         | 19 +++++++++++++++++++
drivers/gpu/drm/i915/i915_vma.c               |  1 +
drivers/gpu/drm/i915/i915_vma_types.h         |  2 ++
include/uapi/drm/i915_drm.h                   |  3 ++-
7 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
index 78e7c0642c5f..50969613daf6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
@@ -88,6 +88,11 @@ static void i915_gem_vm_bind_remove(struct i915_vma *vma, bool release_obj)
{
    lockdep_assert_held(&vma->vm->vm_bind_lock);
+    spin_lock(&vma->vm->vm_capture_lock);
+    if (!list_empty(&vma->vm_capture_link))
+        list_del_init(&vma->vm_capture_link);
+    spin_unlock(&vma->vm->vm_capture_lock);
+
    spin_lock(&vma->vm->vm_rebind_lock);
    if (!list_empty(&vma->vm_rebind_link))
        list_del_init(&vma->vm_rebind_link);
@@ -357,6 +362,12 @@ static int i915_gem_vm_bind_obj(struct i915_address_space *vm,
                continue;
        }
+        if (va->flags & I915_GEM_VM_BIND_CAPTURE) {
+            spin_lock(&vm->vm_capture_lock);
+            list_add_tail(&vma->vm_capture_link, &vm->vm_capture_list);
+            spin_unlock(&vm->vm_capture_lock);
+        }
+
        list_add_tail(&vma->vm_bind_link, &vm->vm_bound_list);
        i915_vm_bind_it_insert(vma, &vm->va);
        if (!obj->priv_root)
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index ebf6830574a0..bdabe13fc30e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -297,6 +297,9 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
    spin_lock_init(&vm->vm_rebind_lock);
    spin_lock_init(&vm->userptr_invalidated_lock);
    INIT_LIST_HEAD(&vm->userptr_invalidated_list);
+
+    INIT_LIST_HEAD(&vm->vm_capture_list);
+    spin_lock_init(&vm->vm_capture_lock);
}
void *__px_vaddr(struct drm_i915_gem_object *p)
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 87e5b6568a00..8e4ddd073348 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -281,6 +281,11 @@ struct i915_address_space {
    /** @root_obj: root object for dma-resv sharing by private objects */
    struct drm_i915_gem_object *root_obj;
+    /* @vm_capture_list: list of vm captures */
+    struct list_head vm_capture_list;
+    /* @vm_capture_lock: protects vm_capture_list */
+    spinlock_t vm_capture_lock;
+
    /* Global GTT */
    bool is_ggtt:1;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 9d5d5a397b64..3b2b12a739f7 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1460,6 +1460,22 @@ capture_vma(struct intel_engine_capture_vma *next,
    return next;
}
+static struct intel_engine_capture_vma *
+capture_user_vm(struct intel_engine_capture_vma *capture,
+        struct i915_address_space *vm, gfp_t gfp)
+{
+    struct i915_vma *vma;
+
+    spin_lock(&vm->vm_capture_lock);

Does it make sense to move this into the eb3 submission stage, like we do for eb2? IIRC the gfp flags here are quite limiting due to potentially being in a fence critical section. If we can use rq->capture_list for eb3, we shouldn't need to change much here?


But that will add latency on the submission path as we will have to iterate
over capture_list in each submission. Besides, unlike the eb2 case, we can't
just transfer the list to rq->capture_list as we will have to do this
for each submission. It was discussed a long time back and decided not to
bother the fast path (submission) scenario with this error capture, which
is only required upon gpu hang (slow path).

Maybe add some of this to the commit message, just to give a bit more back story/history. From my pov I'm coming into this semi-blind.


Also there is the existing CONFIG_DRM_I915_CAPTURE_ERROR. Should we take that into account?


Ok, will fix.

+    /* vma->resource must be valid here as persistent vmas are bound */
+    list_for_each_entry(vma, &vm->vm_capture_list, vm_capture_link)
+        capture = capture_vma_snapshot(capture, vma->resource,
+                           gfp, "user");
+    spin_unlock(&vm->vm_capture_lock);
+
+    return capture;
+}
+
static struct intel_engine_capture_vma *
capture_user(struct intel_engine_capture_vma *capture,
         const struct i915_request *rq,
@@ -1471,6 +1487,9 @@ capture_user(struct intel_engine_capture_vma *capture,
        capture = capture_vma_snapshot(capture, c->vma_res, gfp,
                           "user");
+    capture = capture_user_vm(capture, rq->context->vm,
+                  GFP_NOWAIT | __GFP_NOWARN);

And this should maybe use the passed in gfp?


Ok, will fix


Actually, in one path (capture_engine) it is called with a non-blocking gfp;
in the other path (execlists_capture_work) it is called with a blocking gfp.
I chose to override it here as we use a spinlock (vm_capture_lock) and we
are using the MAYFAIL version in some paths anyhow. I can add documentation
for this override here. We can switch to a mutex here (instead of a spinlock)
for vm_capture_lock, but I am not sure it is worth it, or whether we end up
here in atomic context anyway. What do you think?

No strong opinion. Keeping the existing gfp looks like the least amount of friction, if possible.


Thanks,
Niranjana

+
    return capture;
}
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 68a9ac77b4f2..0244864e94f7 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -248,6 +248,7 @@ vma_create(struct drm_i915_gem_object *obj,
    INIT_LIST_HEAD(&vma->non_priv_vm_bind_link);
    INIT_LIST_HEAD(&vma->vm_rebind_link);
    INIT_LIST_HEAD(&vma->userptr_invalidated_link);
+    INIT_LIST_HEAD(&vma->vm_capture_link);
    return vma;
err_unlock:
diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
index 90471dc0b235..10ae9f739d57 100644
--- a/drivers/gpu/drm/i915/i915_vma_types.h
+++ b/drivers/gpu/drm/i915/i915_vma_types.h
@@ -309,6 +309,8 @@ struct i915_vma {
    struct list_head vm_rebind_link; /* Link in vm_rebind_list */
    /** @userptr_invalidated_link: link to the vm->userptr_invalidated_list */
    struct list_head userptr_invalidated_link;
+    /* @vm_capture_link: link to the captureable VMA list */
+    struct list_head vm_capture_link;
    /** Timeline fence for vm_bind completion notification */
    struct {
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index b9167f950327..0744651ad5b0 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -3930,7 +3930,8 @@ struct drm_i915_gem_vm_bind {
     * Note that @fence carries its own flags.
     */
    __u64 flags;
-#define __I915_GEM_VM_BIND_UNKNOWN_FLAGS (~0ull)
+#define I915_GEM_VM_BIND_CAPTURE           (1 << 0)

1ull << 0

Worried about what the value of UNKNOWN_FLAGS might be otherwise? Also needs some kernel-doc.


Ok, will fix and add kernel-doc.

Thanks,
Niranjana

+#define __I915_GEM_VM_BIND_UNKNOWN_FLAGS (-(I915_GEM_VM_BIND_CAPTURE << 1))
    /** @rsvd: Reserved, MBZ */
    __u64 rsvd[2];



[Index of Archives]     [Linux DRI Users]     [Linux Intel Graphics]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux