On Wed, Apr 26, 2023 at 04:57:12PM -0400, Rodrigo Vivi wrote: > The goal is to allow for a snapshot capture to be taken at the time > of the crash, while the print out can happen at a later time through > the exposed devcoredump virtual device. > > Signed-off-by: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx> Also thinking out loud here, at some point we are going to need a hook to dump the entire contents of the VMAs... I can think of a few options. 1. Flag on the VM creation, dump the entire VM. 2. Flag on VM binds, dump VMAs with the flag set. 3. Have both options. Thoughts? Matt > --- > drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- > drivers/gpu/drm/xe/xe_vm.c | 137 +++++++++++++++++++++++++---- > drivers/gpu/drm/xe/xe_vm.h | 6 +- > drivers/gpu/drm/xe/xe_vm_types.h | 18 ++++ > 4 files changed, 143 insertions(+), 20 deletions(-) > > diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c > index 74659d0a69b3..ac98bc1843e8 100644 > --- a/drivers/gpu/drm/xe/xe_guc_submit.c > +++ b/drivers/gpu/drm/xe/xe_guc_submit.c > @@ -753,7 +753,7 @@ static void simple_error_capture(struct xe_engine *e) > continue; > xe_hw_engine_print(hwe, &p); > } > - xe_analyze_vm(&p, e->vm, e->gt->info.id); > + xe_vm_print(&p, e->vm, e->gt->info.id); > xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); > dma_fence_end_signalling(cookie); > } > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c > index 4cffdb84680a..075640dbdff0 100644 > --- a/drivers/gpu/drm/xe/xe_vm.c > +++ b/drivers/gpu/drm/xe/xe_vm.c > @@ -3369,38 +3369,139 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) > return 0; > } > > -int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) > +/** > + * xe_vm_snapshot_capture - Take a quick snapshot of the VM. > + * @vm: Xe VM > + * @gt_id: GT id number > + * > + * This can be printed out in a later stage like during dev_coredump > + * analysis. 
> + * > + * Returns: a Xe VM snapshot object that must be freed by the > + * caller, using `xe_vm_snapshot_free`. > + */ > +struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm, int gt_id) > { > + struct xe_vm_snapshot *snapshot; > struct rb_node *node; > - bool is_vram; > - uint64_t addr; > + int i = 0; > + > + snapshot = kzalloc(sizeof(struct xe_vm_snapshot), GFP_ATOMIC); > + > + if (!down_read_trylock(&vm->lock)) > + return snapshot; > + > + snapshot->acquired = true; > + > + for (node = rb_first(&vm->vmas); node; node = rb_next(node)) > + snapshot->num_nodes++; > + > + snapshot->vm_nodes = kmalloc_array(snapshot->num_nodes, > + sizeof(struct vm_node_snapshot), > + GFP_ATOMIC); > > - if (!down_read_trylock(&vm->lock)) { > - drm_printf(p, " Failed to acquire VM lock to dump capture"); > - return 0; > - } > if (vm->pt_root[gt_id]) { > - addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, GEN8_PAGE_SIZE, &is_vram); > - drm_printf(p, " VM root: A:0x%llx %s\n", addr, is_vram ? "VRAM" : "SYS"); > + snapshot->vm_root = kzalloc(sizeof(struct vm_node_snapshot), > + GFP_ATOMIC); > + snapshot->vm_root->addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, > + GEN8_PAGE_SIZE, > + &snapshot->vm_root->is_vram); > } > > for (node = rb_first(&vm->vmas); node; node = rb_next(node)) { > struct xe_vma *vma = to_xe_vma(node); > - bool is_userptr = xe_vma_is_userptr(vma); > + snapshot->vm_nodes[i].is_userptr = xe_vma_is_userptr(vma); > > - if (is_userptr) { > + if (snapshot->vm_nodes[i].is_userptr) { > struct xe_res_cursor cur; > > - xe_res_first_sg(vma->userptr.sg, 0, GEN8_PAGE_SIZE, &cur); > - addr = xe_res_dma(&cur); > + xe_res_first_sg(vma->userptr.sg, 0, GEN8_PAGE_SIZE, > + &cur); > + snapshot->vm_nodes[i].addr = xe_res_dma(&cur); > } else { > - addr = xe_bo_addr(vma->bo, 0, GEN8_PAGE_SIZE, &is_vram); > + snapshot->vm_nodes[i].addr = xe_bo_addr(vma->bo, 0, > + GEN8_PAGE_SIZE, > + &snapshot->vm_nodes[i].is_vram); > } > - drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n", > 
- vma->start, vma->end, vma->end - vma->start + 1ull, > - addr, is_userptr ? "USR" : is_vram ? "VRAM" : "SYS"); > + snapshot->vm_nodes[i].vma.start = vma->start; > + snapshot->vm_nodes[i].vma.end = vma->end; > + i++; > } > up_read(&vm->lock); > > - return 0; > + return snapshot; > +} > + > +/** > + * xe_vm_snapshot_print - Print out a given Xe VM snapshot. > + * @snapshot: Xe VM snapshot object. > + * @p: drm_printer where it will be printed out. > + * > + * This function prints out a given Xe VM snapshot object. > + */ > +void xe_vm_snapshot_print(struct xe_vm_snapshot *snapshot, > + struct drm_printer *p) > +{ > + int i; > + > + if (!snapshot) > + return; > + > + if (!snapshot->acquired) { > + drm_printf(p, " Failed to acquire VM lock to dump capture"); > + return; > + } > + > + if (snapshot->vm_root) { > + drm_printf(p, " VM root: A:0x%llx %s\n", > + snapshot->vm_root->addr, > + snapshot->vm_root->is_vram ? "VRAM" : "SYS"); > + } > + > + for (i = 0; snapshot->vm_nodes && i < snapshot->num_nodes; i++) > + drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n", > + snapshot->vm_nodes[i].vma.start, > + snapshot->vm_nodes[i].vma.end, > + snapshot->vm_nodes[i].vma.end - > + snapshot->vm_nodes[i].vma.start + 1ull, > + snapshot->vm_nodes[i].addr, > + snapshot->vm_nodes[i].is_userptr ? > + "USR" : snapshot->vm_nodes[i].is_vram ? > + "VRAM" : "SYS"); > +} > + > +/** > + * xe_vm_snapshot_free - Free all allocated objects for a given snapshot. > + * @snapshot: Xe VM snapshot object. > + * > + * This function frees all the memory that needed to be allocated at capture > + * time. > + */ > +void xe_vm_snapshot_free(struct xe_vm_snapshot *snapshot) > +{ > + if (!snapshot) > + return; > + > + if (snapshot->vm_root) > + kfree(snapshot->vm_root); > + if (snapshot->vm_nodes) > + kfree(snapshot->vm_nodes); > + kfree(snapshot); > +} > + > +/** > + * xe_vm_print - Xe VM Print. 
> + * @p: drm_printer > + * @vm: Xe VM > + * @gt_id: GT id number > + * > + * This function quickly captures a snapshot and immediately prints it out. > + */ > +void xe_vm_print(struct drm_printer *p, struct xe_vm *vm, int gt_id) > +{ > + struct xe_vm_snapshot *snapshot; > + > + snapshot = xe_vm_snapshot_capture(vm, gt_id); > + xe_vm_snapshot_print(snapshot, p); > + xe_vm_snapshot_free(snapshot); > } > diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h > index 748dc16ebed9..924884b36469 100644 > --- a/drivers/gpu/drm/xe/xe_vm.h > +++ b/drivers/gpu/drm/xe/xe_vm.h > @@ -145,7 +145,11 @@ void xe_vm_unlock_dma_resv(struct xe_vm *vm, > void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence, > enum dma_resv_usage usage); > > -int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id); > +struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm, int gt_id); > +void xe_vm_snapshot_print(struct xe_vm_snapshot *snapshot, > + struct drm_printer *p); > +void xe_vm_snapshot_free(struct xe_vm_snapshot *snapshot); > +void xe_vm_print(struct drm_printer *p, struct xe_vm *vm, int gt_id); > > #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) > #define vm_dbg drm_dbg > diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h > index fada7896867f..18e79b6a2182 100644 > --- a/drivers/gpu/drm/xe/xe_vm_types.h > +++ b/drivers/gpu/drm/xe/xe_vm_types.h > @@ -149,6 +149,24 @@ struct xe_vma { > } extobj; > }; > > + > +struct vm_node_snapshot { > + bool is_userptr; > + bool is_vram; > + struct { > + u64 start; > + u64 end; > + } vma; > + u64 addr; > +}; > + > +struct xe_vm_snapshot { > + bool acquired; > + struct vm_node_snapshot *vm_root; > + struct vm_node_snapshot *vm_nodes; > + int num_nodes; > +}; > + > struct xe_device; > > #define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv) > -- > 2.39.2 >