On Fri, Feb 21, 2025 at 02:38:41PM +0000, Matthew Auld wrote:
> On error, restore anything still on the pin_list back to the
> invalidation list. For the actual pin, so long as the vma is tracked on
> either list it should get picked up on the next pin; however, it looks
> possible for the vma to get nuked but still be present on this per-vm
> pin_list, leading to corruption. An alternative might instead be to
> just remove the link when destroying the vma.
>
> v2:
>  - Also add some asserts.
>  - Keep the overzealous locking so that we are consistent with the docs;
>    updating the docs and related bits will be done as a follow-up.
>
> Fixes: ed2bdf3b264d ("drm/xe/vm: Subclass userptr vmas")
> Suggested-by: Matthew Brost <matthew.brost@xxxxxxxxx>

Reviewed-by: Matthew Brost <matthew.brost@xxxxxxxxx>

> Signed-off-by: Matthew Auld <matthew.auld@xxxxxxxxx>
> Cc: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx>
> Cc: <stable@xxxxxxxxxxxxxxx> # v6.8+
> ---
>  drivers/gpu/drm/xe/xe_vm.c | 28 +++++++++++++++++++++-------
>  1 file changed, 21 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index d664f2e418b2..3dbfb20a7c60 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -667,15 +667,16 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
>
>          /* Collect invalidated userptrs */
>          spin_lock(&vm->userptr.invalidated_lock);
> +        xe_assert(vm->xe, list_empty(&vm->userptr.repin_list));
>          list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
>                                   userptr.invalidate_link) {
>                  list_del_init(&uvma->userptr.invalidate_link);
> -                list_move_tail(&uvma->userptr.repin_link,
> -                               &vm->userptr.repin_list);
> +                list_add_tail(&uvma->userptr.repin_link,
> +                              &vm->userptr.repin_list);
>          }
>          spin_unlock(&vm->userptr.invalidated_lock);
>
> -        /* Pin and move to temporary list */
> +        /* Pin and move to bind list */
>          list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
>                                   userptr.repin_link) {
>                  err = xe_vma_userptr_pin_pages(uvma);
> @@ -691,10 +692,10 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
>                          err = xe_vm_invalidate_vma(&uvma->vma);
>                          xe_vm_unlock(vm);
>                          if (err)
> -                                return err;
> +                                break;
>                  } else {
> -                        if (err < 0)
> -                                return err;
> +                        if (err)
> +                                break;
>
>                          list_del_init(&uvma->userptr.repin_link);
>                          list_move_tail(&uvma->vma.combined_links.rebind,
> @@ -702,7 +703,19 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
>                  }
>          }
>
> -        return 0;
> +        if (err) {
> +                down_write(&vm->userptr.notifier_lock);
> +                spin_lock(&vm->userptr.invalidated_lock);
> +                list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
> +                                         userptr.repin_link) {
> +                        list_del_init(&uvma->userptr.repin_link);
> +                        list_move_tail(&uvma->userptr.invalidate_link,
> +                                       &vm->userptr.invalidated);
> +                }
> +                spin_unlock(&vm->userptr.invalidated_lock);
> +                up_write(&vm->userptr.notifier_lock);
> +        }
> +        return err;
>  }
>
>  /**
> @@ -1067,6 +1080,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
>                  xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
>
>                  spin_lock(&vm->userptr.invalidated_lock);
> +                xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link));
>                  list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
>                  spin_unlock(&vm->userptr.invalidated_lock);
>          } else if (!xe_vma_is_null(vma)) {
> --
> 2.48.1
>
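
For anyone following along, here is a standalone userspace sketch of the unwind
pattern the patch applies. The list helpers, the entry_of() macro, struct uvma's
fields and the fail_pin flag below are illustrative stand-ins, not the xe
driver's types or the kernel's <linux/list.h>; the point is only that an entry
sits on at most one of the two lists at a time, and that whatever is still on
the processing (repin) list when a failure stops the walk gets pushed back onto
the source (invalidated) list instead of being left stranded.

/* Minimal sketch: collect, process until failure, then unwind the leftovers. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct list_node {
        struct list_node *prev, *next;
};

static void list_init(struct list_node *n)
{
        n->prev = n->next = n;
}

static bool list_empty(const struct list_node *head)
{
        return head->next == head;
}

static void list_del_init(struct list_node *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
        list_init(n);
}

static void list_add_tail(struct list_node *n, struct list_node *head)
{
        n->prev = head->prev;
        n->next = head;
        head->prev->next = n;
        head->prev = n;
}

#define entry_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct uvma {
        struct list_node invalidate_link;       /* link for the invalidated list */
        struct list_node repin_link;            /* link for the repin list */
        int id;
        bool fail_pin;                          /* force a pin failure */
};

int main(void)
{
        struct list_node invalidated, repin;
        struct uvma v[3] = { { .id = 0 }, { .id = 1, .fail_pin = true }, { .id = 2 } };
        int err = 0;

        list_init(&invalidated);
        list_init(&repin);
        for (int i = 0; i < 3; i++) {
                list_init(&v[i].repin_link);
                list_add_tail(&v[i].invalidate_link, &invalidated);
        }

        /* Collect invalidated entries onto the repin list. */
        while (!list_empty(&invalidated)) {
                struct uvma *u = entry_of(invalidated.next, struct uvma, invalidate_link);

                list_del_init(&u->invalidate_link);
                list_add_tail(&u->repin_link, &repin);
        }

        /* "Pin" each entry; stop on the first failure. */
        while (!list_empty(&repin)) {
                struct uvma *u = entry_of(repin.next, struct uvma, repin_link);

                if (u->fail_pin) {
                        err = -1;
                        break;
                }
                list_del_init(&u->repin_link);
                printf("pinned uvma %d\n", u->id);
        }

        /* Unwind: everything still on the repin list goes back to the source. */
        while (err && !list_empty(&repin)) {
                struct uvma *u = entry_of(repin.next, struct uvma, repin_link);

                list_del_init(&u->repin_link);
                list_add_tail(&u->invalidate_link, &invalidated);
                printf("restored uvma %d to the invalidated list\n", u->id);
        }

        return err ? 1 : 0;
}

With that unwind in place, an entry destroyed after a failed pin is reachable
only through the invalidated list, which the destroy path already unlinks, so
the new assert on the repin_link should hold.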