[AMD Official Use Only]

We could make dma_fence_default_wait() do something like dma_fence_release() does:

@@ -783,11 +783,15 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
 	unsigned long flags;
 	signed long ret = timeout ? timeout : 1;

-	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) &&
+	    list_empty(&fence->cb_list))
 		return ret;

 	spin_lock_irqsave(fence->lock, flags);

+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		goto out;
+
 	if (intr && signal_pending(current)) {
 		ret = -ERESTARTSYS;
 		goto out;

________________________________________
From: Koenig, Christian <Christian.Koenig@xxxxxxx>
Sent: April 12, 2022 20:11
To: Pan, Xinhui; amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Daniel Vetter
Cc: Deucher, Alexander
Subject: Re: [PATCH] drm/amdgpu: Fix one use-after-free of VM

On 12.04.22 at 14:03, xinhui pan wrote:
> VM might already be freed when amdgpu_vm_tlb_seq_cb() is called.
> We see the calltrace below.
>
> Fix it by keeping the last flush fence around and waiting for it to signal.
>
> BUG kmalloc-4k (Not tainted): Poison overwritten
>
> 0xffff9c88630414e8-0xffff9c88630414e8 @offset=5352. First byte 0x6c
> instead of 0x6b
> Allocated in amdgpu_driver_open_kms+0x9d/0x360 [amdgpu] age=44 cpu=0 pid=2343
>  __slab_alloc.isra.0+0x4f/0x90
>  kmem_cache_alloc_trace+0x6b8/0x7a0
>  amdgpu_driver_open_kms+0x9d/0x360 [amdgpu]
>  drm_file_alloc+0x222/0x3e0 [drm]
>  drm_open+0x11d/0x410 [drm]
>  drm_stub_open+0xdc/0x230 [drm]
>  chrdev_open+0xa5/0x1e0
>  do_dentry_open+0x16c/0x3c0
>  vfs_open+0x2d/0x30
>  path_openat+0x70a/0xa90
>  do_filp_open+0xb2/0x120
>  do_sys_openat2+0x245/0x330
>  do_sys_open+0x46/0x80
>  __x64_sys_openat+0x20/0x30
>  do_syscall_64+0x38/0xc0
>  entry_SYSCALL_64_after_hwframe+0x44/0xae
> Freed in amdgpu_driver_postclose_kms+0x3e9/0x550 [amdgpu] age=22 cpu=1 pid=2485
>  kfree+0x4a2/0x580
>  amdgpu_driver_postclose_kms+0x3e9/0x550 [amdgpu]
>  drm_file_free+0x24e/0x3c0 [drm]
>  drm_close_helper.isra.0+0x90/0xb0 [drm]
>  drm_release+0x97/0x1a0 [drm]
>  __fput+0xb6/0x280
>  ____fput+0xe/0x10
>  task_work_run+0x64/0xb0
>  do_exit+0x406/0xcf0
>  do_group_exit+0x50/0xc0
>  __x64_sys_exit_group+0x18/0x20
>  do_syscall_64+0x38/0xc0
>  entry_SYSCALL_64_after_hwframe+0x44/0xae
>
> Suggested-by: Christian König <christian.koenig@xxxxxxx>
> Signed-off-by: xinhui pan <xinhui.pan@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 22 +++++++++++++++++++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  1 +
>  2 files changed, 20 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 645ce28277c2..e2486e95ca69 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -932,9 +932,12 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>
>       if (flush_tlb || params.table_freed) {
>               tlb_cb->vm = vm;
> -             if (!fence || !*fence ||
> -                 dma_fence_add_callback(*fence, &tlb_cb->cb,
> -                                        amdgpu_vm_tlb_seq_cb))
> +             if (fence && *fence &&
> +                 !dma_fence_add_callback(*fence, &tlb_cb->cb,
> +                                         amdgpu_vm_tlb_seq_cb)) {
> +                     dma_fence_put(vm->last_delayed_tlb_flush);
> +                     vm->last_delayed_tlb_flush = dma_fence_get(*fence);
> +             } else
>                       amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
>               tlb_cb = NULL;
>       }
> @@ -2258,6 +2261,19 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>       dma_fence_wait(vm->last_unlocked, false);
>       dma_fence_put(vm->last_unlocked);
>
> +     if (vm->last_delayed_tlb_flush) {

You can initialize last_delayed_tlb_flush with the dummy fence, see how last_unlocked works.
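Roughly what that would look like, assuming the same dma_fence_get_stub() pattern
amdgpu_vm_init() already uses for last_unlocked (a sketch only, not part of the
posted patch; the exact placement of the hunks is guessed):

	/* amdgpu_vm_init(): start from the already-signaled stub fence, like
	 * last_unlocked, so the pointer is never NULL.
	 */
	vm->last_unlocked = dma_fence_get_stub();
	vm->last_delayed_tlb_flush = dma_fence_get_stub();

	/* amdgpu_vm_fini(): the NULL check can then be dropped and the
	 * wait/double-check/put sequence runs unconditionally.
	 */
	dma_fence_wait(vm->last_delayed_tlb_flush, false);
	dma_fence_get_status(vm->last_delayed_tlb_flush);
	dma_fence_put(vm->last_delayed_tlb_flush);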

> +             /* Wait until the fence signals.
> +              * But we must double check that the fence callback has run:
> +              * dma_fence_default_wait() tests DMA_FENCE_FLAG_SIGNALED_BIT
> +              * without holding the fence lock (the first test_bit).
> +              * So call dma_fence_get_status(), which takes the fence lock;
> +              * after that we know the fence callback has been called.
> +              */

Uff, that is a really good point and most likely a bug in dma_fence_wait().

I'm pretty sure that a couple of other callers rely on that as well.

Daniel, what's your opinion about that?

Thanks,
Christian.

> +             (void)dma_fence_wait(vm->last_delayed_tlb_flush, false);
> +             (void)dma_fence_get_status(vm->last_delayed_tlb_flush);
> +             dma_fence_put(vm->last_delayed_tlb_flush);
> +     }
> +
>       list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
>               if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
>                       amdgpu_vm_prt_fini(adev, vm);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 1a814fbffff8..c1a48f5c1019 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -286,6 +286,7 @@ struct amdgpu_vm {
>
>       /* Last finished delayed update */
>       atomic64_t tlb_seq;
> +     struct dma_fence *last_delayed_tlb_flush;
>
>       /* Last unlocked submission to the scheduler entities */
>       struct dma_fence *last_unlocked;
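
One more note on the double-check in amdgpu_vm_fini() above: dma_fence_signal()
invokes the pending callbacks while holding fence->lock, and dma_fence_get_status()
takes that same lock, so once it returns amdgpu_vm_tlb_seq_cb() has either finished
or was never installed. Paraphrased from memory of drivers/dma-buf/dma-fence.c (not
a verbatim copy), the helper is essentially:

	int dma_fence_get_status(struct dma_fence *fence)
	{
		unsigned long flags;
		int status;

		/* Serializes against dma_fence_signal(), which runs the
		 * cb_list entries under the same fence->lock.
		 */
		spin_lock_irqsave(fence->lock, flags);
		status = dma_fence_get_status_locked(fence);
		spin_unlock_irqrestore(fence->lock, flags);

		return status;
	}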