Hi Christian, Would you review this patch? Just looking at the code, calling amdgpu_sync_fence with adev=NULL should be OK for us. It's just a bit unusual compared to amdgpu's usage of this function. We've had this patch in kfd-staging for a while without problems. If you're OK with this I'll go ahead and push this upstream as well. Thanks, Felix On 2018-11-05 8:40 p.m., Kuehling, Felix wrote: > The adev parameter in amdgpu_sync_fence and amdgpu_sync_resv is only > needed for updating sync->last_vm_update. This breaks if different > adevs are passed to calls for the same sync object. > > Always pass NULL for calls from KFD because sync objects used for > KFD don't belong to any particular device, and KFD doesn't need the > sync->last_vm_update fence. > > This fixes kernel log warnings on multi-GPU systems after recent > changes in amdgpu_amdkfd_gpuvm_restore_process_bos. > > Signed-off-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 28 +++++------------------- > 1 file changed, 5 insertions(+), 23 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > index d005371..572ac5f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > @@ -395,23 +395,6 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) > return 0; > } > > -static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, > - struct dma_fence *f) > -{ > - int ret = amdgpu_sync_fence(adev, sync, f, false); > - > - /* Sync objects can't handle multiple GPUs (contexts) updating > - * sync->last_vm_update. Fortunately we don't need it for > - * KFD's purposes, so we can just drop that fence. > - */ > - if (sync->last_vm_update) { > - dma_fence_put(sync->last_vm_update); > - sync->last_vm_update = NULL; > - } > - > - return ret; > -} > - > static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) > { > struct amdgpu_bo *pd = vm->root.base.bo; > @@ -422,7 +405,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) > if (ret) > return ret; > > - return sync_vm_fence(adev, sync, vm->last_update); > + return amdgpu_sync_fence(NULL, sync, vm->last_update, false); > } > > /* add_bo_to_vm - Add a BO to a VM > @@ -826,7 +809,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev, > /* Add the eviction fence back */ > amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true); > > - sync_vm_fence(adev, sync, bo_va->last_pt_update); > + amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false); > > return 0; > } > @@ -851,7 +834,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev, > return ret; > } > > - return sync_vm_fence(adev, sync, bo_va->last_pt_update); > + return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false); > } > > static int map_bo_to_gpuvm(struct amdgpu_device *adev, > @@ -911,7 +894,7 @@ static int process_sync_pds_resv(struct amdkfd_process_info *process_info, > vm_list_node) { > struct amdgpu_bo *pd = peer_vm->root.base.bo; > > - ret = amdgpu_sync_resv(amdgpu_ttm_adev(pd->tbo.bdev), > + ret = amdgpu_sync_resv(NULL, > sync, pd->tbo.resv, > AMDGPU_FENCE_OWNER_UNDEFINED, false); > if (ret) > @@ -2084,8 +2067,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) > pr_debug("Memory eviction: Validate BOs failed. Try again\n"); > goto validate_map_fail; > } > - ret = amdgpu_sync_fence(amdgpu_ttm_adev(bo->tbo.bdev), > - &sync_obj, bo->tbo.moving, false); > + ret = amdgpu_sync_fence(NULL, &sync_obj, bo->tbo.moving, false); > if (ret) { > pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); > goto validate_map_fail; _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx