On Thu, Mar 21, 2024 at 8:52 AM Christian König <ckoenig.leichtzumerken@xxxxxxxxx> wrote: > > This reverts drm/amdgpu: fix ftrace event amdgpu_bo_move always move > on same heap. The basic problem here is that after the move the old > location is simply not available any more. > > Some fixes where suggested, but essentially we should call the move where -> were > notification before actually moving things because only this way we have > the correct order for DMA-buf and VM move notifications as well. > > Also rework the statistic handling so that we don't update the eviction > counter before the move. > > Signed-off-by: Christian König <christian.koenig@xxxxxxx> Reviewed-by: Alex Deucher <alexander.deucher@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 15 +++---- > drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 4 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 48 ++++++++++++---------- > 3 files changed, 37 insertions(+), 30 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > index 425cebcc5cbf..eb7d824763b9 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > @@ -1245,19 +1245,20 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, > * amdgpu_bo_move_notify - notification about a memory move > * @bo: pointer to a buffer object > * @evict: if this move is evicting the buffer from the graphics address space > + * @new_mem: new resource for backing the BO > * > * Marks the corresponding &amdgpu_bo buffer object as invalid, also performs > * bookkeeping. > * TTM driver callback which is called when ttm moves a buffer. > */ > -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict) > +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, > + bool evict, > + struct ttm_resource *new_mem) > { > struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); > + struct ttm_resource *old_mem = bo->resource; > struct amdgpu_bo *abo; > > - if (!amdgpu_bo_is_amdgpu_bo(bo)) > - return; > - > abo = ttm_to_amdgpu_bo(bo); > amdgpu_vm_bo_invalidate(adev, abo, evict); > > @@ -1267,9 +1268,9 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict) > bo->resource->mem_type != TTM_PL_SYSTEM) > dma_buf_move_notify(abo->tbo.base.dma_buf); > > - /* remember the eviction */ > - if (evict) > - atomic64_inc(&adev->num_evictions); > + /* move_notify is called before move happens */ > + trace_amdgpu_bo_move(abo, new_mem ? new_mem->mem_type : -1, > + old_mem ? old_mem->mem_type : -1); > } > > void amdgpu_bo_get_memory(struct amdgpu_bo *bo, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > index a3ea8a82db23..d28e21baef16 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > @@ -344,7 +344,9 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata, > int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, > size_t buffer_size, uint32_t *metadata_size, > uint64_t *flags); > -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict); > +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, > + bool evict, > + struct ttm_resource *new_mem); > void amdgpu_bo_release_notify(struct ttm_buffer_object *bo); > vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); > void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index a5ceec7820cf..460b23918bfc 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -471,14 +471,16 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, > > if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM && > bo->ttm == NULL)) { > + amdgpu_bo_move_notify(bo, evict, new_mem); > ttm_bo_move_null(bo, new_mem); > - goto out; > + return 0; > } > if (old_mem->mem_type == TTM_PL_SYSTEM && > (new_mem->mem_type == TTM_PL_TT || > new_mem->mem_type == AMDGPU_PL_PREEMPT)) { > + amdgpu_bo_move_notify(bo, evict, new_mem); > ttm_bo_move_null(bo, new_mem); > - goto out; > + return 0; > } > if ((old_mem->mem_type == TTM_PL_TT || > old_mem->mem_type == AMDGPU_PL_PREEMPT) && > @@ -488,9 +490,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, > return r; > > amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm); > + amdgpu_bo_move_notify(bo, evict, new_mem); > ttm_resource_free(bo, &bo->resource); > ttm_bo_assign_mem(bo, new_mem); > - goto out; > + return 0; > } > > if (old_mem->mem_type == AMDGPU_PL_GDS || > @@ -502,8 +505,9 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, > new_mem->mem_type == AMDGPU_PL_OA || > new_mem->mem_type == AMDGPU_PL_DOORBELL) { > /* Nothing to save here */ > + amdgpu_bo_move_notify(bo, evict, new_mem); > ttm_bo_move_null(bo, new_mem); > - goto out; > + return 0; > } > > if (bo->type == ttm_bo_type_device && > @@ -515,22 +519,23 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, > abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; > } > > - if (adev->mman.buffer_funcs_enabled) { > - if (((old_mem->mem_type == TTM_PL_SYSTEM && > - new_mem->mem_type == TTM_PL_VRAM) || > - (old_mem->mem_type == TTM_PL_VRAM && > - new_mem->mem_type == TTM_PL_SYSTEM))) { > - hop->fpfn = 0; > - hop->lpfn = 0; > - hop->mem_type = TTM_PL_TT; > - hop->flags = TTM_PL_FLAG_TEMPORARY; > - return -EMULTIHOP; > - } > + if (adev->mman.buffer_funcs_enabled && > + ((old_mem->mem_type == TTM_PL_SYSTEM && > + new_mem->mem_type == TTM_PL_VRAM) || > + (old_mem->mem_type == TTM_PL_VRAM && > + new_mem->mem_type == TTM_PL_SYSTEM))) { > + hop->fpfn = 0; > + hop->lpfn = 0; > + hop->mem_type = TTM_PL_TT; > + hop->flags = TTM_PL_FLAG_TEMPORARY; > + return -EMULTIHOP; > + } > > + amdgpu_bo_move_notify(bo, evict, new_mem); > + if (adev->mman.buffer_funcs_enabled) > r = amdgpu_move_blit(bo, evict, new_mem, old_mem); > - } else { > + else > r = -ENODEV; > - } > > if (r) { > /* Check that all memory is CPU accessible */ > @@ -545,11 +550,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, > return r; > } > > - trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type); > -out: > - /* update statistics */ > + /* update statistics after the move */ > + if (evict) > + atomic64_inc(&adev->num_evictions); > atomic64_add(bo->base.size, &adev->num_bytes_moved); > - amdgpu_bo_move_notify(bo, evict); > return 0; > } > > @@ -1551,7 +1555,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, > static void > amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo) > { > - amdgpu_bo_move_notify(bo, false); > + amdgpu_bo_move_notify(bo, false, NULL); > } > > static struct ttm_device_funcs amdgpu_bo_driver = { > -- > 2.34.1 >