[Public] > From: Koenig, Christian <Christian.Koenig@xxxxxxx> > Sent: Thursday, December 19, 2024 5:07 > Am 16.12.24 um 18:49 schrieb Yunxiang Li: > > Before, every time fdinfo is queried we try to lock all the BOs in the > > VM and calculate memory usage from scratch. This works okay if the > > fdinfo is rarely read and the VMs don't have a ton of BOs. If either > > of these conditions is not true, we get a massive performance hit. > > > > In this new revision, we track the BOs as they change states. This way > > when the fdinfo is queried we only need to take the status lock and > > copy out the usage stats with minimal impact to the runtime > > performance. With this new approach however, we would no longer be > > able to track active buffers. > > > > Signed-off-by: Yunxiang Li <Yunxiang.Li@xxxxxxx> > > Reviewed-by: Christian König <christian.koenig@xxxxxxx> > > How do we want to merge this? Do we already have the required acks and rbs for > the patches that touch documentation and general DRM code? Yep, I think all patches have been reviewed. Teddy > Regards, > Christian. 
> > > --- > > v12: call update_shared in amdgpu_dma_buf_attach > > > > drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 3 + > > drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c | 18 +- > > drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 3 + > > drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 110 ++++------- > > drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 4 +- > > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 4 +- > > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 205 +++++++++++++++----- > > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 23 ++- > > drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 1 + > > 9 files changed, 232 insertions(+), 139 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c > > index b144404902255..9f627caedc3f6 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c > > @@ -36,6 +36,7 @@ > > #include "amdgpu_gem.h" > > #include "amdgpu_dma_buf.h" > > #include "amdgpu_xgmi.h" > > +#include "amdgpu_vm.h" > > #include <drm/amdgpu_drm.h> > > #include <drm/ttm/ttm_tt.h> > > #include <linux/dma-buf.h> > > @@ -60,6 +61,8 @@ static int amdgpu_dma_buf_attach(struct dma_buf > *dmabuf, > > if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) > > attach->peer2peer = false; > > > > + amdgpu_vm_bo_update_shared(bo); > > + > > return 0; > > } > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c > > index 7717e3e4f05b5..91d638098889d 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c > > @@ -60,7 +60,7 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct > drm_file *file) > > struct amdgpu_fpriv *fpriv = file->driver_priv; > > struct amdgpu_vm *vm = &fpriv->vm; > > > > - struct amdgpu_mem_stats stats[__AMDGPU_PL_LAST + 1] = { }; > > + struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]; > > ktime_t usage[AMDGPU_HW_IP_NUM]; > > const char 
*pl_name[] = { > > [TTM_PL_VRAM] = "vram", > > @@ -72,15 +72,8 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct > drm_file *file) > > [AMDGPU_PL_DOORBELL] = "doorbell", > > }; > > unsigned int hw_ip, i; > > - int ret; > > - > > - ret = amdgpu_bo_reserve(vm->root.bo, false); > > - if (ret) > > - return; > > - > > - amdgpu_vm_get_memory(vm, stats, ARRAY_SIZE(stats)); > > - amdgpu_bo_unreserve(vm->root.bo); > > > > + amdgpu_vm_get_memory(vm, stats); > > amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage); > > > > /* > > @@ -97,7 +90,6 @@ void amdgpu_show_fdinfo(struct drm_printer *p, > > struct drm_file *file) > > > > drm_print_memory_stats(p, > > &stats[i].drm, > > - DRM_GEM_OBJECT_ACTIVE | > > DRM_GEM_OBJECT_RESIDENT | > > DRM_GEM_OBJECT_PURGEABLE, > > pl_name[i]); > > @@ -115,9 +107,11 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct > drm_file *file) > > drm_printf(p, "amd-evicted-vram:\t%llu KiB\n", > > stats[TTM_PL_VRAM].evicted/1024UL); > > drm_printf(p, "amd-requested-vram:\t%llu KiB\n", > > - stats[TTM_PL_VRAM].requested/1024UL); > > + (stats[TTM_PL_VRAM].drm.shared + > > + stats[TTM_PL_VRAM].drm.private) / 1024UL); > > drm_printf(p, "amd-requested-gtt:\t%llu KiB\n", > > - stats[TTM_PL_TT].requested/1024UL); > > + (stats[TTM_PL_TT].drm.shared + > > + stats[TTM_PL_TT].drm.private) / 1024UL); > > > > for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) { > > if (!usage[hw_ip]) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > > index fe7ae45500639..9f1382ff9d813 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > > @@ -43,6 +43,7 @@ > > #include "amdgpu_dma_buf.h" > > #include "amdgpu_hmm.h" > > #include "amdgpu_xgmi.h" > > +#include "amdgpu_vm.h" > > > > static int > > amdgpu_gem_add_input_fence(struct drm_file *filp, @@ -288,6 +289,7 > > @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, > > if (r) > > return r; > > > > + 
amdgpu_vm_bo_update_shared(abo); > > bo_va = amdgpu_vm_bo_find(vm, abo); > > if (!bo_va) > > bo_va = amdgpu_vm_bo_add(adev, vm, abo); @@ -362,6 +364,7 > @@ > > static void amdgpu_gem_object_close(struct drm_gem_object *obj, > > goto out_unlock; > > > > amdgpu_vm_bo_del(adev, bo_va); > > + amdgpu_vm_bo_update_shared(bo); > > if (!amdgpu_vm_ready(vm)) > > goto out_unlock; > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > > index 951b20e40fd35..96f4b8904e9a6 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > > @@ -1258,7 +1258,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object > *bo, > > return; > > > > abo = ttm_to_amdgpu_bo(bo); > > - amdgpu_vm_bo_invalidate(abo, evict); > > + amdgpu_vm_bo_move(abo, new_mem, evict); > > > > amdgpu_bo_kunmap(abo); > > > > @@ -1271,75 +1271,6 @@ void amdgpu_bo_move_notify(struct > ttm_buffer_object *bo, > > old_mem ? old_mem->mem_type : -1); > > } > > > > -void amdgpu_bo_get_memory(struct amdgpu_bo *bo, > > - struct amdgpu_mem_stats *stats, > > - unsigned int sz) > > -{ > > - const unsigned int domain_to_pl[] = { > > - [ilog2(AMDGPU_GEM_DOMAIN_CPU)] = TTM_PL_SYSTEM, > > - [ilog2(AMDGPU_GEM_DOMAIN_GTT)] = TTM_PL_TT, > > - [ilog2(AMDGPU_GEM_DOMAIN_VRAM)] = TTM_PL_VRAM, > > - [ilog2(AMDGPU_GEM_DOMAIN_GDS)] = > AMDGPU_PL_GDS, > > - [ilog2(AMDGPU_GEM_DOMAIN_GWS)] = > AMDGPU_PL_GWS, > > - [ilog2(AMDGPU_GEM_DOMAIN_OA)] = AMDGPU_PL_OA, > > - [ilog2(AMDGPU_GEM_DOMAIN_DOORBELL)] = > AMDGPU_PL_DOORBELL, > > - }; > > - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > > - struct ttm_resource *res = bo->tbo.resource; > > - struct drm_gem_object *obj = &bo->tbo.base; > > - uint64_t size = amdgpu_bo_size(bo); > > - unsigned int type; > > - > > - if (!res) { > > - /* > > - * If no backing store use one of the preferred domain for basic > > - * stats. 
We take the MSB since that should give a reasonable > > - * view. > > - */ > > - BUILD_BUG_ON(TTM_PL_VRAM < TTM_PL_TT || > > - TTM_PL_VRAM < TTM_PL_SYSTEM); > > - type = fls(bo->preferred_domains & > AMDGPU_GEM_DOMAIN_MASK); > > - if (!type) > > - return; > > - type--; > > - if (drm_WARN_ON_ONCE(&adev->ddev, > > - type >= ARRAY_SIZE(domain_to_pl))) > > - return; > > - type = domain_to_pl[type]; > > - } else { > > - type = res->mem_type; > > - } > > - > > - if (drm_WARN_ON_ONCE(&adev->ddev, type >= sz)) > > - return; > > - > > - /* DRM stats common fields: */ > > - > > - if (drm_gem_object_is_shared_for_memory_stats(obj)) > > - stats[type].drm.shared += size; > > - else > > - stats[type].drm.private += size; > > - > > - if (res) { > > - stats[type].drm.resident += size; > > - > > - if (!dma_resv_test_signaled(obj->resv, > DMA_RESV_USAGE_BOOKKEEP)) > > - stats[type].drm.active += size; > > - else if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) > > - stats[type].drm.purgeable += size; > > - } > > - > > - /* amdgpu specific stats: */ > > - > > - if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) { > > - stats[TTM_PL_VRAM].requested += size; > > - if (type != TTM_PL_VRAM) > > - stats[TTM_PL_VRAM].evicted += size; > > - } else if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_GTT) { > > - stats[TTM_PL_TT].requested += size; > > - } > > -} > > - > > /** > > * amdgpu_bo_release_notify - notification about a BO being released > > * @bo: pointer to a buffer object > > @@ -1554,6 +1485,45 @@ u64 amdgpu_bo_gpu_offset_no_check(struct > amdgpu_bo *bo) > > return amdgpu_gmc_sign_extend(offset); > > } > > > > +/** > > + * amdgpu_bo_mem_stats_placement - bo placement for memory accounting > > + * @bo: the buffer object we should look at > > + * > > + * BO can have multiple preferred placements, to avoid double > > +counting we want > > + * to file it under a single placement for memory stats. 
> > + * Luckily, if we take the highest set bit in preferred_domains the > > +result is > > + * quite sensible. > > + * > > + * Returns: > > + * Which of the placements should the BO be accounted under. > > + */ > > +uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo) { > > + uint32_t domain = bo->preferred_domains & > AMDGPU_GEM_DOMAIN_MASK; > > + > > + if (!domain) > > + return TTM_PL_SYSTEM; > > + > > + switch (rounddown_pow_of_two(domain)) { > > + case AMDGPU_GEM_DOMAIN_CPU: > > + return TTM_PL_SYSTEM; > > + case AMDGPU_GEM_DOMAIN_GTT: > > + return TTM_PL_TT; > > + case AMDGPU_GEM_DOMAIN_VRAM: > > + return TTM_PL_VRAM; > > + case AMDGPU_GEM_DOMAIN_GDS: > > + return AMDGPU_PL_GDS; > > + case AMDGPU_GEM_DOMAIN_GWS: > > + return AMDGPU_PL_GWS; > > + case AMDGPU_GEM_DOMAIN_OA: > > + return AMDGPU_PL_OA; > > + case AMDGPU_GEM_DOMAIN_DOORBELL: > > + return AMDGPU_PL_DOORBELL; > > + default: > > + return TTM_PL_SYSTEM; > > + } > > +} > > + > > /** > > * amdgpu_bo_get_preferred_domain - get preferred domain > > * @adev: amdgpu device object > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > > index ab3fe7b42da7a..375448627f7bc 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > > @@ -305,9 +305,7 @@ int amdgpu_bo_sync_wait_resv(struct amdgpu_device > *adev, struct dma_resv *resv, > > int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr); > > u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo); > > u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo); -void > > amdgpu_bo_get_memory(struct amdgpu_bo *bo, > > - struct amdgpu_mem_stats *stats, > > - unsigned int size); > > +uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo); > > uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev, > > uint32_t domain); > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > > 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > > index 2852a6064c9ac..461fb8090ae04 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > > @@ -26,15 +26,15 @@ > > > > #include <linux/dma-direction.h> > > #include <drm/gpu_scheduler.h> > > +#include <drm/ttm/ttm_placement.h> > > #include "amdgpu_vram_mgr.h" > > -#include "amdgpu.h" > > > > #define AMDGPU_PL_GDS (TTM_PL_PRIV + 0) > > #define AMDGPU_PL_GWS (TTM_PL_PRIV + 1) > > #define AMDGPU_PL_OA (TTM_PL_PRIV + 2) > > #define AMDGPU_PL_PREEMPT (TTM_PL_PRIV + 3) > > #define AMDGPU_PL_DOORBELL (TTM_PL_PRIV + 4) > > -#define __AMDGPU_PL_LAST (TTM_PL_PRIV + 4) > > +#define __AMDGPU_PL_NUM (TTM_PL_PRIV + 5) > > > > #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 > > #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > > index 1adca13bfb7f7..bd206ead2e9c0 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > > @@ -36,6 +36,7 @@ > > #include <drm/ttm/ttm_tt.h> > > #include <drm/drm_exec.h> > > #include "amdgpu.h" > > +#include "amdgpu_vm.h" > > #include "amdgpu_trace.h" > > #include "amdgpu_amdkfd.h" > > #include "amdgpu_gmc.h" > > @@ -310,6 +311,111 @@ static void > amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm) > > spin_unlock(&vm->status_lock); > > } > > > > +/** > > + * amdgpu_vm_update_shared - helper to update shared memory stat > > + * @base: base structure for tracking BO usage in a VM > > + * > > + * Takes the vm status_lock and updates the shared memory stat. If > > +the basic > > + * stat changed (e.g. buffer was moved) amdgpu_vm_update_stats need > > +to be called > > + * as well. 
> > + */ > > +static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base) { > > + struct amdgpu_vm *vm = base->vm; > > + struct amdgpu_bo *bo = base->bo; > > + uint64_t size = amdgpu_bo_size(bo); > > + uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo); > > + bool shared; > > + > > + spin_lock(&vm->status_lock); > > + shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base); > > + if (base->shared != shared) { > > + base->shared = shared; > > + if (shared) { > > + vm->stats[bo_memtype].drm.shared += size; > > + vm->stats[bo_memtype].drm.private -= size; > > + } else { > > + vm->stats[bo_memtype].drm.shared -= size; > > + vm->stats[bo_memtype].drm.private += size; > > + } > > + } > > + spin_unlock(&vm->status_lock); > > +} > > + > > +/** > > + * amdgpu_vm_bo_update_shared - callback when bo gets shared/unshared > > + * @bo: amdgpu buffer object > > + * > > + * Update the per VM stats for all the vm if needed from private to > > +shared or > > + * vice versa. > > + */ > > +void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo) { > > + struct amdgpu_vm_bo_base *base; > > + > > + for (base = bo->vm_bo; base; base = base->next) > > + amdgpu_vm_update_shared(base); > > +} > > + > > +/** > > + * amdgpu_vm_update_stats_locked - helper to update normal memory > > +stat > > + * @base: base structure for tracking BO usage in a VM > > + * @res: the ttm_resource to use for the purpose of accounting, may or may not > > + * be bo->tbo.resource > > + * @sign: if we should add (+1) or subtract (-1) from the stat > > + * > > + * Caller need to have the vm status_lock held. Useful for when > > +multiple update > > + * need to happen at the same time. 
> > + */ > > +static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base > *base, > > + struct ttm_resource *res, int sign) { > > + struct amdgpu_vm *vm = base->vm; > > + struct amdgpu_bo *bo = base->bo; > > + int64_t size = sign * amdgpu_bo_size(bo); > > + uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo); > > + > > + /* For drm-total- and drm-shared-, BO are accounted by their preferred > > + * placement, see also amdgpu_bo_mem_stats_placement. > > + */ > > + if (base->shared) > > + vm->stats[bo_memtype].drm.shared += size; > > + else > > + vm->stats[bo_memtype].drm.private += size; > > + > > + if (res && res->mem_type < __AMDGPU_PL_NUM) { > > + uint32_t res_memtype = res->mem_type; > > + > > + vm->stats[res_memtype].drm.resident += size; > > + /* BO only count as purgeable if it is resident, > > + * since otherwise there's nothing to purge. > > + */ > > + if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) > > + vm->stats[res_memtype].drm.purgeable += size; > > + if (!(bo->preferred_domains & > amdgpu_mem_type_to_domain(res_memtype))) > > + vm->stats[bo_memtype].evicted += size; > > + } > > +} > > + > > +/** > > + * amdgpu_vm_update_stats - helper to update normal memory stat > > + * @base: base structure for tracking BO usage in a VM > > + * @res: the ttm_resource to use for the purpose of accounting, may or may not > > + * be bo->tbo.resource > > + * @sign: if we should add (+1) or subtract (-1) from the stat > > + * > > + * Updates the basic memory stat when bo is added/deleted/moved. 
> > + */ > > +void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base, > > + struct ttm_resource *res, int sign) { > > + struct amdgpu_vm *vm = base->vm; > > + > > + spin_lock(&vm->status_lock); > > + amdgpu_vm_update_stats_locked(base, res, sign); > > + spin_unlock(&vm->status_lock); > > +} > > + > > /** > > * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm > > * > > @@ -333,6 +439,11 @@ void amdgpu_vm_bo_base_init(struct > amdgpu_vm_bo_base *base, > > base->next = bo->vm_bo; > > bo->vm_bo = base; > > > > + spin_lock(&vm->status_lock); > > + base->shared = drm_gem_object_is_shared_for_memory_stats(&bo- > >tbo.base); > > + amdgpu_vm_update_stats_locked(base, bo->tbo.resource, +1); > > + spin_unlock(&vm->status_lock); > > + > > if (!amdgpu_vm_is_bo_always_valid(vm, bo)) > > return; > > > > @@ -1083,53 +1194,11 @@ int amdgpu_vm_update_range(struct > amdgpu_device *adev, struct amdgpu_vm *vm, > > return r; > > } > > > > -static void amdgpu_vm_bo_get_memory(struct amdgpu_bo_va *bo_va, > > - struct amdgpu_mem_stats *stats, > > - unsigned int size) > > -{ > > - struct amdgpu_vm *vm = bo_va->base.vm; > > - struct amdgpu_bo *bo = bo_va->base.bo; > > - > > - if (!bo) > > - return; > > - > > - /* > > - * For now ignore BOs which are currently locked and potentially > > - * changing their location. 
> > - */ > > - if (!amdgpu_vm_is_bo_always_valid(vm, bo) && > > - !dma_resv_trylock(bo->tbo.base.resv)) > > - return; > > - > > - amdgpu_bo_get_memory(bo, stats, size); > > - if (!amdgpu_vm_is_bo_always_valid(vm, bo)) > > - dma_resv_unlock(bo->tbo.base.resv); > > -} > > - > > void amdgpu_vm_get_memory(struct amdgpu_vm *vm, > > - struct amdgpu_mem_stats *stats, > > - unsigned int size) > > + struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]) > > { > > - struct amdgpu_bo_va *bo_va, *tmp; > > - > > spin_lock(&vm->status_lock); > > - list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) > > - amdgpu_vm_bo_get_memory(bo_va, stats, size); > > - > > - list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status) > > - amdgpu_vm_bo_get_memory(bo_va, stats, size); > > - > > - list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) > > - amdgpu_vm_bo_get_memory(bo_va, stats, size); > > - > > - list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) > > - amdgpu_vm_bo_get_memory(bo_va, stats, size); > > - > > - list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) > > - amdgpu_vm_bo_get_memory(bo_va, stats, size); > > - > > - list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status) > > - amdgpu_vm_bo_get_memory(bo_va, stats, size); > > + memcpy(stats, vm->stats, sizeof(*stats) * __AMDGPU_PL_NUM); > > spin_unlock(&vm->status_lock); > > } > > > > @@ -2076,6 +2145,7 @@ void amdgpu_vm_bo_del(struct amdgpu_device > *adev, > > if (*base != &bo_va->base) > > continue; > > > > + amdgpu_vm_update_stats(*base, bo->tbo.resource, -1); > > *base = bo_va->base.next; > > break; > > } > > @@ -2174,6 +2244,32 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_bo > *bo, bool evicted) > > } > > } > > > > +/** > > + * amdgpu_vm_bo_move - handle BO move > > + * > > + * @bo: amdgpu buffer object > > + * @new_mem: the new placement of the BO move > > + * @evicted: is the BO evicted > > + * > > + * Update the memory stats for the new placement and 
mark @bo as invalid. > > + */ > > +void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource > *new_mem, > > + bool evicted) > > +{ > > + struct amdgpu_vm_bo_base *bo_base; > > + > > + for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { > > + struct amdgpu_vm *vm = bo_base->vm; > > + > > + spin_lock(&vm->status_lock); > > + amdgpu_vm_update_stats_locked(bo_base, bo->tbo.resource, -1); > > + amdgpu_vm_update_stats_locked(bo_base, new_mem, +1); > > + spin_unlock(&vm->status_lock); > > + } > > + > > + amdgpu_vm_bo_invalidate(bo, evicted); } > > + > > /** > > * amdgpu_vm_get_block_size - calculate VM page table size as power of two > > * > > @@ -2593,6 +2689,16 @@ void amdgpu_vm_release_compute(struct > amdgpu_device *adev, struct amdgpu_vm *vm) > > vm->is_compute_context = false; > > } > > > > +static int amdgpu_vm_stats_is_zero(struct amdgpu_vm *vm) { > > + for (int i = 0; i < __AMDGPU_PL_NUM; ++i) { > > + if (!(drm_memory_stats_is_zero(&vm->stats[i].drm) && > > + vm->stats[i].evicted == 0)) > > + return false; > > + } > > + return true; > > +} > > + > > /** > > * amdgpu_vm_fini - tear down a vm instance > > * > > @@ -2616,7 +2722,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, > > struct amdgpu_vm *vm) > > > > root = amdgpu_bo_ref(vm->root.bo); > > amdgpu_bo_reserve(root, true); > > - amdgpu_vm_put_task_info(vm->task_info); > > amdgpu_vm_set_pasid(adev, vm, 0); > > dma_fence_wait(vm->last_unlocked, false); > > dma_fence_put(vm->last_unlocked); > > @@ -2665,6 +2770,16 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, > struct amdgpu_vm *vm) > > } > > > > ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move); > > + > > + if (!amdgpu_vm_stats_is_zero(vm)) { > > + struct amdgpu_task_info *ti = vm->task_info; > > + > > + dev_warn(adev->dev, > > + "VM memory stats for proc %s(%d) task %s(%d) is non-zero > when fini\n", > > + ti->process_name, ti->pid, ti->task_name, ti->tgid); > > + } > > + > > + amdgpu_vm_put_task_info(vm->task_info); > > } 
> > > > /** > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > > index 6a1b344e15e1b..a3e128e373bc6 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > > @@ -35,6 +35,7 @@ > > #include "amdgpu_sync.h" > > #include "amdgpu_ring.h" > > #include "amdgpu_ids.h" > > +#include "amdgpu_ttm.h" > > > > struct drm_exec; > > > > @@ -202,9 +203,13 @@ struct amdgpu_vm_bo_base { > > /* protected by bo being reserved */ > > struct amdgpu_vm_bo_base *next; > > > > - /* protected by spinlock */ > > + /* protected by vm status_lock */ > > struct list_head vm_status; > > > > + /* if the bo is counted as shared in mem stats > > + * protected by vm status_lock */ > > + bool shared; > > + > > /* protected by the BO being reserved */ > > bool moved; > > }; > > @@ -324,10 +329,7 @@ struct amdgpu_vm_fault_info { > > struct amdgpu_mem_stats { > > struct drm_memory_stats drm; > > > > - /* buffers that requested this placement */ > > - uint64_t requested; > > - /* buffers that requested this placement > > - * but are currently evicted */ > > + /* buffers that requested this placement but are currently evicted > > +*/ > > uint64_t evicted; > > }; > > > > @@ -345,6 +347,9 @@ struct amdgpu_vm { > > /* Lock to protect vm_bo add/del/move on all lists of vm */ > > spinlock_t status_lock; > > > > + /* Memory statistics for this vm, protected by status_lock */ > > + struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]; > > + > > /* Per-VM and PT BOs who needs a validation */ > > struct list_head evicted; > > > > @@ -525,6 +530,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device > *adev, > > bool clear); > > bool amdgpu_vm_evictable(struct amdgpu_bo *bo); > > void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted); > > +void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base, > > + struct ttm_resource *new_res, int sign); void > > +amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo); void > > 
+amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem, > > + bool evicted); > > uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr); > > struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, > > struct amdgpu_bo *bo); > > @@ -575,8 +585,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm > *vm); > > void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, > > struct amdgpu_vm *vm); > > void amdgpu_vm_get_memory(struct amdgpu_vm *vm, > > - struct amdgpu_mem_stats *stats, > > - unsigned int size); > > + struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]); > > > > int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, > > struct amdgpu_bo_vm *vmbo, bool immediate); diff --git > > a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c > > index f78a0434a48fa..b0bf216821152 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c > > @@ -537,6 +537,7 @@ static void amdgpu_vm_pt_free(struct > amdgpu_vm_bo_base *entry) > > if (!entry->bo) > > return; > > > > + amdgpu_vm_update_stats(entry, entry->bo->tbo.resource, -1); > > entry->bo->vm_bo = NULL; > > ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); > >