[Public]

> From: Christian König <ckoenig.leichtzumerken@xxxxxxxxx>
> Sent: Thursday, October 31, 2024 8:54
> On 25.10.24 at 19:41, Yunxiang Li wrote:
> > Before, every time fdinfo is queried we try to lock all the BOs in the
> > VM and calculate memory usage from scratch. This works okay if the
> > fdinfo is rarely read and the VMs don't have a ton of BOs. If either
> > of these conditions is not true, we get a massive performance hit.
> >
> > In this new revision, we track the BOs as they change states. This way
> > when the fdinfo is queried we only need to take the status lock and
> > copy out the usage stats with minimal impact to the runtime performance.
> >
> > Signed-off-by: Yunxiang Li <Yunxiang.Li@xxxxxxx>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c |  14 +-
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c  |  10 +-
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  | 107 +++--------
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |   5 +-
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h     |   2 +-
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c      | 189 +++++++++++++++-----
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h      |  12 +-
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c   |   1 +
> >  8 files changed, 199 insertions(+), 141 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> > index b144404902255..1d8a0ff3c8604 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> > @@ -36,6 +36,7 @@
> >  #include "amdgpu_gem.h"
> >  #include "amdgpu_dma_buf.h"
> >  #include "amdgpu_xgmi.h"
> > +#include "amdgpu_vm.h"
> >  #include <drm/amdgpu_drm.h>
> >  #include <drm/ttm/ttm_tt.h>
> >  #include <linux/dma-buf.h>
> > @@ -190,6 +191,13 @@ static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach,
> >  	}
> >  }
> >
> > +static void amdgpu_dma_buf_release(struct dma_buf *buf)
> > +{
> > +	struct amdgpu_bo *bo = gem_to_amdgpu_bo(buf->priv);
> > +	amdgpu_vm_bo_update_shared(bo, -1);
> > +	drm_gem_dmabuf_release(buf);
>
> Please run checkpatch.pl on the patch. As far as I can see it would complain
> about the coding style here (empty line between declaration and code).
>
> Not much of an issue but we would like to prevent upstream from complaining
> about such things.

Will do.
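Something like this, then (untested; the only change is the blank line after the declaration, the logic is unchanged):

	static void amdgpu_dma_buf_release(struct dma_buf *buf)
	{
		struct amdgpu_bo *bo = gem_to_amdgpu_bo(buf->priv);

		amdgpu_vm_bo_update_shared(bo, -1);
		drm_gem_dmabuf_release(buf);
	}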
> > +}
> > +
> >  /**
> >   * amdgpu_dma_buf_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
> >   * @dma_buf: Shared DMA buffer
> > @@ -237,7 +245,7 @@ const struct dma_buf_ops amdgpu_dmabuf_ops = {
> >  	.unpin = amdgpu_dma_buf_unpin,
> >  	.map_dma_buf = amdgpu_dma_buf_map,
> >  	.unmap_dma_buf = amdgpu_dma_buf_unmap,
> > -	.release = drm_gem_dmabuf_release,
> > +	.release = amdgpu_dma_buf_release,
> >  	.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
> >  	.mmap = drm_gem_dmabuf_mmap,
> >  	.vmap = drm_gem_dmabuf_vmap,
> > @@ -265,8 +273,10 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
> >  		return ERR_PTR(-EPERM);
> >
> >  	buf = drm_gem_prime_export(gobj, flags);
> > -	if (!IS_ERR(buf))
> > +	if (!IS_ERR(buf)) {
> >  		buf->ops = &amdgpu_dmabuf_ops;
> > +		amdgpu_vm_bo_update_shared(bo, +1);
> > +	}
> >
> >  	return buf;
> >  }
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
> > index 7a9573958d87c..e0e09f7b39d10 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
> > @@ -60,7 +60,7 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
> >  	struct amdgpu_fpriv *fpriv = file->driver_priv;
> >  	struct amdgpu_vm *vm = &fpriv->vm;
> >
> > -	struct amdgpu_mem_stats stats[__AMDGPU_PL_LAST + 1] = { };
> > +	struct amdgpu_mem_stats stats[__AMDGPU_PL_LAST] = { };
> >  	ktime_t usage[AMDGPU_HW_IP_NUM];
> >  	const char *pl_name[] = {
> >  		[TTM_PL_VRAM] = "vram",
> > @@ -70,13 +70,7 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
> >  	unsigned int hw_ip, i;
> >  	int ret;
> >
> > -	ret = amdgpu_bo_reserve(vm->root.bo, false);
> > -	if (ret)
> > -		return;
> > -
> > -	amdgpu_vm_get_memory(vm, stats, ARRAY_SIZE(stats));
> > -	amdgpu_bo_unreserve(vm->root.bo);
> > -
> > +	amdgpu_vm_get_memory(vm, stats);
> >  	amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage);
> >
> >  	/*
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > index 2436b7c9ad12b..98563124ff99c 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> > @@ -1156,7 +1156,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
> >  		return;
> >
> >  	abo = ttm_to_amdgpu_bo(bo);
> > -	amdgpu_vm_bo_invalidate(abo, evict);
> > +	amdgpu_vm_bo_move(abo, new_mem, evict);
> >
> >  	amdgpu_bo_kunmap(abo);
> >
> > @@ -1169,86 +1169,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
> >  		old_mem ? old_mem->mem_type : -1);
> >  }
> >
> > -void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
> > -			  struct amdgpu_mem_stats *stats,
> > -			  unsigned int sz)
> > -{
> > -	const unsigned int domain_to_pl[] = {
> > -		[ilog2(AMDGPU_GEM_DOMAIN_CPU)]	    = TTM_PL_SYSTEM,
> > -		[ilog2(AMDGPU_GEM_DOMAIN_GTT)]	    = TTM_PL_TT,
> > -		[ilog2(AMDGPU_GEM_DOMAIN_VRAM)]	    = TTM_PL_VRAM,
> > -		[ilog2(AMDGPU_GEM_DOMAIN_GDS)]	    = AMDGPU_PL_GDS,
> > -		[ilog2(AMDGPU_GEM_DOMAIN_GWS)]	    = AMDGPU_PL_GWS,
> > -		[ilog2(AMDGPU_GEM_DOMAIN_OA)]	    = AMDGPU_PL_OA,
> > -		[ilog2(AMDGPU_GEM_DOMAIN_DOORBELL)] = AMDGPU_PL_DOORBELL,
> > -	};
> > -	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> > -	struct ttm_resource *res = bo->tbo.resource;
> > -	struct drm_gem_object *obj = &bo->tbo.base;
> > -	uint64_t size = amdgpu_bo_size(bo);
> > -	unsigned int type;
> > -
> > -	if (!res) {
> > -		/*
> > -		 * If no backing store use one of the preferred domain for basic
> > -		 * stats. We take the MSB since that should give a reasonable
> > -		 * view.
> > -		 */
> > -		BUILD_BUG_ON(TTM_PL_VRAM < TTM_PL_TT ||
> > -			     TTM_PL_VRAM < TTM_PL_SYSTEM);
> > -		type = fls(bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK);
> > -		if (!type)
> > -			return;
> > -		type--;
> > -		if (drm_WARN_ON_ONCE(&adev->ddev,
> > -				     type >= ARRAY_SIZE(domain_to_pl)))
> > -			return;
> > -		type = domain_to_pl[type];
> > -	} else {
> > -		type = res->mem_type;
> > -	}
> > -
> > -	/* Squash some into 'cpu' to keep the legacy userspace view. */
> > -	switch (type) {
> > -	case TTM_PL_VRAM:
> > -	case TTM_PL_TT:
> > -	case TTM_PL_SYSTEM:
> > -		break;
> > -	default:
> > -		type = TTM_PL_SYSTEM;
> > -		break;
> > -	}
> > -
> > -	if (drm_WARN_ON_ONCE(&adev->ddev, type >= sz))
> > -		return;
> > -
> > -	/* DRM stats common fields: */
> > -
> > -	if (drm_gem_object_is_shared_for_memory_stats(obj))
> > -		stats[type].drm.shared += size;
> > -	else
> > -		stats[type].drm.private += size;
> > -
> > -	if (res) {
> > -		stats[type].drm.resident += size;
> > -
> > -		if (!dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_BOOKKEEP))
> > -			stats[type].drm.active += size;
> > -		else if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE)
> > -			stats[type].drm.purgeable += size;
> > -	}
> > -
> > -	/* amdgpu specific stats: */
> > -
> > -	if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) {
> > -		stats[TTM_PL_VRAM].requested += size;
> > -		if (type != TTM_PL_VRAM)
> > -			stats[TTM_PL_VRAM].evicted += size;
> > -	} else if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_GTT) {
> > -		stats[TTM_PL_TT].requested += size;
> > -	}
> > -}
> > -
> >  /**
> >   * amdgpu_bo_release_notify - notification about a BO being released
> >   * @bo: pointer to a buffer object
> > @@ -1463,6 +1383,31 @@ u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo)
> >  	return amdgpu_gmc_sign_extend(offset);
> >  }
> >
> > +uint32_t amdgpu_bo_get_preferred_placement(struct amdgpu_bo *bo)
> > +{
> > +	uint32_t domain = bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK;
> > +	if (!domain)
> > +		return TTM_PL_SYSTEM;
> > +
> > +	switch (ilog2(domain)) {
> > +	case AMDGPU_GEM_DOMAIN_CPU:
> > +		return TTM_PL_SYSTEM;
> > +	case AMDGPU_GEM_DOMAIN_GTT:
> > +		return TTM_PL_TT;
> > +	case AMDGPU_GEM_DOMAIN_VRAM:
> > +		return TTM_PL_VRAM;
> > +	case AMDGPU_GEM_DOMAIN_GDS:
> > +		return AMDGPU_PL_GDS;
> > +	case AMDGPU_GEM_DOMAIN_GWS:
> > +		return AMDGPU_PL_GWS;
> > +	case AMDGPU_GEM_DOMAIN_OA:
> > +		return AMDGPU_PL_OA;
> > +	case AMDGPU_GEM_DOMAIN_DOORBELL:
> > +		return AMDGPU_PL_DOORBELL;
> > +	default:
> > +		return TTM_PL_SYSTEM;
>
> If I'm not completely mistaken that won't work like that.
>
> The AMDGPU_GEM_DOMAIN_* defines are masks and not shifts.

Yeah, you are right, it should have been rounddown_pow_of_two.
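I.e. something like this for v3 (untested sketch; rounddown_pow_of_two() reduces the mask to its MSB, which matches what the old fls()-based code picked):

	uint32_t amdgpu_bo_get_preferred_placement(struct amdgpu_bo *bo)
	{
		uint32_t domain = bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK;

		if (!domain)
			return TTM_PL_SYSTEM;

		/* the case labels are single-bit masks, so switch on the MSB */
		switch (rounddown_pow_of_two(domain)) {
		case AMDGPU_GEM_DOMAIN_CPU:
			return TTM_PL_SYSTEM;
		case AMDGPU_GEM_DOMAIN_GTT:
			return TTM_PL_TT;
		case AMDGPU_GEM_DOMAIN_VRAM:
			return TTM_PL_VRAM;
		case AMDGPU_GEM_DOMAIN_GDS:
			return AMDGPU_PL_GDS;
		case AMDGPU_GEM_DOMAIN_GWS:
			return AMDGPU_PL_GWS;
		case AMDGPU_GEM_DOMAIN_OA:
			return AMDGPU_PL_OA;
		case AMDGPU_GEM_DOMAIN_DOORBELL:
			return AMDGPU_PL_DOORBELL;
		default:
			return TTM_PL_SYSTEM;
		}
	}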
> > +	}
> > +}
> > +
> >  /**
> >   * amdgpu_bo_get_preferred_domain - get preferred domain
> >   * @adev: amdgpu device object
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> > index be6769852ece4..bd58a8b0ece66 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> > @@ -30,6 +30,7 @@
> >
> >  #include <drm/amdgpu_drm.h>
> >  #include "amdgpu.h"
> > +#include "amdgpu_ttm.h"
> >  #include "amdgpu_res_cursor.h"
>
> Why is that necessary?

Otherwise I get a compile error for the AMDGPU_PL_* defines.

> >
> >  #ifdef CONFIG_MMU_NOTIFIER
> > @@ -300,9 +301,7 @@ int amdgpu_bo_sync_wait_resv(struct amdgpu_device *adev, struct dma_resv *resv,
> >  int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr);
> >  u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
> >  u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo);
> > -void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
> > -			  struct amdgpu_mem_stats *stats,
> > -			  unsigned int size);
> > +uint32_t amdgpu_bo_get_preferred_placement(struct amdgpu_bo *bo);
> >  uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
> >  					uint32_t domain);
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> > index 2852a6064c9ac..a9088e864fde4 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> > @@ -26,8 +26,8 @@
> >
> >  #include <linux/dma-direction.h>
> >  #include <drm/gpu_scheduler.h>
> > +#include <drm/ttm/ttm_placement.h>
> >  #include "amdgpu_vram_mgr.h"
> > -#include "amdgpu.h"
>
> Looks like a valuable cleanup, but should probably be a separate patch.

Without this there's a circular include that breaks compilation when combined with the above.

> >
> >  #define AMDGPU_PL_GDS		(TTM_PL_PRIV + 0)
> >  #define AMDGPU_PL_GWS		(TTM_PL_PRIV + 1)
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > index 9fab64edd0530..abd35c18ddaa8 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > @@ -36,6 +36,7 @@
> >  #include <drm/ttm/ttm_tt.h>
> >  #include <drm/drm_exec.h>
> >  #include "amdgpu.h"
> > +#include "amdgpu_vm.h"
> >  #include "amdgpu_trace.h"
> >  #include "amdgpu_amdkfd.h"
> >  #include "amdgpu_gmc.h"
> > @@ -310,6 +311,94 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
> >  	spin_unlock(&vm->status_lock);
> >  }
> >
> > +/**
> > + * amdgpu_vm_update_shared - helper to update shared memory stat
> > + * @base: base structure for tracking BO usage in a VM
> > + * @sign: if we should add (+1) or subtract (-1) from the shared stat
> > + *
> > + * Takes the vm status_lock and updates the shared memory stat. If the basic
> > + * stat changed (e.g. buffer was moved) amdgpu_vm_update_stats need to be called
> > + * as well.
> > + */
> > +static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base, int sign)
> > +{
> > +	struct amdgpu_vm *vm = base->vm;
> > +	struct amdgpu_bo *bo = base->bo;
> > +	struct ttm_resource *res;
> > +	int64_t size;
> > +	uint32_t type;
> > +
> > +	if (!vm || !bo)
> > +		return;
> > +
> > +	size = sign * amdgpu_bo_size(bo);
> > +	if ((res = bo->tbo.resource))
> > +		type = res->mem_type;
> > +	else
> > +		type = amdgpu_bo_get_preferred_placement(bo);
>
> As discussed with Tvrtko that won't work like this.
>
> Either use the preferred placement or the actual backing store, but don't use a
> fallback here.

I had a follow-up discussion with Tvrtko here: https://lists.freedesktop.org/archives/amd-gfx/2024-October/116024.html. It seems like this is the intended semantics for the drm-total-* stats. I can see it going both ways; I guess it just comes down to which design is most useful for whoever ends up reading the stats.

The current design is for it to mean "all the buffers currently at X" plus "all the buffers that want to be at X but currently don't have a backing store". The alternative, I guess, is for it to mean "all the buffers that want to be at X".
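To make that concrete, the rule this revision implements boils down to something like this (sketch only; the helper name is made up):

	/* which bucket a BO is accounted under in this revision */
	static uint32_t bo_stats_bucket(struct amdgpu_bo *bo)
	{
		if (bo->tbo.resource)		/* where the BO currently is */
			return bo->tbo.resource->mem_type;
		/* no backing store yet: account it where it wants to be */
		return amdgpu_bo_get_preferred_placement(bo);
	}

So e.g. a BO that prefers VRAM but has no backing store yet counts toward drm-total-vram; under the alternative it would keep counting toward drm-total-vram even once it is resident in GTT.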
Btw, I'm having trouble figuring out where I should account for drm-active-*. It's for buffers that are currently being used (e.g. have an unsignaled fence), but the work scheduling part seems quite far removed from the individual BOs...

> > +	if (type >= __AMDGPU_PL_LAST)
> > +		return;
> > +
> > +	spin_lock(&vm->status_lock);
> > +	vm->stats[type].drm.shared += size;
> > +	vm->stats[type].drm.private -= size;
> > +	spin_unlock(&vm->status_lock);
> > +}
> > +
> > +/**
> > + * amdgpu_vm_update_stats - helper to update normal memory stat
> > + * @base: base structure for tracking BO usage in a VM
> > + * @new_res: if not NULL, the ttm_resource to use for the purpose of accounting
> > + *           (i.e. ignore the one in the BO)
> > + * @sign: if we should add (+1) or subtract (-1) from the stat
> > + *
> > + * Takes the vm status_lock and updates the basic memory stat. If the shared
> > + * stat changed (e.g. buffer was exported) amdgpu_vm_update_shared need to be
> > + * called as well.
> > + */
> > +void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
> > +			    struct ttm_resource *new_res, int sign)
> > +{
> > +	struct amdgpu_vm *vm = base->vm;
> > +	struct amdgpu_bo *bo = base->bo;
> > +	struct ttm_resource *res;
> > +	int64_t size;
> > +	uint32_t type;
> > +	bool shared;
> > +
> > +	if (!vm || !bo)
> > +		return;
> > +
> > +	size = sign * amdgpu_bo_size(bo);
> > +	res = new_res ? new_res : bo->tbo.resource;
> > +	type = res ? res->mem_type : amdgpu_bo_get_preferred_placement(bo);
>
> Same here. Don't use the preferred domain as fallback.
>
> > +	shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
> > +
> > +	if (type >= __AMDGPU_PL_LAST)
> > +		return;
> > +
> > +	spin_lock(&vm->status_lock);
> > +
> > +	if (shared)
> > +		vm->stats[type].drm.shared += size;
> > +	else
> > +		vm->stats[type].drm.private += size;
> > +	if (res)
> > +		vm->stats[type].drm.resident += size;
> > +	if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE)
> > +		vm->stats[type].drm.purgeable += size;
> > +
> > +	if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) {
> > +		vm->stats[TTM_PL_VRAM].requested += size;
> > +		if (type != TTM_PL_VRAM)
> > +			vm->stats[TTM_PL_VRAM].evicted += size;
>
> That check is probably not correct. We have BOs which can be placed in both
> VRAM and GTT.

That is true, but does it make sense to count it towards evicted if, say, our picking order prefers VRAM over GTT?
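If it doesn't, one option would be to only count a BO as evicted when its current placement isn't among its preferred domains at all, roughly like this (untested; amdgpu_mem_type_to_domain() is the existing helper):

	if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) {
		vm->stats[TTM_PL_VRAM].requested += size;
		/* a VRAM|GTT BO sitting in GTT would then not count as evicted */
		if (type != TTM_PL_VRAM &&
		    !(bo->preferred_domains & amdgpu_mem_type_to_domain(type)))
			vm->stats[TTM_PL_VRAM].evicted += size;
	}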
> > +	} else if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_GTT) {
> > +		vm->stats[TTM_PL_TT].requested += size;
> > +	}
> > +
> > +	spin_unlock(&vm->status_lock);
> > +}
> > +
> >  /**
> >   * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
> >   *
> > @@ -332,6 +421,7 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
> >  		return;
> >  	base->next = bo->vm_bo;
> >  	bo->vm_bo = base;
> > +	amdgpu_vm_update_stats(base, NULL, +1);
> >
> >  	if (!amdgpu_vm_is_bo_always_valid(vm, bo))
> >  		return;
> > @@ -1082,53 +1172,11 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> >  	return r;
> >  }
> >
> > -static void amdgpu_vm_bo_get_memory(struct amdgpu_bo_va *bo_va,
> > -				    struct amdgpu_mem_stats *stats,
> > -				    unsigned int size)
> > -{
> > -	struct amdgpu_vm *vm = bo_va->base.vm;
> > -	struct amdgpu_bo *bo = bo_va->base.bo;
> > -
> > -	if (!bo)
> > -		return;
> > -
> > -	/*
> > -	 * For now ignore BOs which are currently locked and potentially
> > -	 * changing their location.
> > -	 */
> > -	if (!amdgpu_vm_is_bo_always_valid(vm, bo) &&
> > -	    !dma_resv_trylock(bo->tbo.base.resv))
> > -		return;
> > -
> > -	amdgpu_bo_get_memory(bo, stats, size);
> > -	if (!amdgpu_vm_is_bo_always_valid(vm, bo))
> > -		dma_resv_unlock(bo->tbo.base.resv);
> > -}
> > -
> >  void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
> > -			  struct amdgpu_mem_stats *stats,
> > -			  unsigned int size)
> > +			  struct amdgpu_mem_stats stats[__AMDGPU_PL_LAST])
> >  {
> > -	struct amdgpu_bo_va *bo_va, *tmp;
> > -
> >  	spin_lock(&vm->status_lock);
> > -	list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status)
> > -		amdgpu_vm_bo_get_memory(bo_va, stats, size);
> > -
> > -	list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status)
> > -		amdgpu_vm_bo_get_memory(bo_va, stats, size);
> > -
> > -	list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)
> > -		amdgpu_vm_bo_get_memory(bo_va, stats, size);
> > -
> > -	list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status)
> > -		amdgpu_vm_bo_get_memory(bo_va, stats, size);
> > -
> > -	list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status)
> > -		amdgpu_vm_bo_get_memory(bo_va, stats, size);
> > -
> > -	list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status)
> > -		amdgpu_vm_bo_get_memory(bo_va, stats, size);
> > +	memcpy(stats, vm->stats, sizeof(*stats) * __AMDGPU_PL_LAST);
> >  	spin_unlock(&vm->status_lock);
> >  }
> >
> > @@ -2071,6 +2119,7 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
> >  		if (*base != &bo_va->base)
> >  			continue;
> >
> > +		amdgpu_vm_update_stats(*base, NULL, -1);
> >  		*base = bo_va->base.next;
> >  		break;
> >  	}
> > @@ -2136,6 +2185,22 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
> >  	return true;
> >  }
> >
> > +/**
> > + * amdgpu_vm_bo_update_shared - called when bo gets shared/unshared
> > + *
> > + * @bo: amdgpu buffer object
> > + * @sign: if we should add (+1) or subtract (-1) the memory stat
> > + *
> > + * Update the per VM stats for all the vm
> > + */
> > +void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo, int sign)
> > +{
> > +	struct amdgpu_vm_bo_base *bo_base;
> > +
> > +	for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next)
> > +		amdgpu_vm_update_shared(bo_base, sign);
> > +}
> > +
> >  /**
> >   * amdgpu_vm_bo_invalidate - mark the bo as invalid
> >   *
> > @@ -2169,6 +2234,28 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted)
> >  	}
> >  }
> >
> > +/**
> > + * amdgpu_vm_bo_move - handle BO move
> > + *
> > + * @bo: amdgpu buffer object
> > + * @new_mem: the new placement of the BO move
> > + * @evicted: is the BO evicted
> > + *
> > + * Update the memory stats for the new placement and mark @bo as invalid.
> > + */
> > +void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
> > +		       bool evicted)
> > +{
> > +	struct amdgpu_vm_bo_base *bo_base;
> > +
> > +	for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
> > +		amdgpu_vm_update_stats(bo_base, bo->tbo.resource, -1);
> > +		amdgpu_vm_update_stats(bo_base, new_mem, +1);
> > +	}
> > +
> > +	amdgpu_vm_bo_invalidate(bo, evicted);
> > +}
> > +
> >  /**
> >   * amdgpu_vm_get_block_size - calculate VM page table size as power of two
> >   *
> > @@ -2585,6 +2672,18 @@ void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> >  	vm->is_compute_context = false;
> >  }
> >
> > +static int amdgpu_vm_stats_is_zero(struct amdgpu_vm *vm)
> > +{
> > +	int is_zero = 1;
> > +	for (int i = 0; i < __AMDGPU_PL_LAST; ++i) {
> > +		is_zero = drm_memory_stats_is_zero(&vm->stats[i].drm) &&
> > +			  vm->stats->evicted == 0 && vm->stats->requested == 0;
> > +		if (!is_zero)
> > +			break;
>
> Just make that an "if (...) return false", no need for the local variable.
>
> Regards,
> Christian.

D'oh!
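Something like this, then (untested; I also just noticed the vm->stats-> there should have been vm->stats[i]):

	static bool amdgpu_vm_stats_is_zero(struct amdgpu_vm *vm)
	{
		for (int i = 0; i < __AMDGPU_PL_LAST; ++i) {
			/* bail out on the first non-zero bucket */
			if (!(drm_memory_stats_is_zero(&vm->stats[i].drm) &&
			      vm->stats[i].evicted == 0 &&
			      vm->stats[i].requested == 0))
				return false;
		}
		return true;
	}

Teddy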