Add the AMDGPU_INFO_VM_STAT query to give userspace a detailed view of its GPU memory usage and evictions.
This will help performance investigations.
Signed-off-by: Marek Olšák <marek.olsak@xxxxxxx>
The patch is attached.
Marek
From 01f41d5b49920b11494ca07f6dde24ea3098fa9f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@xxxxxxx>
Date: Sat, 24 Dec 2022 17:41:51 -0500
Subject: [PATCH 2/2] drm/amdgpu: add AMDGPU_INFO_VM_STAT to return GPU VM stats about the process
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To give userspace a detailed view about its GPU memory usage and evictions.
This will help performance investigations.

Signed-off-by: Marek Olšák <marek.olsak@xxxxxxx>
---
Review changes relative to the posted patch:
 - skip BOs that have no backing resource instead of dereferencing it
 - do not walk vm->freed, it links freed mappings rather than BOs
 - walk the status lists through one helper using list_for_each_entry()
 - document the helpers and the new query

 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 119 ++++++++++++++++++++++++
 include/uapi/drm/amdgpu_drm.h           |  30 +++++++
 3 files changed, 151 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 155f905b00c9..ee1532959032 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -108,9 +108,10 @@
  * - 3.50.0 - Update AMDGPU_INFO_DEV_INFO IOCTL for minimum engine and memory clock
  *            Update AMDGPU_INFO_SENSOR IOCTL for PEAK_PSTATE engine and memory clock
  *   3.51.0 - Return the PCIe gen and lanes from the INFO ioctl
+ *   3.52.0 - Add AMDGPU_INFO_VM_STAT
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	51
+#define KMS_DRIVER_MINOR	52
 #define KMS_DRIVER_PATCHLEVEL	0
 
 unsigned int amdgpu_vram_limit = UINT_MAX;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index fba306e0ef87..619c3a633ee6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -515,6 +515,97 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
 	return 0;
 }
 
+/*
+ * amdgpu_vm_stat_visit_bo - account one BO in the per-VM memory statistics
+ *
+ * @stat: statistics being accumulated
+ * @bo_va: entry of a VM status list; only bo_va->base.bo is read
+ *
+ * Adds the BO size and bumps the BO count in the bucket matching the BO's
+ * current placement (VRAM, GTT or system memory) and in the bucket matching
+ * the domain userspace asked for. A BO that prefers VRAM but is currently
+ * placed elsewhere is additionally counted as evicted.
+ */
+static void amdgpu_vm_stat_visit_bo(struct drm_amdgpu_info_vm_stat *stat,
+				    struct amdgpu_bo_va *bo_va)
+{
+	struct amdgpu_bo *bo = bo_va->base.bo;
+	uint32_t mem_type;
+	uint64_t size;
+
+	/* Nothing to account without a BO or without a backing resource. */
+	if (!bo || !bo->tbo.resource)
+		return;
+
+	size = amdgpu_bo_size(bo);
+	mem_type = bo->tbo.resource->mem_type;
+
+	switch (mem_type) {
+	case TTM_PL_VRAM:
+		if (bo->tbo.deleted) {
+			stat->unreclaimed_vram += size;
+			stat->unreclaimed_vram_bo_count++;
+		} else {
+			stat->vram += size;
+			stat->vram_bo_count++;
+
+			if (amdgpu_bo_in_cpu_visible_vram(bo)) {
+				stat->visible_vram += size;
+				stat->visible_vram_bo_count++;
+			}
+		}
+		break;
+	case TTM_PL_TT:
+		if (bo->tbo.deleted) {
+			stat->unreclaimed_gtt += size;
+			stat->unreclaimed_gtt_bo_count++;
+		} else {
+			stat->gtt += size;
+			stat->gtt_bo_count++;
+		}
+		break;
+	case TTM_PL_SYSTEM:
+		stat->sysmem += size;
+		stat->sysmem_bo_count++;
+		break;
+	default:
+		/* Ignore GDS, GWS, and OA - those are not important. */
+		break;
+	}
+
+	if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) {
+		stat->requested_vram += size;
+		stat->requested_vram_bo_count++;
+
+		if (mem_type != TTM_PL_VRAM) {
+			stat->evicted_vram += size;
+			stat->evicted_vram_bo_count++;
+
+			if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
+				stat->evicted_visible_vram += size;
+				stat->evicted_visible_vram_bo_count++;
+			}
+		}
+	} else if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_GTT) {
+		stat->requested_gtt += size;
+		stat->requested_gtt_bo_count++;
+	}
+}
+
+/*
+ * amdgpu_vm_stat_visit_list - account every BO linked on one VM status list
+ *
+ * The walk does not unlink entries, so the plain iterator is sufficient.
+ */
+static void amdgpu_vm_stat_visit_list(struct drm_amdgpu_info_vm_stat *stat,
+				      struct list_head *list)
+{
+	struct amdgpu_bo_va *bo_va;
+
+	list_for_each_entry(bo_va, list, base.vm_status)
+		amdgpu_vm_stat_visit_bo(stat, bo_va);
+}
+
 /*
  * Userspace get information ioctl
  */
@@ -1128,6 +1219,34 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		kfree(caps);
 		return r;
 	}
+	case AMDGPU_INFO_VM_STAT: {
+		struct drm_amdgpu_info_vm_stat stat = {};
+		struct amdgpu_fpriv *fpriv = filp->driver_priv;
+		struct amdgpu_vm *vm = &fpriv->vm;
+		int r;
+
+		r = amdgpu_bo_reserve(vm->root.bo, true);
+		if (r)
+			return r;
+
+		spin_lock(&vm->status_lock);
+
+		/*
+		 * vm->freed is intentionally not walked: it links freed
+		 * mappings rather than BOs.
+		 */
+		amdgpu_vm_stat_visit_list(&stat, &vm->idle);
+		amdgpu_vm_stat_visit_list(&stat, &vm->evicted);
+		amdgpu_vm_stat_visit_list(&stat, &vm->relocated);
+		amdgpu_vm_stat_visit_list(&stat, &vm->moved);
+		amdgpu_vm_stat_visit_list(&stat, &vm->invalidated);
+		amdgpu_vm_stat_visit_list(&stat, &vm->done);
+
+		spin_unlock(&vm->status_lock);
+		amdgpu_bo_unreserve(vm->root.bo);
+		return copy_to_user(out, &stat,
+				    min((size_t)size, sizeof(stat))) ? -EFAULT : 0;
+	}
 	default:
 		DRM_DEBUG_KMS("Invalid request %d\n", info->query);
 		return -EINVAL;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index f7fc7325f17f..521b7ca0ffe9 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -875,6 +875,8 @@ struct drm_amdgpu_cs_chunk_data {
 	#define AMDGPU_INFO_VIDEO_CAPS_DECODE		0
 	/* Subquery id: Encode */
 	#define AMDGPU_INFO_VIDEO_CAPS_ENCODE		1
+/* Query memory statistics of the VM behind this DRM file descriptor */
+#define AMDGPU_INFO_VM_STAT			0x22
 
 #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT	0
 #define AMDGPU_INFO_MMR_SE_INDEX_MASK	0xff
@@ -1157,6 +1159,34 @@ struct drm_amdgpu_info_vce_clock_table {
 	__u32 pad;
 };
 
+/* Statistics of the current VM - per driver FD. */
+struct drm_amdgpu_info_vm_stat {
+	/* Current memory usage. Total use of a heap = <heap> + unreclaimed_<heap>. */
+	__u64 vram;	/* includes visible_vram */
+	__u64 gtt;
+	__u64 sysmem;
+	__u64 unreclaimed_vram;	/* marked for freeing */
+	__u64 unreclaimed_gtt;	/* marked for freeing */
+	/* What userspace requested. */
+	__u64 requested_vram;
+	__u64 requested_gtt;
+	/* Other stats. */
+	__u64 visible_vram;	/* included in "vram" */
+	__u64 evicted_vram;	/* VRAM buffers not in VRAM, incl. visible VRAM */
+	__u64 evicted_visible_vram;	/* visible VRAM buffers not in VRAM */
+	/* Buffer counts. */
+	__u32 vram_bo_count;
+	__u32 gtt_bo_count;
+	__u32 sysmem_bo_count;
+	__u32 unreclaimed_vram_bo_count;
+	__u32 unreclaimed_gtt_bo_count;
+	__u32 requested_vram_bo_count;
+	__u32 requested_gtt_bo_count;
+	__u32 visible_vram_bo_count;
+	__u32 evicted_vram_bo_count;
+	__u32 evicted_visible_vram_bo_count;
+};
+
 /* query video encode/decode caps */
 #define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2			0
 #define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4			1
-- 
2.25.1