On 2024-10-11 9:23, Philip Yang wrote:
>
> On 2024-10-09 17:20, Felix Kuehling wrote:
>>
>> On 2024-10-04 16:28, Philip Yang wrote:
>>> Per process device data pdd->vram_usage is used by rocm-smi to report
>>> VRAM usage, this is currently missing the svm_bo usage accounting, so
>>> "rocm-smi --showpids" per process report is incorrect.
>>>
>>> Add pdd->vram_usage accounting for svm_bo and change type to atomic64_t
>>> because it is updated outside process mutex now.
>>>
>>> Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
>>> ---
>>>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  6 +++---
>>>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  2 +-
>>>   drivers/gpu/drm/amd/amdkfd/kfd_process.c |  4 ++--
>>>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 22 ++++++++++++++++++++++
>>>   4 files changed, 28 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> index a1f191a5984b..065d87841459 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> @@ -1148,7 +1148,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
>>>           if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
>>>               size >>= 1;
>>> -        WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
>>> +        atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage);
>>>       }
>>>       mutex_unlock(&p->mutex);
>>> @@ -1219,7 +1219,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
>>>       kfd_process_device_remove_obj_handle(
>>>           pdd, GET_IDR_HANDLE(args->handle));
>>> -    WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
>>> +    atomic64_sub(size, &pdd->vram_usage);
>>>   err_unlock:
>>>   err_pdd:
>>> @@ -2347,7 +2347,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
>>>       } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
>>>           bo_bucket->restored_offset = offset;
>>>           /* Update the VRAM usage count */
>>> -        WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
>>> +        atomic64_add(bo_bucket->size, &pdd->vram_usage);
>>>       }
>>>       return 0;
>>>   }
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> index 6a5bf88cc232..9e5ca0b93b2a 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> @@ -775,7 +775,7 @@ struct kfd_process_device {
>>>       enum kfd_pdd_bound bound;
>>>       /* VRAM usage */
>>> -    uint64_t vram_usage;
>>> +    atomic64_t vram_usage;
>>>       struct attribute attr_vram;
>>>       char vram_filename[MAX_SYSFS_FILENAME_LEN];
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> index 7909dfd158be..4810521736a9 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> @@ -332,7 +332,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
>>>       } else if (strncmp(attr->name, "vram_", 5) == 0) {
>>>           struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
>>>                                                          attr_vram);
>>> -        return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
>>> +        return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage));
>>>       } else if (strncmp(attr->name, "sdma_", 5) == 0) {
>>>           struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
>>>                                                          attr_sdma);
>>> @@ -1625,7 +1625,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
>>>       pdd->bound = PDD_UNBOUND;
>>>       pdd->already_dequeued = false;
>>>       pdd->runtime_inuse = false;
>>> -    pdd->vram_usage = 0;
>>> +    atomic64_set(&pdd->vram_usage, 0);
>>>       pdd->sdma_past_activity_counter = 0;
>>>       pdd->user_gpu_id = dev->id;
>>>       atomic64_set(&pdd->evict_duration_counter, 0);
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> index 857ec6f23bba..61891ea6b1ac 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> @@ -379,6 +379,7 @@ static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo)
>>>   static void svm_range_bo_release(struct kref *kref)
>>>   {
>>>       struct svm_range_bo *svm_bo;
>>> +    struct mm_struct *mm = NULL;
>>>       svm_bo = container_of(kref, struct svm_range_bo, kref);
>>>       pr_debug("svm_bo 0x%p\n", svm_bo);
>>> @@ -405,6 +406,22 @@ static void svm_range_bo_release(struct kref *kref)
>>>           spin_lock(&svm_bo->list_lock);
>>>       }
>>>       spin_unlock(&svm_bo->list_lock);
>>> +
>>> +    if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
>>> +        struct kfd_process_device *pdd;
>>> +        struct kfd_process *p;
>>
>> Move struct mm_struct *mm here as well. It's only needed in this block
>> and should not be used outside.
> yes, mm is only used here. If changing svm_bo->node to svm_bo->pdd, the
> entire block will be dropped.
>>
>>
>>> +
>>> +        mm = svm_bo->eviction_fence->mm;
>>> +        p = kfd_lookup_process_by_mm(mm);
>>> +        if (p) {
>>> +            pdd = kfd_get_process_device_data(svm_bo->node, p);
>>> +            if (pdd)
>>> +                atomic64_sub(amdgpu_bo_size(svm_bo->bo), &pdd->vram_usage);
>>> +            kfd_unref_process(p);
>>> +        }
>>> +        mmput(mm);
>>> +    }
>>> +
>>>       if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
>>>           /* We're not in the eviction worker. Signal the fence. */
>>>           dma_fence_signal(&svm_bo->eviction_fence->base);
>>> @@ -532,6 +549,7 @@ int
>>>   svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
>>>               bool clear)
>>>   {
>>> +    struct kfd_process_device *pdd;
>>>       struct amdgpu_bo_param bp;
>>>       struct svm_range_bo *svm_bo;
>>>       struct amdgpu_bo_user *ubo;
>>> @@ -623,6 +641,10 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
>>>       list_add(&prange->svm_bo_list, &svm_bo->range_list);
>>>       spin_unlock(&svm_bo->list_lock);
>>> +    pdd = svm_range_get_pdd_by_node(prange, node);
>>> +    if (pdd)
>>> +        atomic64_add(amdgpu_bo_size(bo), &pdd->vram_usage);
>>> +
>>
>> Would it make sense to save the pdd pointer in the svm_bo struct? The
>> effort to look up the mm, process and pdd in svm_range_bo_release seems
>> quite high.
> Thanks for the good idea.
>>
>> You could replace svm_bo->node with svm_bo->pdd. Then you can still get
>> the node with svm_bo->pdd->dev without growing the size of the structure.
>> This assumes that the svm_bo cannot outlive the pdd.
>
> yes, svm_range_list_fini is called before calling kfd_process_destroy_pdds
> after process exit, so svm_bo->pdd will always be valid. I will send new
> patch series.

I think that's OK. kfd_process_destroy_pdds happens in the cleanup worker
that runs after the mm_struct is gone. So all the page references should
be gone. But there could be issues if a page was shared with another
process that holds on to page references that still point to pdds of
processes that don't exist any more.

Regards,
  Felix

>
> Regards,
>
> Philip
>
>>
>> Regards,
>>    Felix
>>
>>
>>>       return 0;
>>> reserve_bo_failed:
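
P.S. For reference only, a rough sketch of the svm_bo->pdd direction
discussed above. The svm_bo->pdd field and the exact placement are
assumptions, not the actual follow-up patch:

    /* svm_range_vram_node_new(): remember the owning pdd when the VRAM
     * BO is created and account its size right away.
     */
    pdd = svm_range_get_pdd_by_node(prange, node);
    if (pdd) {
        svm_bo->pdd = pdd;   /* assumed new field replacing svm_bo->node */
        atomic64_add(amdgpu_bo_size(bo), &pdd->vram_usage);
    }

    /* svm_range_bo_release(): no mm/process lookup needed any more; the
     * node is still reachable as svm_bo->pdd->dev.
     */
    atomic64_sub(amdgpu_bo_size(svm_bo->bo), &svm_bo->pdd->vram_usage);

This relies on svm_range_list_fini running before kfd_process_destroy_pdds,
as Philip pointed out, so every svm_bo is released while its pdd is still
valid.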