On 2024-10-11 9:23, Philip Yang wrote:
>
> On 2024-10-09 17:20, Felix Kuehling wrote:
>>
>> On 2024-10-04 16:28, Philip Yang wrote:
>>> Per process device data pdd->vram_usage is used by rocm-smi to report
>>> VRAM usage, this is currently missing the svm_bo usage accounting, so
>>> "rocm-smi --showpids" per process report is incorrect.
>>>
>>> Add pdd->vram_usage accounting for svm_bo and change type to atomic64_t
>>> because it is updated outside process mutex now.
>>>
>>> Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
>>> ---
>>>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  6 +++---
>>>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  2 +-
>>>   drivers/gpu/drm/amd/amdkfd/kfd_process.c |  4 ++--
>>>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 22 ++++++++++++++++++++++
>>>   4 files changed, 28 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> index a1f191a5984b..065d87841459 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> @@ -1148,7 +1148,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
>>>           if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
>>>               size >>= 1;
>>> -        WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
>>> +        atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage);
>>>       }
>>>       mutex_unlock(&p->mutex);
>>> @@ -1219,7 +1219,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
>>>       kfd_process_device_remove_obj_handle(
>>>           pdd, GET_IDR_HANDLE(args->handle));
>>> -    WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
>>> +    atomic64_sub(size, &pdd->vram_usage);
>>>   err_unlock:
>>>   err_pdd:
>>> @@ -2347,7 +2347,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
>>>       } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
>>>           bo_bucket->restored_offset = offset;
>>>           /* Update the VRAM usage count */
>>> -        WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
>>> +        atomic64_add(bo_bucket->size, &pdd->vram_usage);
>>>       }
>>>       return 0;
>>>   }
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> index 6a5bf88cc232..9e5ca0b93b2a 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> @@ -775,7 +775,7 @@ struct kfd_process_device {
>>>       enum kfd_pdd_bound bound;
>>>       /* VRAM usage */
>>> -    uint64_t vram_usage;
>>> +    atomic64_t vram_usage;
>>>       struct attribute attr_vram;
>>>       char vram_filename[MAX_SYSFS_FILENAME_LEN];
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> index 7909dfd158be..4810521736a9 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> @@ -332,7 +332,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
>>>       } else if (strncmp(attr->name, "vram_", 5) == 0) {
>>>           struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
>>>                                                          attr_vram);
>>> -        return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
>>> +        return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage));
>>>       } else if (strncmp(attr->name, "sdma_", 5) == 0) {
>>>           struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
>>>                                                          attr_sdma);
>>> @@ -1625,7 +1625,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
>>>       pdd->bound = PDD_UNBOUND;
>>>       pdd->already_dequeued = false;
>>>       pdd->runtime_inuse = false;
>>> -    pdd->vram_usage = 0;
>>> +    atomic64_set(&pdd->vram_usage, 0);
>>>       pdd->sdma_past_activity_counter = 0;
>>>       pdd->user_gpu_id = dev->id;
>>>       atomic64_set(&pdd->evict_duration_counter, 0);
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> index 857ec6f23bba..61891ea6b1ac 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> @@ -379,6 +379,7 @@ static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo)
>>>   static void svm_range_bo_release(struct kref *kref)
>>>   {
>>>       struct svm_range_bo *svm_bo;
>>> +    struct mm_struct *mm = NULL;
>>>       svm_bo = container_of(kref, struct svm_range_bo, kref);
>>>       pr_debug("svm_bo 0x%p\n", svm_bo);
>>> @@ -405,6 +406,22 @@ static void svm_range_bo_release(struct kref *kref)
>>>           spin_lock(&svm_bo->list_lock);
>>>       }
>>>       spin_unlock(&svm_bo->list_lock);
>>> +
>>> +    if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
>>> +        struct kfd_process_device *pdd;
>>> +        struct kfd_process *p;
>>
>> Move struct mm_struct *mm here as well. It's only needed in this block
>> and should not be used outside.
> yes, mm is only used here. If changing svm_bo->node to svm_bo->pdd, the
> entire block will be dropped.
>>
>>
>>> +
>>> +        mm = svm_bo->eviction_fence->mm;
>>> +        p = kfd_lookup_process_by_mm(mm);
>>> +        if (p) {
>>> +            pdd = kfd_get_process_device_data(svm_bo->node, p);
>>> +            if (pdd)
>>> +                atomic64_sub(amdgpu_bo_size(svm_bo->bo), &pdd->vram_usage);
>>> +            kfd_unref_process(p);
>>> +        }
>>> +        mmput(mm);
>>> +    }
>>> +
>>>       if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
>>>           /* We're not in the eviction worker. Signal the fence. */
>>>           dma_fence_signal(&svm_bo->eviction_fence->base);
>>> @@ -532,6 +549,7 @@ int
>>>   svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
>>>               bool clear)
>>>   {
>>> +    struct kfd_process_device *pdd;
>>>       struct amdgpu_bo_param bp;
>>>       struct svm_range_bo *svm_bo;
>>>       struct amdgpu_bo_user *ubo;
>>> @@ -623,6 +641,10 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
>>>       list_add(&prange->svm_bo_list, &svm_bo->range_list);
>>>       spin_unlock(&svm_bo->list_lock);
>>> +    pdd = svm_range_get_pdd_by_node(prange, node);
>>> +    if (pdd)
>>> +        atomic64_add(amdgpu_bo_size(bo), &pdd->vram_usage);
>>> +
>>
>> Would it make sense to save the pdd pointer in the svm_bo struct? The
>> effort to look up the mm, process and pdd in svm_range_bo_release seems
>> quite high.
> Thanks for the good idea.
>>
>> You could replace svm_bo->node with svm_bo->pdd. Then you can still get
>> the node with svm_bo->pdd->dev without growing the size of the structure.
>> This assumes that the svm_bo cannot outlive the pdd.
>
> yes, svm_range_list_fini is called before calling kfd_process_destroy_pdds
> after process exit, so svm_bo->pdd will always be valid. I will send new
> patch series.

I think that's OK. kfd_process_destroy_pdds happens in the cleanup worker
that runs after the mm_struct is gone. So all the page references should
be gone. But there could be issues if a page was shared with another
process that holds on to page references that still point to pdds of
processes that don't exist any more.

Regards,
  Felix

>
> Regards,
>
> Philip
>
>>
>> Regards,
>>    Felix
>>
>>
>>>       return 0;
>>> reserve_bo_failed:
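
P.S. For reference only, a rough sketch of the svm_bo->pdd direction
discussed above. The svm_bo->pdd field and the exact placement are
assumptions, not the actual follow-up patch:

    /* svm_range_vram_node_new(): remember the owning pdd when the VRAM
     * BO is created and account its size right away.
     */
    pdd = svm_range_get_pdd_by_node(prange, node);
    if (pdd) {
        svm_bo->pdd = pdd;   /* assumed new field replacing svm_bo->node */
        atomic64_add(amdgpu_bo_size(bo), &pdd->vram_usage);
    }

    /* svm_range_bo_release(): no mm/process lookup needed any more; the
     * node is still reachable as svm_bo->pdd->dev.
     */
    atomic64_sub(amdgpu_bo_size(svm_bo->bo), &svm_bo->pdd->vram_usage);

This relies on svm_range_list_fini running before kfd_process_destroy_pdds,
as Philip pointed out, so every svm_bo is released while its pdd is still
valid.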