On 2024-10-11 16:22, Philip Yang wrote: > Process device data pdd->vram_usage is read by rocm-smi via sysfs, this > is currently missing the svm_bo usage accounting, so "rocm-smi > --showpids" per process VRAM usage report is incorrect. > > Add pdd->vram_usage accounting when svm_bo allocation and release, > change to atomic64_t type because it is updated outside process mutex > now. > > Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx> Reviewed-by: Felix Kuehling <felix.kuehling@xxxxxxx> > --- > drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 6 +++--- > drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- > drivers/gpu/drm/amd/amdkfd/kfd_process.c | 4 ++-- > drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 26 ++++++++++++++++++++++++ > 4 files changed, 32 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > index a1f191a5984b..065d87841459 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > @@ -1148,7 +1148,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, > > if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM) > size >>= 1; > - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size)); > + atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage); > } > > mutex_unlock(&p->mutex); > @@ -1219,7 +1219,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, > kfd_process_device_remove_obj_handle( > pdd, GET_IDR_HANDLE(args->handle)); > > - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size); > + atomic64_sub(size, &pdd->vram_usage); > > err_unlock: > err_pdd: > @@ -2347,7 +2347,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd, > } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { > bo_bucket->restored_offset = offset; > /* Update the VRAM usage count */ > - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size); > + atomic64_add(bo_bucket->size, &pdd->vram_usage); > } > return 0; > } > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > index 6a5bf88cc232..9e5ca0b93b2a 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > @@ -775,7 +775,7 @@ struct kfd_process_device { > enum kfd_pdd_bound bound; > > /* VRAM usage */ > - uint64_t vram_usage; > + atomic64_t vram_usage; > struct attribute attr_vram; > char vram_filename[MAX_SYSFS_FILENAME_LEN]; > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c > index 7909dfd158be..4810521736a9 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c > @@ -332,7 +332,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr, > } else if (strncmp(attr->name, "vram_", 5) == 0) { > struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device, > attr_vram); > - return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage)); > + return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage)); > } else if (strncmp(attr->name, "sdma_", 5) == 0) { > struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device, > attr_sdma); > @@ -1625,7 +1625,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, > pdd->bound = PDD_UNBOUND; > pdd->already_dequeued = false; > pdd->runtime_inuse = false; > - pdd->vram_usage = 0; > + atomic64_set(&pdd->vram_usage, 0); > pdd->sdma_past_activity_counter = 0; > pdd->user_gpu_id = dev->id; > atomic64_set(&pdd->evict_duration_counter, 0); > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c > index 857ec6f23bba..3e2911895c74 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c > @@ -405,6 +405,27 @@ static void svm_range_bo_release(struct kref *kref) > spin_lock(&svm_bo->list_lock); > } > spin_unlock(&svm_bo->list_lock); > + > + if (mmget_not_zero(svm_bo->eviction_fence->mm)) { > + struct kfd_process_device *pdd; > + struct kfd_process *p; > + struct mm_struct *mm; > + > + mm = svm_bo->eviction_fence->mm; > + /* > + * The forked child process takes svm_bo device pages ref, svm_bo could be > + * released after parent process is gone. > + */ > + p = kfd_lookup_process_by_mm(mm); > + if (p) { > + pdd = kfd_get_process_device_data(svm_bo->node, p); > + if (pdd) > + atomic64_sub(amdgpu_bo_size(svm_bo->bo), &pdd->vram_usage); > + kfd_unref_process(p); > + } > + mmput(mm); > + } > + > if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) > /* We're not in the eviction worker. Signal the fence. */ > dma_fence_signal(&svm_bo->eviction_fence->base); > @@ -532,6 +553,7 @@ int > svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, > bool clear) > { > + struct kfd_process_device *pdd; > struct amdgpu_bo_param bp; > struct svm_range_bo *svm_bo; > struct amdgpu_bo_user *ubo; > @@ -623,6 +645,10 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, > list_add(&prange->svm_bo_list, &svm_bo->range_list); > spin_unlock(&svm_bo->list_lock); > > + pdd = svm_range_get_pdd_by_node(prange, node); > + if (pdd) > + atomic64_add(amdgpu_bo_size(bo), &pdd->vram_usage); > + > return 0; > > reserve_bo_failed: