Thanks Mukul. One last thing I noticed. It seems the update the the vram_usage is protected by the process->mutex. That means the variable will always be coherent while that lock is held. However, the sysfs-show function doesn't hold that lock. So if the compiler decides to break the update into multiple instructions, or sysfs-show does the read with multiple instructions, you may see inconsistent results. You can ensure that doesn't happen by only updating the size atomically with WRITE_ONCE and only reading it atomically with READ_ONCE in the sysfs-show function. For reference, this is a good article on the subject on lwn.net: [Who's afraid of a big bad optimizing compiler?](https://lwn.net/Articles/793253/) Regards, Felix Am 2020-04-26 um 11:45 p.m. schrieb Mukul Joshi: > Track GPU VRAM usage on a per process basis and report it through > sysfs. > > v2: > - Handle AMDGPU BO-specific details in > amdgpu_amdkfd_gpuvm_free_memory_of_gpu(). > - Return size of VRAM BO being freed from > amdgpu_amdkfd_gpuvm_free_memory_of_gpu(). > - Do not consider imported memory for VRAM > usage calculations. > > v3: > - Move handling of imported BO size from > kfd_ioctl_free_memory_of_gpu() to > amdgpu_amdkfd_gpuvm_free_memory_of_gpu(). > > Signed-off-by: Mukul Joshi <mukul.joshi@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 3 +- > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 16 +++++- > drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 13 ++++- > drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 7 +++ > drivers/gpu/drm/amd/amdkfd/kfd_process.c | 57 ++++++++++++++++--- > 5 files changed, 84 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > index d065c50582eb..a501026e829c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > @@ -65,6 +65,7 @@ struct kgd_mem { > struct amdgpu_sync sync; > > bool aql_queue; > + bool is_imported; > }; > > /* KFD Memory Eviction */ > @@ -219,7 +220,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( > void *vm, struct kgd_mem **mem, > uint64_t *offset, uint32_t flags); > int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( > - struct kgd_dev *kgd, struct kgd_mem *mem); > + struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size); > int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( > struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); > int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > index 0768b7eb7683..1247938b1ec1 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > @@ -1277,7 +1277,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( > } > > int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( > - struct kgd_dev *kgd, struct kgd_mem *mem) > + struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size) > { > struct amdkfd_process_info *process_info = mem->process_info; > unsigned long bo_size = mem->bo->tbo.mem.size; > @@ -1286,9 +1286,11 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( > struct ttm_validate_buffer *bo_list_entry; > unsigned int mapped_to_gpu_memory; > int ret; > + bool is_imported = 0; > > mutex_lock(&mem->lock); > mapped_to_gpu_memory = mem->mapped_to_gpu_memory; > + is_imported = mem->is_imported; > mutex_unlock(&mem->lock); > /* lock is not needed after this, since mem is unused and will > * be freed anyway > @@ -1340,6 +1342,17 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( > kfree(mem->bo->tbo.sg); > } > > + /* Update the size of the BO being freed if it was allocated from > + * VRAM and is not imported. > + */ > + if (size) { > + if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) && > + (!is_imported)) > + *size = bo_size; > + else > + *size = 0; > + } > + > /* Free the BO*/ > amdgpu_bo_unref(&mem->bo); > mutex_destroy(&mem->lock); > @@ -1694,6 +1707,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, > (*mem)->process_info = avm->process_info; > add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false); > amdgpu_sync_create(&(*mem)->sync); > + (*mem)->is_imported = true; > > return 0; > } > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > index f8fa03a12add..ede84f76397f 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > @@ -1322,6 +1322,10 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, > goto err_free; > } > > + /* Update the VRAM usage count */ > + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) > + pdd->vram_usage += args->size; > + > mutex_unlock(&p->mutex); > > args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); > @@ -1337,7 +1341,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, > return 0; > > err_free: > - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); > + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL); > err_unlock: > mutex_unlock(&p->mutex); > return err; > @@ -1351,6 +1355,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, > void *mem; > struct kfd_dev *dev; > int ret; > + uint64_t size = 0; > > dev = kfd_device_by_id(GET_GPU_ID(args->handle)); > if (!dev) > @@ -1373,7 +1378,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, > } > > ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, > - (struct kgd_mem *)mem); > + (struct kgd_mem *)mem, &size); > > /* If freeing the buffer failed, leave the handle in place for > * clean-up during process tear-down. > @@ -1382,6 +1387,8 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, > kfd_process_device_remove_obj_handle( > pdd, GET_IDR_HANDLE(args->handle)); > > + pdd->vram_usage -= size; > + > err_unlock: > mutex_unlock(&p->mutex); > return ret; > @@ -1726,7 +1733,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep, > return 0; > > err_free: > - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); > + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL); > err_unlock: > mutex_unlock(&p->mutex); > return r; > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > index 43b888b311c7..e7918fc3cee5 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > @@ -616,6 +616,8 @@ enum kfd_pdd_bound { > PDD_BOUND_SUSPENDED, > }; > > +#define MAX_VRAM_FILENAME_LEN 11 > + > /* Data that is per-process-per device. */ > struct kfd_process_device { > /* > @@ -658,6 +660,11 @@ struct kfd_process_device { > > /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ > enum kfd_pdd_bound bound; > + > + /* VRAM usage */ > + uint64_t vram_usage; > + struct attribute attr_vram; > + char vram_filename[MAX_VRAM_FILENAME_LEN]; > }; > > #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c > index fe0cd49d4ea7..600759949802 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c > @@ -79,18 +79,22 @@ static struct kfd_procfs_tree procfs; > static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr, > char *buffer) > { > - int val = 0; > - > if (strcmp(attr->name, "pasid") == 0) { > struct kfd_process *p = container_of(attr, struct kfd_process, > attr_pasid); > - val = p->pasid; > + > + return snprintf(buffer, PAGE_SIZE, "%d\n", p->pasid); > + } else if (strncmp(attr->name, "vram_", 5) == 0) { > + struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device, > + attr_vram); > + if (pdd) > + return snprintf(buffer, PAGE_SIZE, "%llu\n", pdd->vram_usage); > } else { > pr_err("Invalid attribute"); > return -EINVAL; > } > > - return snprintf(buffer, PAGE_SIZE, "%d\n", val); > + return 0; > } > > static void kfd_procfs_kobj_release(struct kobject *kobj) > @@ -206,6 +210,34 @@ int kfd_procfs_add_queue(struct queue *q) > return 0; > } > > +int kfd_procfs_add_vram_usage(struct kfd_process *p) > +{ > + int ret = 0; > + struct kfd_process_device *pdd; > + > + if (!p) > + return -EINVAL; > + > + if (!p->kobj) > + return -EFAULT; > + > + /* Create proc/<pid>/vram_<gpuid> file for each GPU */ > + list_for_each_entry(pdd, &p->per_device_data, per_device_list) { > + snprintf(pdd->vram_filename, MAX_VRAM_FILENAME_LEN, "vram_%u", > + pdd->dev->id); > + pdd->attr_vram.name = pdd->vram_filename; > + pdd->attr_vram.mode = KFD_SYSFS_FILE_MODE; > + sysfs_attr_init(&pdd->attr_vram); > + ret = sysfs_create_file(p->kobj, &pdd->attr_vram); > + if (ret) > + pr_warn("Creating vram usage for gpu id %d failed", > + (int)pdd->dev->id); > + } > + > + return ret; > +} > + > + > void kfd_procfs_del_queue(struct queue *q) > { > if (!q) > @@ -248,7 +280,7 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem, > struct kfd_dev *dev = pdd->dev; > > amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm); > - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem); > + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, NULL); > } > > /* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process > @@ -312,7 +344,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, > return err; > > err_map_mem: > - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem); > + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, NULL); > err_alloc_mem: > *kptr = NULL; > return err; > @@ -411,6 +443,11 @@ struct kfd_process *kfd_create_process(struct file *filep) > process->kobj); > if (!process->kobj_queues) > pr_warn("Creating KFD proc/queues folder failed"); > + > + ret = kfd_procfs_add_vram_usage(process); > + if (ret) > + pr_warn("Creating vram usage file for pid %d failed", > + (int)process->lead_thread->pid); > } > out: > if (!IS_ERR(process)) > @@ -488,7 +525,7 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd) > peer_pdd->dev->kgd, mem, peer_pdd->vm); > } > > - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem); > + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, NULL); > kfd_process_device_remove_obj_handle(pdd, id); > } > } > @@ -551,6 +588,7 @@ static void kfd_process_wq_release(struct work_struct *work) > { > struct kfd_process *p = container_of(work, struct kfd_process, > release_work); > + struct kfd_process_device *pdd; > > /* Remove the procfs files */ > if (p->kobj) { > @@ -558,6 +596,10 @@ static void kfd_process_wq_release(struct work_struct *work) > kobject_del(p->kobj_queues); > kobject_put(p->kobj_queues); > p->kobj_queues = NULL; > + > + list_for_each_entry(pdd, &p->per_device_data, per_device_list) > + sysfs_remove_file(p->kobj, &pdd->attr_vram); > + > kobject_del(p->kobj); > kobject_put(p->kobj); > p->kobj = NULL; > @@ -862,6 +904,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, > pdd->bound = PDD_UNBOUND; > pdd->already_dequeued = false; > pdd->runtime_inuse = false; > + pdd->vram_usage = 0; > list_add(&pdd->per_device_list, &p->per_device_data); > > /* Init idr used for memory handle translation */ _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx