Track SDMA usage on a per-process basis and report it through sysfs.
The value in the sysfs file indicates the amount of time SDMA has been
in use by this process since the process was created. The value is
reported at microsecond granularity.

v2:
- Remove unnecessary checks for pdd in kfd_procfs_show().
- Make the counter part of the kfd_sdma_activity_handler_workarea
  structure.

Signed-off-by: Mukul Joshi <mukul.joshi@xxxxxxx>
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  57 ++++++++
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |   2 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  16 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c      | 130 ++++++++++++++++--
 4 files changed, 191 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e9c4867abeff..49f72d0f7be7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -153,6 +153,52 @@ void decrement_queue_count(struct device_queue_manager *dqm,
 		dqm->active_cp_queue_count--;
 }
 
+int read_sdma_queue_counter(struct queue *q, uint64_t *val)
+{
+	int ret;
+	uint64_t tmp = 0;
+
+	if (!q || !val)
+		return -EINVAL;
+	/*
+	 * SDMA activity counter is stored at queue's RPTR + 0x8 location.
+	 */
+	if (!access_ok((const void __user *)((uint64_t)q->properties.read_ptr +
+		    sizeof(uint64_t)), sizeof(uint64_t))) {
+		pr_err("Can't access sdma queue activity counter\n");
+		return -EFAULT;
+	}
+
+	ret = get_user(tmp, (uint64_t *)((uint64_t)(q->properties.read_ptr) +
+		    sizeof(uint64_t)));
+	if (!ret) {
+		*val = tmp;
+	}
+
+	return ret;
+}
+
+static int update_sdma_queue_past_activity_stats(struct kfd_process_device *pdd,
+						 struct queue *q)
+{
+	int ret;
+	uint64_t val = 0;
+
+	if (!pdd)
+		return -ENODEV;
+
+	ret = read_sdma_queue_counter(q, &val);
+	if (ret) {
+		pr_err("Failed to read SDMA queue counter for queue: %d\n",
+		       q->properties.queue_id);
+		return ret;
+	}
+
+	WRITE_ONCE(pdd->sdma_past_activity_counter, pdd->sdma_past_activity_counter + val);
+
+	return ret;
+}
+
 static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
 {
 	struct kfd_dev *dev = qpd->dqm->dev;
@@ -487,6 +533,12 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
 	if (retval == -ETIME)
 		qpd->reset_wavefronts = true;
 
+	/* Get the SDMA queue stats */
+	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
+	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+		update_sdma_queue_past_activity_stats(qpd_to_pdd(qpd), q);
+	}
+
 	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
 
 	list_del(&q->list);
@@ -1468,6 +1520,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 		}
 	}
 
+	/* Get the SDMA queue stats */
+	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
+	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+		update_sdma_queue_past_activity_stats(qpd_to_pdd(qpd), q);
+	}
 	/*
 	 * Unconditionally decrement this counter, regardless of the queue's
 	 * type
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 4afa015c69b1..894bcf877f9e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -251,4 +251,6 @@ static inline void dqm_unlock(struct device_queue_manager *dqm)
 	mutex_unlock(&dqm->lock_hidden);
 }
 
+int read_sdma_queue_counter(struct queue *q, uint64_t *val);
+
 #endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index f70f789c3cb3..fae139b77c0a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -633,7 +633,14 @@ enum kfd_pdd_bound {
 	PDD_BOUND_SUSPENDED,
 };
 
-#define MAX_VRAM_FILENAME_LEN 11
+#define MAX_SYSFS_FILENAME_LEN 11
+
+/*
+ * SDMA counter runs at 100MHz frequency.
+ * We display SDMA activity in microsecond granularity in sysfs.
+ * As a result, the divisor is 100.
+ */
+#define SDMA_ACTIVITY_DIVISOR 100
 
 /* Data that is per-process-per device. */
 struct kfd_process_device {
@@ -681,7 +688,12 @@ struct kfd_process_device {
 	/* VRAM usage */
 	uint64_t vram_usage;
 	struct attribute attr_vram;
-	char vram_filename[MAX_VRAM_FILENAME_LEN];
+	char vram_filename[MAX_SYSFS_FILENAME_LEN];
+
+	/* SDMA activity tracking */
+	uint64_t sdma_past_activity_counter;
+	struct attribute attr_sdma;
+	char sdma_filename[MAX_SYSFS_FILENAME_LEN];
 };
 
 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index d27221ddcdeb..e2ebed5c033f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -25,6 +25,7 @@
 #include <linux/sched.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/task.h>
+#include <linux/mmu_context.h>
 #include <linux/slab.h>
 #include <linux/amd-iommu.h>
 #include <linux/notifier.h>
@@ -76,6 +77,66 @@ struct kfd_procfs_tree {
 
 static struct kfd_procfs_tree procfs;
 
+/*
+ * Structure for SDMA activity tracking
+ */
+struct kfd_sdma_activity_handler_workarea {
+	struct work_struct sdma_activity_work;
+	struct kfd_process_device *pdd;
+	uint64_t sdma_current_activity_counter;
+};
+
+static void kfd_sdma_activity_worker(struct work_struct *work)
+{
+	struct kfd_sdma_activity_handler_workarea *workarea;
+	struct kfd_process_device *pdd;
+	uint64_t val;
+	struct mm_struct *mm;
+	struct queue *q;
+	struct qcm_process_device *qpd;
+	struct device_queue_manager *dqm;
+	int ret = 0;
+
+	workarea = container_of(work, struct kfd_sdma_activity_handler_workarea,
+				sdma_activity_work);
+	if (!workarea)
+		return;
+
+	pdd = workarea->pdd;
+	dqm = pdd->dev->dqm;
+	qpd = &pdd->qpd;
+
+	if (!pdd || !dqm || !qpd)
+		return;
+
+	mm = get_task_mm(pdd->process->lead_thread);
+	if (!mm) {
+		return;
+	}
+
+	use_mm(mm);
+
+	dqm_lock(dqm);
+
+	list_for_each_entry(q, &qpd->queues_list, list) {
+		if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
+		    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+			val = 0;
+			ret = read_sdma_queue_counter(q, &val);
+			if (ret)
+				pr_debug("Failed to read SDMA queue active "
+					 "counter for queue id: %d",
+					 q->properties.queue_id);
+			else
+				workarea->sdma_current_activity_counter += val;
+		}
+	}
+
+	dqm_unlock(dqm);
+	unuse_mm(mm);
+	mmput(mm);
+}
+
 static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
 			       char *buffer)
 {
@@ -87,8 +148,25 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
 	} else if (strncmp(attr->name, "vram_", 5) == 0) {
 		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
 							      attr_vram);
-		if (pdd)
-			return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
+		return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
+	} else if (strncmp(attr->name, "sdma_", 5) == 0) {
+		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
+							      attr_sdma);
+		struct kfd_sdma_activity_handler_workarea sdma_activity_work_handler;
+
+		INIT_WORK(&sdma_activity_work_handler.sdma_activity_work,
+			  kfd_sdma_activity_worker);
+
+		sdma_activity_work_handler.pdd = pdd;
+
+		schedule_work(&sdma_activity_work_handler.sdma_activity_work);
+
+		flush_work(&sdma_activity_work_handler.sdma_activity_work);
+
+		return snprintf(buffer, PAGE_SIZE, "%llu\n",
+				(READ_ONCE(pdd->sdma_past_activity_counter) +
+				sdma_activity_work_handler.sdma_current_activity_counter)/
+				SDMA_ACTIVITY_DIVISOR);
 	} else {
 		pr_err("Invalid attribute");
 		return -EINVAL;
@@ -210,7 +288,24 @@ int kfd_procfs_add_queue(struct queue *q)
 	return 0;
 }
 
-int kfd_procfs_add_vram_usage(struct kfd_process *p)
+static int kfd_sysfs_create_file(struct kfd_process *p, struct attribute *attr,
+				 char *name)
+{
+	int ret = 0;
+
+	if (!p || !attr || !name)
+		return -EINVAL;
+
+	attr->name = name;
+	attr->mode = KFD_SYSFS_FILE_MODE;
+	sysfs_attr_init(attr);
+
+	ret = sysfs_create_file(p->kobj, attr);
+
+	return ret;
+}
+
+int kfd_procfs_add_sysfs_files(struct kfd_process *p)
 {
 	int ret = 0;
 	struct kfd_process_device *pdd;
@@ -221,17 +316,25 @@ int kfd_procfs_add_vram_usage(struct kfd_process *p)
 	if (!p->kobj)
 		return -EFAULT;
 
-	/* Create proc/<pid>/vram_<gpuid> file for each GPU */
+	/*
+	 * Create sysfs files for each GPU:
+	 * - proc/<pid>/vram_<gpuid>
+	 * - proc/<pid>/sdma_<gpuid>
+	 */
 	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
-		snprintf(pdd->vram_filename, MAX_VRAM_FILENAME_LEN, "vram_%u",
+		snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u",
 			 pdd->dev->id);
-		pdd->attr_vram.name = pdd->vram_filename;
-		pdd->attr_vram.mode = KFD_SYSFS_FILE_MODE;
-		sysfs_attr_init(&pdd->attr_vram);
-		ret = sysfs_create_file(p->kobj, &pdd->attr_vram);
+		ret = kfd_sysfs_create_file(p, &pdd->attr_vram, pdd->vram_filename);
 		if (ret)
 			pr_warn("Creating vram usage for gpu id %d failed",
 				(int)pdd->dev->id);
+
+		snprintf(pdd->sdma_filename, MAX_SYSFS_FILENAME_LEN, "sdma_%u",
+			 pdd->dev->id);
+		ret = kfd_sysfs_create_file(p, &pdd->attr_sdma, pdd->sdma_filename);
+		if (ret)
+			pr_warn("Creating sdma usage for gpu id %d failed",
+				(int)pdd->dev->id);
 	}
 
 	return ret;
@@ -444,9 +547,9 @@ struct kfd_process *kfd_create_process(struct file *filep)
 		if (!process->kobj_queues)
 			pr_warn("Creating KFD proc/queues folder failed");
 
-		ret = kfd_procfs_add_vram_usage(process);
+		ret = kfd_procfs_add_sysfs_files(process);
 		if (ret)
-			pr_warn("Creating vram usage file for pid %d failed",
+			pr_warn("Creating sysfs usage file for pid %d failed",
 				(int)process->lead_thread->pid);
 	}
 out:
@@ -597,8 +700,10 @@ static void kfd_process_wq_release(struct work_struct *work)
 		kobject_put(p->kobj_queues);
 		p->kobj_queues = NULL;
 
-		list_for_each_entry(pdd, &p->per_device_data, per_device_list)
+		list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
 			sysfs_remove_file(p->kobj, &pdd->attr_vram);
+			sysfs_remove_file(p->kobj, &pdd->attr_sdma);
+		}
 
 		kobject_del(p->kobj);
 		kobject_put(p->kobj);
@@ -906,6 +1011,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 	pdd->already_dequeued = false;
 	pdd->runtime_inuse = false;
 	pdd->vram_usage = 0;
+	pdd->sdma_past_activity_counter = 0;
 	list_add(&pdd->per_device_list, &p->per_device_data);
 
 	/* Init idr used for memory handle translation */
-- 
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
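A note on units, as a quick sanity check of the arithmetic implied by the
SDMA_ACTIVITY_DIVISOR comment in the patch: the SDMA counter ticks at 100 MHz,
i.e. 100 ticks per microsecond, so the value published in sysfs is the
accumulated tick count divided by 100. For example, a process that accumulates
25,000,000 ticks of SDMA activity would be reported as 25,000,000 / 100 =
250,000 us (0.25 s) of SDMA time.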
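For completeness, here is a minimal userspace sketch of how the new file might
be read. The sysfs path layout (/sys/class/kfd/kfd/proc/<pid>/sdma_<gpuid>) and
the example pid/gpuid values are assumptions made for illustration, mirroring
the existing vram_<gpuid> files; they are not defined by this patch.

/*
 * Userspace sketch (illustrative only, not part of the patch): read the
 * per-process SDMA activity file and print it.  The path below assumes
 * the KFD per-process sysfs directory layout and uses made-up pid/gpuid
 * values; substitute real ones on a live system.
 */
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/class/kfd/kfd/proc/1234/sdma_1002"; /* hypothetical */
	uint64_t sdma_usecs = 0;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/*
	 * The file contains a single decimal value: cumulative SDMA busy
	 * time for the process, already converted to microseconds in the
	 * kernel (raw ticks / SDMA_ACTIVITY_DIVISOR).
	 */
	if (fscanf(f, "%" SCNu64, &sdma_usecs) == 1)
		printf("SDMA busy time: %" PRIu64 " us\n", sdma_usecs);
	fclose(f);
	return 0;
}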