Am 07.04.21 um 15:31 schrieb Roy Sun:
Tracking devices, process info and fence info using
/proc/pid/fdinfo
Signed-off-by: David M Nieto <David.Nieto@xxxxxxx>
Signed-off-by: Roy Sun <Roy.Sun@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/Makefile | 2 +
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c | 247 +++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h | 51 +++++
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 1 +
6 files changed, 306 insertions(+), 1 deletion(-)
create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index ee85e8aba636..d216b7ecb5d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -58,6 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o
+amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
+
amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o
# add asic specific block
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 616c85a01299..c2338a0dd1f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -107,6 +107,7 @@
#include "amdgpu_gfxhub.h"
#include "amdgpu_df.h"
#include "amdgpu_smuio.h"
+#include "amdgpu_fdinfo.h"
#define MAX_GPU_INSTANCE 16
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 4bcc03c4c6c5..07aed377dec8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -42,7 +42,7 @@
#include "amdgpu_irq.h"
#include "amdgpu_dma_buf.h"
#include "amdgpu_sched.h"
-
+#include "amdgpu_fdinfo.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_ras.h"
@@ -1691,6 +1691,9 @@ static const struct file_operations amdgpu_driver_kms_fops = {
#ifdef CONFIG_COMPAT
.compat_ioctl = amdgpu_kms_compat_ioctl,
#endif
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = amdgpu_show_fdinfo
+#endif
};
int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
new file mode 100644
index 000000000000..72e61a89c0ea
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
@@ -0,0 +1,247 @@
Still no SPDX license tag.
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: David Nieto
+ * Roy Sun
+ */
+
+#include <linux/debugfs.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/reboot.h>
+#include <linux/syscalls.h>
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_debugfs.h>
+
+#include "amdgpu.h"
+#include "amdgpu_fdinfo.h"
+
+
+static const char *amdgpu_ip_name[AMDGPU_HW_IP_NUM] = {
+ [AMDGPU_HW_IP_GFX] = "gfx",
+ [AMDGPU_HW_IP_COMPUTE] = "compute",
+ [AMDGPU_HW_IP_DMA] = "dma",
+ [AMDGPU_HW_IP_UVD] = "dec",
+ [AMDGPU_HW_IP_VCE] = "enc",
+ [AMDGPU_HW_IP_UVD_ENC] = "enc_1",
+ [AMDGPU_HW_IP_VCN_DEC] = "dec",
+ [AMDGPU_HW_IP_VCN_ENC] = "enc",
+ [AMDGPU_HW_IP_VCN_JPEG] = "jpeg",
+};
+
+uint64_t amdgpu_get_proc_mem(struct drm_file *file, struct amdgpu_fpriv *fpriv)
+{
+ int id;
+ struct drm_gem_object *gobj;
+ uint64_t total = 0;
+
+ spin_lock(&file->table_lock);
+ idr_for_each_entry(&file->object_idr, gobj, id) {
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
+ unsigned int domain = amdgpu_mem_type_to_domain(
+ bo->tbo.mem.mem_type);
+
+ if (domain == AMDGPU_GEM_DOMAIN_VRAM)
+ total += amdgpu_bo_size(bo);
Why only VRAM memory?
+ }
+ spin_unlock(&file->table_lock);
This should probably be as helper in the amdgpu_gem.c file.
+
+ if (fpriv->vm.process_info) {
+ struct kgd_mem *mem;
+
+ mutex_lock(&fpriv->vm.process_info->lock);
+ list_for_each_entry(mem, &fpriv->vm.process_info->kfd_bo_list,
+ validate_list.head) {
+ struct amdgpu_bo *bo = mem->bo;
+ unsigned int domain = amdgpu_mem_type_to_domain(
+ bo->tbo.mem.mem_type);
+
+ if (domain == AMDGPU_GEM_DOMAIN_VRAM)
+ total += amdgpu_bo_size(bo);
+ }
+
+ list_for_each_entry(mem, &fpriv->vm.process_info->userptr_valid_list,
+ validate_list.head) {
+ struct amdgpu_bo *bo = mem->bo;
+ unsigned int domain = amdgpu_mem_type_to_domain(
+ bo->tbo.mem.mem_type);
+
+ if (domain == AMDGPU_GEM_DOMAIN_VRAM)
+ total += amdgpu_bo_size(bo);
+ }
+
+ mutex_unlock(&fpriv->vm.process_info->lock);
This needs to be moved into the VM or even better the
amdkfd_process_info code.
+ }
+
+ return total;
+}
+
+uint64_t amdgpu_get_fence_usage(struct amdgpu_fpriv *fpriv, uint32_t hwip,
+ uint32_t idx, uint64_t *elapsed)
+{
+ struct amdgpu_ctx_entity *centity;
+ struct idr *idp;
+ struct amdgpu_ctx *ctx;
+ uint32_t id, i;
+ uint64_t now, t1, t2;
+ uint64_t total = 0, min = 0;
+
+
+ if (idx >= AMDGPU_MAX_ENTITY_NUM)
+ return 0;
+
+ idp = &fpriv->ctx_mgr.ctx_handles;
+
+ mutex_lock(&fpriv->ctx_mgr.lock);
+ idr_for_each_entry(idp, ctx, id) {
+ if (!ctx->entities[hwip][idx])
+ continue;
+
+ centity = ctx->entities[hwip][idx];
+
+ for (i = 0; i < amdgpu_sched_jobs; i++) {
+ struct dma_fence *fence;
+ struct drm_sched_fence *s_fence;
+
+ spin_lock(&ctx->ring_lock);
+ fence = dma_fence_get(centity->fences[i]);
+ spin_unlock(&ctx->ring_lock);
That is quite racy.
+ if (!fence)
+ continue;
+ s_fence = to_drm_sched_fence(fence);
+ if (!dma_fence_is_signaled(&s_fence->scheduled))
+ continue;
+ now = ktime_to_ns(ktime_get());
+ t1 = ktime_to_ns(s_fence->scheduled.timestamp);
+ t2 = !dma_fence_is_signaled(&s_fence->finished) ?
+ 0 : ktime_to_ns(s_fence->finished.timestamp);
+ dma_fence_put(fence);
+
+ t1 = now - t1;
+ t2 = (t2 == 0) ? 0 : now - t2;
+
+ total += t1 - t2;
+ if (t1 > min)
+ min = t1;
Please use ktime for the calculation here.
+ }
+
+ }
+
+ mutex_unlock(&fpriv->ctx_mgr.lock);
Again that needs to be in the ctx handling code in general.
+
+ if (elapsed)
+ *elapsed = min;
+
+ return total;
+}
+
+uint32_t amdgpu_get_ip_count(struct amdgpu_device *adev, int id)
+{
+ enum amd_ip_block_type type;
+ uint32_t count = 0;
+ int i;
+
+ switch (id) {
+ case AMDGPU_HW_IP_GFX:
+ type = AMD_IP_BLOCK_TYPE_GFX;
+ break;
+ case AMDGPU_HW_IP_COMPUTE:
+ type = AMD_IP_BLOCK_TYPE_GFX;
+ break;
+ case AMDGPU_HW_IP_DMA:
+ type = AMD_IP_BLOCK_TYPE_SDMA;
+ break;
+ case AMDGPU_HW_IP_UVD:
+ type = AMD_IP_BLOCK_TYPE_UVD;
+ break;
+ case AMDGPU_HW_IP_VCE:
+ type = AMD_IP_BLOCK_TYPE_VCE;
+ break;
+ case AMDGPU_HW_IP_UVD_ENC:
+ type = AMD_IP_BLOCK_TYPE_UVD;
+ break;
+ case AMDGPU_HW_IP_VCN_DEC:
+ case AMDGPU_HW_IP_VCN_ENC:
+ type = AMD_IP_BLOCK_TYPE_VCN;
+ break;
+ case AMDGPU_HW_IP_VCN_JPEG:
+ type = (amdgpu_device_ip_get_ip_block(adev,
+ AMD_IP_BLOCK_TYPE_JPEG)) ?
+ AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN;
+ break;
+ default:
+ return 0;
+ }
+
+ for (i = 0; i < adev->num_ip_blocks; i++)
+ if (adev->ip_blocks[i].version->type == type &&
+ adev->ip_blocks[i].status.valid &&
+ count < AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
+ count++;
+ return count;
+
+}
Please just completely drop that and use AMDGPU_HW_IP_* directly.
We certainly don't want decode and encode show up as one value.
+
+void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
+{
+ struct amdgpu_fpriv *fpriv;
+ uint32_t bus, dev, fn, i;
+
+ if (amdgpu_file_to_fpriv(f, &fpriv))
+ return;
+ struct drm_file *file = f->private_data;
+ struct amdgpu_device *adev = drm_to_adev(file->minor->dev);
+
+ bus = adev->pdev->bus->number;
+ dev = PCI_SLOT(adev->pdev->devfn);
+ fn = PCI_FUNC(adev->pdev->devfn);
+ seq_printf(m, "pdev:\t%02x:%02x.%d\npasid:\t%u\n", bus, dev, fn,
+ fpriv->vm.pasid);
+
+ seq_printf(m, "mem:\t%llu kB\n", amdgpu_get_proc_mem(file, fpriv)/1024UL);
+
+ for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
+ uint32_t enabled = amdgpu_get_ip_count(adev, i);
+ uint32_t count = amdgpu_ctx_num_entities[i];
+ int idx = 0;
+ uint64_t total = 0, min = 0;
+ uint32_t perc, frac;
+
+ if (enabled) {
Please rather use "if (!enabled) continue style here.
Regards,
Christian.
+ for (idx = 0; idx < count; idx++) {
+ total = amdgpu_get_fence_usage(fpriv,
+ i, idx, &min);
+
+ if ((total == 0) || (min == 0))
+ continue;
+
+ perc = div64_u64(10000 * total, min);
+ frac = perc % 100;
+
+ seq_printf(m, "%s%d:\t%d.%d%%\n",
+ amdgpu_ip_name[i],
+ idx, perc/100, frac);
+ }
+ }
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h
new file mode 100644
index 000000000000..4d73a820f7e7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: David Nieto
+ * Roy Sun
+ */
+#ifndef __AMDGPU_SMI_H__
+#define __AMDGPU_SMI_H__
+
+#include <linux/idr.h>
+#include <linux/kfifo.h>
+#include <linux/rbtree.h>
+#include <drm/gpu_scheduler.h>
+#include <drm/drm_file.h>
+#include <drm/ttm/ttm_bo_driver.h>
+#include <linux/sched/mm.h>
+
+#include "amdgpu_sync.h"
+#include "amdgpu_ring.h"
+#include "amdgpu_ids.h"
+
+struct amdgpu_proc;
+struct amdgpu_ctx;
+uint32_t amdgpu_get_ip_count(struct amdgpu_device *adev, int id);
+
+uint64_t amdgpu_get_fence_usage(struct amdgpu_fpriv *fpriv, uint32_t hwip,
+ uint32_t idx, uint64_t *elapsed);
+
+uint64_t amdgpu_get_proc_mem(struct drm_file *file, struct amdgpu_fpriv *fpriv);
+
+void amdgpu_show_fdinfo(struct seq_file *m, struct file *f);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 39ee88d29cca..b2e774aeab45 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -41,6 +41,7 @@
#include "amdgpu_gem.h"
#include "amdgpu_display.h"
#include "amdgpu_ras.h"
+#include "amdgpu_fdinfo.h"
void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
{
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx