From: Rob Clark <robdclark@xxxxxxxxxxxx> Perfetto can use these traces to track global and per-process GPU memory usage. Signed-off-by: Rob Clark <robdclark@xxxxxxxxxxxx> --- I realized the tracepoint that perfetto uses to show GPU memory usage globally and per-process was already upstream, but with no users. This overlaps a bit with fdinfo, but ftrace is a lighter weight mechanism and fits better with perfetto (plus is already supported in trace_processor and perfetto UI, whereas something fdinfo based would require new code to be added in perfetto). We could probably do this more globally (i.e. drm_gem_get/put_pages() and drm_gem_handle_create_tail()/drm_gem_object_release_handle()) if folks prefer. Not sure where that leaves the TTM drivers. drivers/gpu/drm/msm/Kconfig | 1 + drivers/gpu/drm/msm/msm_drv.h | 5 +++++ drivers/gpu/drm/msm/msm_gem.c | 37 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/msm/msm_gpu.h | 8 ++++++++ 4 files changed, 51 insertions(+) diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index f202f26adab2..e4c912fcaf22 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -33,6 +33,7 @@ config DRM_MSM select PM_OPP select NVMEM select PM_GENERIC_DOMAINS + select TRACE_GPU_MEM help DRM/KMS driver for MSM/snapdragon. diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 16a7cbc0b7dd..cb8f7e804b5b 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -137,6 +137,11 @@ struct msm_drm_private { struct msm_rd_state *hangrd; /* debugfs to dump hanging submits */ struct msm_perf_state *perf; + /** + * total_mem: Total/global amount of memory backing GEM objects. 
+ */ + atomic64_t total_mem; + /** * List of all GEM objects (mainly for debugfs, protected by obj_lock * (acquire before per GEM object lock) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 175ee4ab8a6f..e04c4af5d154 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -12,6 +12,9 @@ #include <linux/pfn_t.h> #include <drm/drm_prime.h> +#include <drm/drm_file.h> + +#include <trace/events/gpu_mem.h> #include "msm_drv.h" #include "msm_fence.h" @@ -33,6 +36,34 @@ static bool use_pages(struct drm_gem_object *obj) return !msm_obj->vram_node; } +static void update_device_mem(struct msm_drm_private *priv, ssize_t size) +{ + uint64_t total_mem = atomic64_add_return(size, &priv->total_mem); + trace_gpu_mem_total(0, 0, total_mem); +} + +static void update_ctx_mem(struct drm_file *file, ssize_t size) +{ + struct msm_file_private *ctx = file->driver_priv; + uint64_t ctx_mem = atomic64_add_return(size, &ctx->ctx_mem); + + rcu_read_lock(); /* Locks file->pid! */ + trace_gpu_mem_total(0, pid_nr(file->pid), ctx_mem); + rcu_read_unlock(); + +} + +static int msm_gem_open(struct drm_gem_object *obj, struct drm_file *file) +{ + update_ctx_mem(file, obj->size); + return 0; +} + +static void msm_gem_close(struct drm_gem_object *obj, struct drm_file *file) +{ + update_ctx_mem(file, -obj->size); +} + /* * Cache sync.. this is a bit over-complicated, to fit dma-mapping * API. 
Really GPU cache is out of scope here (handled on cmdstream) @@ -156,6 +187,8 @@ static struct page **get_pages(struct drm_gem_object *obj) return p; } + update_device_mem(dev->dev_private, obj->size); + msm_obj->pages = p; msm_obj->sgt = drm_prime_pages_to_sg(obj->dev, p, npages); @@ -209,6 +242,8 @@ static void put_pages(struct drm_gem_object *obj) msm_obj->sgt = NULL; } + update_device_mem(obj->dev->dev_private, -obj->size); + if (use_pages(obj)) drm_gem_put_pages(obj, msm_obj->pages, true, false); else @@ -1118,6 +1153,8 @@ static const struct vm_operations_struct vm_ops = { static const struct drm_gem_object_funcs msm_gem_object_funcs = { .free = msm_gem_free_object, + .open = msm_gem_open, + .close = msm_gem_close, .pin = msm_gem_prime_pin, .unpin = msm_gem_prime_unpin, .get_sg_table = msm_gem_prime_get_sg_table, diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 2bfcb222e353..f7d2a7d6f8cc 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -428,6 +428,14 @@ struct msm_file_private { * level. */ struct drm_sched_entity *entities[NR_SCHED_PRIORITIES * MSM_GPU_MAX_RINGS]; + + /** + * ctx_mem: + * + * Total amount of memory of GEM buffers with handles attached for + * this context. + */ + atomic64_t ctx_mem; }; /** -- 2.44.0