This patch uses the hardware counters for GPU busy % estimation when possible and skips the software counters in that case, since the software counters also account for software-side delays. This should give a more accurate representation of the GPU workload. Signed-off-by: Sharat Masetty <smasetty@xxxxxxxxxxxxxx> --- drivers/gpu/drm/msm/msm_gpu.c | 30 ++++++++++++++++++++++++++---- drivers/gpu/drm/msm/msm_gpu.h | 5 +++-- drivers/gpu/drm/msm/msm_perf.c | 10 +++++----- 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index e9b5426..a896541 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -592,6 +592,9 @@ static void update_sw_cntrs(struct msm_gpu *gpu) uint32_t elapsed; unsigned long flags; + if (gpu->funcs->gpu_busy) + return; + spin_lock_irqsave(&gpu->perf_lock, flags); if (!gpu->perfcntr_active) goto out; @@ -620,6 +623,7 @@ void msm_gpu_perfcntr_start(struct msm_gpu *gpu) /* we could dynamically enable/disable perfcntr registers too.. 
*/ gpu->last_sample.active = msm_gpu_active(gpu); gpu->last_sample.time = ktime_get(); + gpu->last_sample.busy_cycles = 0; gpu->activetime = gpu->totaltime = 0; gpu->perfcntr_active = true; update_hw_cntrs(gpu, 0, NULL); @@ -632,9 +636,22 @@ void msm_gpu_perfcntr_stop(struct msm_gpu *gpu) pm_runtime_put_sync(&gpu->pdev->dev); } +static void msm_gpu_hw_sample(struct msm_gpu *gpu, uint64_t *activetime, + uint64_t *totaltime) +{ + ktime_t time; + + *activetime = gpu->funcs->gpu_busy(gpu, + &gpu->last_sample.busy_cycles); + + time = ktime_get(); + *totaltime = ktime_us_delta(time, gpu->last_sample.time); + gpu->last_sample.time = time; +} + /* returns -errno or # of cntrs sampled */ -int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime, - uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs) +int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint64_t *activetime, + uint64_t *totaltime, uint32_t ncntrs, uint32_t *cntrs) { unsigned long flags; int ret; @@ -646,13 +663,18 @@ int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime, goto out; } + ret = update_hw_cntrs(gpu, ncntrs, cntrs); + + if (gpu->funcs->gpu_busy) { + msm_gpu_hw_sample(gpu, activetime, totaltime); + goto out; + } + *activetime = gpu->activetime; *totaltime = gpu->totaltime; gpu->activetime = gpu->totaltime = 0; - ret = update_hw_cntrs(gpu, ncntrs, cntrs); - out: spin_unlock_irqrestore(&gpu->perf_lock, flags); diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 0ff23ca..7dc775f 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -90,6 +90,7 @@ struct msm_gpu { struct { bool active; ktime_t time; + u64 busy_cycles; } last_sample; uint32_t totaltime, activetime; /* sw counters */ uint32_t last_cntrs[5]; /* hw counters */ @@ -275,8 +276,8 @@ static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val) void msm_gpu_perfcntr_start(struct msm_gpu *gpu); void msm_gpu_perfcntr_stop(struct msm_gpu *gpu); -int 
msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime, - uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs); +int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint64_t *activetime, + uint64_t *totaltime, uint32_t ncntrs, uint32_t *cntrs); void msm_gpu_retire(struct msm_gpu *gpu); void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, diff --git a/drivers/gpu/drm/msm/msm_perf.c b/drivers/gpu/drm/msm/msm_perf.c index 5ab21bd..318f7dd 100644 --- a/drivers/gpu/drm/msm/msm_perf.c +++ b/drivers/gpu/drm/msm/msm_perf.c @@ -17,7 +17,7 @@ /* For profiling, userspace can: * - * tail -f /sys/kernel/debug/dri/<minor>/gpu + * tail -f /sys/kernel/debug/dri/<minor>/perf * * This will enable performance counters/profiling to track the busy time * and any gpu specific performance counters that are supported. @@ -85,9 +85,9 @@ static int refill_buf(struct msm_perf_state *perf) } } else { /* Sample line: */ - uint32_t activetime = 0, totaltime = 0; + uint64_t activetime = 0, totaltime = 0; uint32_t cntrs[5]; - uint32_t val; + uint64_t val; int ret; /* sleep until next sample time: */ @@ -101,14 +101,14 @@ static int refill_buf(struct msm_perf_state *perf) return ret; val = totaltime ? 1000 * activetime / totaltime : 0; - n = snprintf(ptr, rem, "%3d.%d%%", val / 10, val % 10); + n = snprintf(ptr, rem, "%3llu.%llu%%", val / 10, val % 10); ptr += n; rem -= n; for (i = 0; i < ret; i++) { /* cycle counters (I think).. convert to MHz.. */ val = cntrs[i] / 10000; - n = snprintf(ptr, rem, "\t%5d.%02d", + n = snprintf(ptr, rem, "\t%5llu.%02llu", val / 100, val % 100); ptr += n; rem -= n; -- 1.9.1