[PATCH] drm/amdgpu: add UAPI for workload profile to ctx interface

Alex Deucher <alexander.deucher@xxxxxxx> · Fri, 7 Mar 2025 13:15:39 -0500

Allow a rendering context to set a workload profile.  This
allows an application to select a workload profile to match
its intended use case.  Each rendering context can set a
profile and internally the SMU firmware will select the highest
priority profile among those that are active.  When the
context is destroyed, the profile is automatically cleaned up.

E.g., a compositor can select the fullscreen3d hint when it
unredirects a fullscreen game.  The hint allows the driver to
tune performance for the specific workload associated with
the rendering context.

Initial proposed userspace:
https://gitlab.freedesktop.org/monado/monado/-/merge_requests/2371

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3802
Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 82 ++++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  2 +
 include/uapi/drm/amdgpu_drm.h           |  9 +++
 3 files changed, 92 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index c43d1b6e5d66b..ccbfe4adbf9e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -339,6 +339,7 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
 	ctx->generation = amdgpu_vm_generation(mgr->adev, &fpriv->vm);
 	ctx->init_priority = priority;
 	ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
+	ctx->workload_profile = PP_SMC_POWER_PROFILE_COUNT;
 
 	r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
 	if (r)
@@ -404,6 +405,66 @@ static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
 	return r;
 }
 
+/*
+ * Apply a workload profile to a rendering context.
+ *
+ * Translates the AMDGPU_CTX_WORKLOAD_PROFILE_* value from userspace into the
+ * matching PP_SMC_POWER_PROFILE_* value, drops the profile the context
+ * currently holds (if any) and then enables the new one.
+ * PP_SMC_POWER_PROFILE_COUNT in ctx->workload_profile means "no profile held".
+ *
+ * Returns 0 on success, -EINVAL for an unknown profile value, or the error
+ * returned by amdgpu_dpm_switch_power_profile().
+ */
+static int amdgpu_ctx_set_workload_profile(struct amdgpu_ctx *ctx,
+					   u32 workload_profile)
+{
+	struct amdgpu_device *adev = ctx->mgr->adev;
+	enum PP_SMC_POWER_PROFILE profile;
+	int r;
+
+	switch (workload_profile) {
+	case AMDGPU_CTX_WORKLOAD_PROFILE_DEFAULT:
+		profile = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
+		break;
+	case AMDGPU_CTX_WORKLOAD_PROFILE_FULLSCREEN3D:
+		profile = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
+		break;
+	case AMDGPU_CTX_WORKLOAD_PROFILE_VIDEO:
+		profile = PP_SMC_POWER_PROFILE_VIDEO;
+		break;
+	case AMDGPU_CTX_WORKLOAD_PROFILE_VR:
+		profile = PP_SMC_POWER_PROFILE_VR;
+		break;
+	case AMDGPU_CTX_WORKLOAD_PROFILE_COMPUTE:
+		profile = PP_SMC_POWER_PROFILE_COMPUTE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (ctx->workload_profile != PP_SMC_POWER_PROFILE_COUNT) {
+		r = amdgpu_dpm_switch_power_profile(adev, ctx->workload_profile,
+						    false);
+		if (r)
+			return r;
+		/*
+		 * The old profile is released now.  Record that immediately so
+		 * that a failure below, or amdgpu_ctx_fini(), does not release
+		 * it a second time.
+		 */
+		ctx->workload_profile = PP_SMC_POWER_PROFILE_COUNT;
+	}
+
+	r = amdgpu_dpm_switch_power_profile(adev, profile, true);
+	if (r)
+		return r;
+
+	ctx->workload_profile = profile;
+
+	return 0;
+}
+
 static void amdgpu_ctx_fini(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
@@ -425,6 +469,9 @@ static void amdgpu_ctx_fini(struct kref *ref)
 
 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
 		amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate);
+		if (ctx->workload_profile != PP_SMC_POWER_PROFILE_COUNT)
+			amdgpu_dpm_switch_power_profile(adev, ctx->workload_profile,
+							false);
 		drm_dev_exit(idx);
 	}
 
@@ -662,11 +709,41 @@ static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev,
 	return r;
 }
 
+/*
+ * Look up context @id in the file's context manager and apply
+ * @workload_profile to it.  The lookup and the profile change both happen
+ * under mgr->lock.
+ *
+ * Returns -EINVAL when @fpriv is NULL or no context with @id exists,
+ * otherwise the result of amdgpu_ctx_set_workload_profile().
+ */
+static int amdgpu_ctx_workload_profile(struct amdgpu_device *adev,
+				       struct amdgpu_fpriv *fpriv, uint32_t id,
+				       u32 workload_profile)
+{
+	struct amdgpu_ctx_mgr *mgr;
+	struct amdgpu_ctx *ctx;
+	int r = -EINVAL;
+
+	if (!fpriv)
+		return r;
+
+	mgr = &fpriv->ctx_mgr;
+
+	mutex_lock(&mgr->lock);
+	ctx = idr_find(&mgr->ctx_handles, id);
+	if (ctx)
+		r = amdgpu_ctx_set_workload_profile(ctx, workload_profile);
+	mutex_unlock(&mgr->lock);
+
+	return r;
+}
+
 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 		     struct drm_file *filp)
 {
 	int r;
-	uint32_t id, stable_pstate;
+	uint32_t id, stable_pstate, workload_profile;
 	int32_t priority;
 
 	union drm_amdgpu_ctx *args = data;
@@ -720,6 +792,14 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 			return -EINVAL;
 		r = amdgpu_ctx_stable_pstate(adev, fpriv, id, true, &stable_pstate);
 		break;
+	case AMDGPU_CTX_OP_SET_WORKLOAD_PROFILE:
+		if (args->in.flags & ~AMDGPU_CTX_WORKLOAD_PROFILE_FLAGS_MASK)
+			return -EINVAL;
+		workload_profile = args->in.flags & AMDGPU_CTX_WORKLOAD_PROFILE_FLAGS_MASK;
+		if (workload_profile > AMDGPU_CTX_WORKLOAD_PROFILE_COMPUTE)
+			return -EINVAL;
+		r = amdgpu_ctx_workload_profile(adev, fpriv, id, workload_profile);
+		break;
 	default:
 		return -EINVAL;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 85376baaa92f2..1b42e6757fac9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -27,6 +27,7 @@
 #include <linux/types.h>
 
 #include "amdgpu_ring.h"
+#include "kgd_pp_interface.h"
 
 struct drm_device;
 struct drm_file;
@@ -58,6 +59,7 @@ struct amdgpu_ctx {
 	unsigned long			ras_counter_ue;
 	uint32_t			stable_pstate;
 	struct amdgpu_ctx_mgr		*ctx_mgr;
+	enum PP_SMC_POWER_PROFILE	workload_profile;
 };
 
 struct amdgpu_ctx_mgr {
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 5dbd9037afe75..9fcf81ab39a94 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -247,6 +247,7 @@ union drm_amdgpu_bo_list {
 #define AMDGPU_CTX_OP_QUERY_STATE2	4
 #define AMDGPU_CTX_OP_GET_STABLE_PSTATE	5
 #define AMDGPU_CTX_OP_SET_STABLE_PSTATE	6
+#define AMDGPU_CTX_OP_SET_WORKLOAD_PROFILE	7
 
 /* GPU reset status */
 #define AMDGPU_CTX_NO_RESET		0
@@ -289,6 +290,14 @@ union drm_amdgpu_bo_list {
 #define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK  3
 #define AMDGPU_CTX_STABLE_PSTATE_PEAK  4
 
+/* select a workload profile for applications */
+#define AMDGPU_CTX_WORKLOAD_PROFILE_FLAGS_MASK  0xf
+#define AMDGPU_CTX_WORKLOAD_PROFILE_DEFAULT  0
+#define AMDGPU_CTX_WORKLOAD_PROFILE_FULLSCREEN3D  1
+#define AMDGPU_CTX_WORKLOAD_PROFILE_VIDEO  2
+#define AMDGPU_CTX_WORKLOAD_PROFILE_VR  3
+#define AMDGPU_CTX_WORKLOAD_PROFILE_COMPUTE  4
+
 struct drm_amdgpu_ctx_in {
 	/** AMDGPU_CTX_OP_* */
 	__u32	op;
-- 
2.48.1