+
+ ndev = xdna->dev_handle;
+ ndev->dft_dpm_level = dpm_level;
+ if (ndev->pw_mode != POWER_MODE_DEFAULT || ndev->dpm_level == dpm_level)
+ return 0;
+
+ return ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
+}
+
static struct xrs_action_ops aie2_xrs_actions = {
.load = aie2_xrs_load,
.unload = aie2_xrs_unload,
+ .set_dft_dpm_level = aie2_xrs_set_dft_dpm_level,
};
static void aie2_hw_stop(struct amdxdna_dev *xdna)
@@ -354,6 +368,12 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
goto stop_psp;
}
+ ret = aie2_pm_init(ndev);
+ if (ret) {
+ XDNA_ERR(xdna, "failed to init pm, ret %d", ret);
+ goto destroy_mgmt_chann;
+ }
+
ret = aie2_mgmt_fw_init(ndev);
if (ret) {
XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret);
@@ -480,10 +500,9 @@ static int aie2_init(struct amdxdna_dev *xdna)
}
ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
- xrs_cfg.clk_list.num_levels = 3;
- xrs_cfg.clk_list.cu_clk_list[0] = 0;
- xrs_cfg.clk_list.cu_clk_list[1] = 800;
- xrs_cfg.clk_list.cu_clk_list[2] = 1000;
+ xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1;
+ for (i = 0; i < xrs_cfg.clk_list.num_levels; i++)
+ xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv->dpm_clk_tbl[i].hclk;
xrs_cfg.sys_eff_factor = 1;
xrs_cfg.ddev = &xdna->ddev;
xrs_cfg.actions = &aie2_xrs_actions;
@@ -657,6 +676,22 @@ static int aie2_get_firmware_version(struct amdxdna_client *client,
return 0;
}
+static int aie2_get_power_mode(struct amdxdna_client *client,
+ struct amdxdna_drm_get_info *args)
+{
+ struct amdxdna_drm_get_power_mode mode = {};
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_dev_hdl *ndev;
+
+ ndev = xdna->dev_handle;
+ mode.power_mode = ndev->pw_mode;
+
+ if (copy_to_user(u64_to_user_ptr(args->buffer), &mode, sizeof(mode)))
+ return -EFAULT;
+
+ return 0;
+}
+
static int aie2_get_clock_metadata(struct amdxdna_client *client,
struct amdxdna_drm_get_info *args)
{
@@ -670,11 +705,11 @@ static int aie2_get_clock_metadata(struct amdxdna_client *client,
if (!clock)
return -ENOMEM;
- memcpy(clock->mp_npu_clock.name, ndev->mp_npu_clock.name,
- sizeof(clock->mp_npu_clock.name));
- clock->mp_npu_clock.freq_mhz = ndev->mp_npu_clock.freq_mhz;
- memcpy(clock->h_clock.name, ndev->h_clock.name, sizeof(clock->h_clock.name));
- clock->h_clock.freq_mhz = ndev->h_clock.freq_mhz;
+ snprintf(clock->mp_npu_clock.name, sizeof(clock->mp_npu_clock.name),
+ "MP-NPU Clock");
+ clock->mp_npu_clock.freq_mhz = ndev->npuclk_freq;
+ snprintf(clock->h_clock.name, sizeof(clock->h_clock.name), "H Clock");
+ clock->h_clock.freq_mhz = ndev->hclk_freq;
if (copy_to_user(u64_to_user_ptr(args->buffer), clock, sizeof(*clock)))
ret = -EFAULT;
@@ -772,6 +807,9 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
case DRM_AMDXDNA_QUERY_FIRMWARE_VERSION:
ret = aie2_get_firmware_version(client, args);
break;
+ case DRM_AMDXDNA_GET_POWER_MODE:
+ ret = aie2_get_power_mode(client, args);
+ break;
default:
XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
ret = -EOPNOTSUPP;
@@ -782,12 +820,58 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
return ret;
}
+static int aie2_set_power_mode(struct amdxdna_client *client,
+ struct amdxdna_drm_set_state *args)
+{
+ struct amdxdna_drm_set_power_mode power_state;
+ enum amdxdna_power_mode_type power_mode;
+ struct amdxdna_dev *xdna = client->xdna;
+
+ if (copy_from_user(&power_state, u64_to_user_ptr(args->buffer),
+ sizeof(power_state))) {
+ XDNA_ERR(xdna, "Failed to copy power mode request into kernel");
+ return -EFAULT;
+ }
+
+ power_mode = power_state.power_mode;
+ if (power_mode > POWER_MODE_TURBO) {
+ XDNA_ERR(xdna, "Invalid power mode %d", power_mode);
+ return -EINVAL;
+ }
+
+ return aie2_pm_set_mode(xdna->dev_handle, power_mode);
+}
+
+static int aie2_set_state(struct amdxdna_client *client,
+ struct amdxdna_drm_set_state *args)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ int ret, idx;
+
+ if (!drm_dev_enter(&xdna->ddev, &idx))
+ return -ENODEV;
+
+ switch (args->param) {
+ case DRM_AMDXDNA_SET_POWER_MODE:
+ ret = aie2_set_power_mode(client, args);
+ break;
+ default:
+ XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ drm_dev_exit(idx);
+ return ret;
+}
+
const struct amdxdna_dev_ops aie2_ops = {
.init = aie2_init,
.fini = aie2_fini,
.resume = aie2_hw_start,
.suspend = aie2_hw_stop,
.get_aie_info = aie2_get_info,
+ .set_aie_state = aie2_set_state,
.hwctx_init = aie2_hwctx_init,
.hwctx_fini = aie2_hwctx_fini,
.hwctx_config = aie2_hwctx_config,
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index 1c6f07d9b805..8c17b74654ce 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -6,6 +6,7 @@
#ifndef _AIE2_PCI_H_
#define _AIE2_PCI_H_
+#include <drm/amdxdna_accel.h>
#include <linux/semaphore.h>
#include "amdxdna_mailbox.h"
@@ -48,9 +49,6 @@
pci_resource_len(NDEV2PDEV(_ndev), (_ndev)->xdna->dev_info->mbox_bar); \
})
-#define SMU_MPNPUCLK_FREQ_MAX(ndev) ((ndev)->priv->smu_mpnpuclk_freq_max)
-#define SMU_HCLK_FREQ_MAX(ndev) ((ndev)->priv->smu_hclk_freq_max)
-
enum aie2_smu_reg_idx {
SMU_CMD_REG = 0,
SMU_ARG_REG,
@@ -112,14 +110,20 @@ struct aie_metadata {
struct aie_tile_metadata shim;
};
-struct clock_entry {
- char name[16];
- u32 freq_mhz;
+enum rt_config_category {
+ AIE2_RT_CFG_INIT,
+ AIE2_RT_CFG_CLK_GATING,
};
struct rt_config {
u32 type;
u32 value;
+ u32 category;
+};
+
+struct dpm_clk_freq {
+ u32 npuclk;
+ u32 hclk;
};
/*
@@ -150,6 +154,7 @@ struct amdxdna_hwctx_priv {
};
enum aie2_dev_status {
+ AIE2_DEV_UNINIT,
AIE2_DEV_INIT,
AIE2_DEV_START,
};
@@ -169,8 +174,15 @@ struct amdxdna_dev_hdl {
u32 total_col;
struct aie_version version;
struct aie_metadata metadata;
- struct clock_entry mp_npu_clock;
- struct clock_entry h_clock;
+
+ /* power management and clock */
+ enum amdxdna_power_mode_type pw_mode;
+ u32 dpm_level;
+ u32 dft_dpm_level;
+ u32 max_dpm_level;
+ u32 clk_gating;
+ u32 npuclk_freq;
+ u32 hclk_freq;
/* Mailbox and the management channel */
struct mailbox *mbox;
@@ -178,6 +190,7 @@ struct amdxdna_dev_hdl {
struct async_events *async_events;
enum aie2_dev_status dev_status;
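+ /* number of hardware contexts; POWER_MODE_TURBO cannot be set while non-zero */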
+ u32 hwctx_num;
};
#define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \
@@ -188,11 +201,17 @@ struct aie2_bar_off_pair {
u32 offset;
};
+struct aie2_hw_ops {
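+ /* NPU1 programs the MP-NPU/H clocks directly; NPU2/4/5/6 set hard/soft DPM levels */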
+ int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+};
+
struct amdxdna_dev_priv {
const char *fw_path;
u64 protocol_major;
u64 protocol_minor;
- struct rt_config rt_config;
+ const struct rt_config *rt_config;
+ const struct dpm_clk_freq *dpm_clk_tbl;
+
#define COL_ALIGN_NONE 0
#define COL_ALIGN_NATURE 1
u32 col_align;
@@ -203,15 +222,29 @@ struct amdxdna_dev_priv {
struct aie2_bar_off_pair sram_offs[SRAM_MAX_INDEX];
struct aie2_bar_off_pair psp_regs_off[PSP_MAX_REGS];
struct aie2_bar_off_pair smu_regs_off[SMU_MAX_REGS];
- u32 smu_mpnpuclk_freq_max;
- u32 smu_hclk_freq_max;
+ struct aie2_hw_ops hw_ops;
};
extern const struct amdxdna_dev_ops aie2_ops;
+int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
+ enum rt_config_category category, u32 *val);
+
+/* aie2 npu hw config */
+extern const struct dpm_clk_freq npu1_dpm_clk_table[];
+extern const struct dpm_clk_freq npu4_dpm_clk_table[];
+extern const struct rt_config npu1_default_rt_cfg[];
+extern const struct rt_config npu4_default_rt_cfg[];
+
/* aie2_smu.c */
int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
void aie2_smu_fini(struct amdxdna_dev_hdl *ndev);
+int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+
+/* aie2_pm.c */
+int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
+int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target);
/* aie2_psp.c */
struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf);
diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/aie2_pm.c
new file mode 100644
index 000000000000..426c38fce848
--- /dev/null
+++ b/drivers/accel/amdxdna/aie2_pm.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_pci_drv.h"
+
+#define AIE2_CLK_GATING_ENABLE 1
+#define AIE2_CLK_GATING_DISABLE 0
+
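+/* Clock gating is toggled via the AIE2_RT_CFG_CLK_GATING runtime config entries */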
+static int aie2_pm_set_clk_gating(struct amdxdna_dev_hdl *ndev, u32 val)
+{
+ int ret;
+
+ ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, &val);
+ if (ret)
+ return ret;
+
+ ndev->clk_gating = val;
+ return 0;
+}
+
+int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
+{
+ int ret;
+
+ if (ndev->dev_status != AIE2_DEV_UNINIT) {
+ /* Resume device */
+ ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->dpm_level);
+ if (ret)
+ return ret;
+
+ ret = aie2_pm_set_clk_gating(ndev, ndev->clk_gating);
+ if (ret)
+ return ret;
+
+ return 0;
+ }
+
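+ /* dpm_clk_tbl is zero-terminated; walk it to find the highest supported DPM level */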
+ while (ndev->priv->dpm_clk_tbl[ndev->max_dpm_level].hclk)
+ ndev->max_dpm_level++;
+ ndev->max_dpm_level--;
+
+ ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level);
+ if (ret)
+ return ret;
+
+ ret = aie2_pm_set_clk_gating(ndev, AIE2_CLK_GATING_ENABLE);
+ if (ret)
+ return ret;
+
+ ndev->pw_mode = POWER_MODE_DEFAULT;
+ ndev->dft_dpm_level = ndev->max_dpm_level;
+
+ return 0;
+}
+
+int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target)
+{
+ struct amdxdna_dev *xdna = ndev->xdna;
+ u32 clk_gating, dpm_level;
+ int ret;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+ if (ndev->pw_mode == target)
+ return 0;
+
+ switch (target) {
+ case POWER_MODE_TURBO:
+ if (ndev->hwctx_num) {
+ XDNA_ERR(xdna, "Cannot set turbo mode while a hwctx is active");
+ return -EINVAL;
+ }
+
+ clk_gating = AIE2_CLK_GATING_DISABLE;
+ dpm_level = ndev->max_dpm_level;
+ break;
+ case POWER_MODE_HIGH:
+ clk_gating = AIE2_CLK_GATING_ENABLE;
+ dpm_level = ndev->max_dpm_level;
+ break;
+ case POWER_MODE_DEFAULT:
+ clk_gating = AIE2_CLK_GATING_ENABLE;
+ dpm_level = ndev->dft_dpm_level;
+ break;
+ default:
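+ /* POWER_MODE_LOW and POWER_MODE_MEDIUM are not supported yet */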
+ return -EOPNOTSUPP;
+ }
+
+ ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
+ if (ret)
+ return ret;
+
+ ret = aie2_pm_set_clk_gating(ndev, clk_gating);
+ if (ret)
+ return ret;
+
+ ndev->pw_mode = target;
+
+ return 0;
+}
diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c
index 91893d438da7..73388443c676 100644
--- a/drivers/accel/amdxdna/aie2_smu.c
+++ b/drivers/accel/amdxdna/aie2_smu.c
@@ -19,8 +19,11 @@
#define AIE2_SMU_POWER_OFF 0x4
#define AIE2_SMU_SET_MPNPUCLK_FREQ 0x5
#define AIE2_SMU_SET_HCLK_FREQ 0x6
+#define AIE2_SMU_SET_SOFT_DPMLEVEL 0x7
+#define AIE2_SMU_SET_HARD_DPMLEVEL 0x8
-static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, u32 reg_arg)
+static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd,
+ u32 reg_arg, u32 *out)
{
u32 resp;
int ret;
@@ -40,6 +43,9 @@ static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, u32 reg_arg)
return ret;
}
+ if (out)
+ *out = readl(SMU_REG(ndev, SMU_OUT_REG));
+
if (resp != SMU_RESULT_OK) {
XDNA_ERR(ndev->xdna, "smu cmd %d failed, 0x%x", reg_cmd, resp);
return -EINVAL;
@@ -48,63 +54,71 @@ static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, u32 reg_arg)
return 0;
}
-static int aie2_smu_set_mpnpu_clock_freq(struct amdxdna_dev_hdl *ndev, u32 freq_mhz)
+int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
{
+ u32 freq;
int ret;
- if (!freq_mhz || freq_mhz > SMU_MPNPUCLK_FREQ_MAX(ndev)) {
- XDNA_ERR(ndev->xdna, "invalid mpnpu clock freq %d", freq_mhz);
- return -EINVAL;
+ ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ,
+ ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n",
+ ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret);
}
+ ndev->npuclk_freq = freq;
- ndev->mp_npu_clock.freq_mhz = freq_mhz;
- ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ, freq_mhz);
- if (!ret)
- XDNA_INFO_ONCE(ndev->xdna, "set mpnpu_clock = %d mhz", freq_mhz);
-
- return ret;
-}
-
-static int aie2_smu_set_hclock_freq(struct amdxdna_dev_hdl *ndev, u32 freq_mhz)
-{
- int ret;
-
- if (!freq_mhz || freq_mhz > SMU_HCLK_FREQ_MAX(ndev)) {
- XDNA_ERR(ndev->xdna, "invalid hclock freq %d", freq_mhz);
- return -EINVAL;
+ ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ,
+ ndev->priv->dpm_clk_tbl[dpm_level].hclk, &freq);
+ if (ret) {
+ XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n",
+ ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret);
}
+ ndev->hclk_freq = freq;
+ ndev->dpm_level = dpm_level;
- ndev->h_clock.freq_mhz = freq_mhz;
- ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ, freq_mhz);
- if (!ret)
- XDNA_INFO_ONCE(ndev->xdna, "set npu_hclock = %d mhz", freq_mhz);
+ XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
+ ndev->npuclk_freq, ndev->hclk_freq);
- return ret;
+ return 0;
}
-int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
+int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
{
int ret;
- ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0);
+ ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, NULL);
if (ret) {
- XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret);
+ XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d",
+ dpm_level, ret);
return ret;
}
- ret = aie2_smu_set_mpnpu_clock_freq(ndev, SMU_MPNPUCLK_FREQ_MAX(ndev));
+ ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, NULL);
if (ret) {
- XDNA_ERR(ndev->xdna, "Set mpnpu clk freq failed, ret %d", ret);
+ XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d",
+ dpm_level, ret);
return ret;
}
- snprintf(ndev->mp_npu_clock.name, sizeof(ndev->mp_npu_clock.name), "MP-NPU Clock");
- ret = aie2_smu_set_hclock_freq(ndev, SMU_HCLK_FREQ_MAX(ndev));
+ ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
+ ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
+ ndev->dpm_level = dpm_level;
+
+ XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
+ ndev->npuclk_freq, ndev->hclk_freq);
+
+ return 0;
+}
+
+int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
+{
+ int ret;
+
+ ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0, NULL);
if (ret) {
- XDNA_ERR(ndev->xdna, "Set hclk freq failed, ret %d", ret);
+ XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret);
return ret;
}
- snprintf(ndev->h_clock.name, sizeof(ndev->h_clock.name), "H Clock");
return 0;
}
@@ -113,7 +127,8 @@ void aie2_smu_fini(struct amdxdna_dev_hdl *ndev)
{
int ret;
- ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0);
+ ndev->priv->hw_ops.set_dpm(ndev, 0);
+ ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0, NULL);
if (ret)
XDNA_ERR(ndev->xdna, "Power off failed, ret %d", ret);
}
diff --git a/drivers/accel/amdxdna/aie2_solver.c b/drivers/accel/amdxdna/aie2_solver.c
index a537c66589a4..1939625d6027 100644
--- a/drivers/accel/amdxdna/aie2_solver.c
+++ b/drivers/accel/amdxdna/aie2_solver.c
@@ -25,6 +25,7 @@ struct solver_node {
struct partition_node *pt_node;
void *cb_arg;
+ u32 dpm_level;
u32 cols_len;
u32 start_cols[] __counted_by(cols_len);
};
@@ -95,6 +96,51 @@ static int sanity_check(struct solver_state *xrs, struct alloc_requests *req)
return 0;
}
+static bool is_valid_qos_dpm_params(struct aie_qos *rqos)
+{
+ /*
+ * gops is retrieved from the xmodel, so it's always set
+ * fps and latency are the configurable params from the application
+ */
+ if (rqos->gops > 0 && (rqos->fps > 0 || rqos->latency > 0))
+ return true;
+
+ return false;
+}
+
+static int set_dpm_level(struct solver_state *xrs, struct alloc_requests *req, u32 *dpm_level)
+{
+ struct solver_rgroup *rgp = &xrs->rgp;
+ struct cdo_parts *cdop = &req->cdo;
+ struct aie_qos *rqos = &req->rqos;
+ u32 freq, max_dpm_level, level;
+ struct solver_node *node;
+
+ max_dpm_level = xrs->cfg.clk_list.num_levels - 1;
+ /* If no QoS parameters are passed, set it to the max DPM level */
+ if (!is_valid_qos_dpm_params(rqos)) {
+ level = max_dpm_level;
+ goto set_dpm;
+ }
+
+ /* Find the lowest DPM level whose clock meets the GOPs requirement */
+ for (level = 0; level < max_dpm_level; level++) {
+ freq = xrs->cfg.clk_list.cu_clk_list[level];
+ if (!qos_meet(xrs, rqos, cdop->qos_cap.opc * freq / 1000))
+ break;
+ }
+
+ /* set the dpm level which fits all the sessions */
+ list_for_each_entry(node, &rgp->node_list, list) {
+ if (node->dpm_level > level)
+ level = node->dpm_level;
+ }
+
+set_dpm:
+ *dpm_level = level;
+ return xrs->cfg.actions->set_dft_dpm_level(xrs->cfg.ddev, level);
+}
+
static struct solver_node *rg_search_node(struct solver_rgroup *rgp, u64 rid)
{
struct solver_node *node;
@@ -159,12 +205,9 @@ static int get_free_partition(struct solver_state *xrs,
pt_node->ncols = ncols;
/*
- * Before fully support latency in QoS, if a request
- * specifies a non-zero latency value, it will not share
- * the partition with other requests.
+ * Always set exclusive to false for now.
*/
- if (req->rqos.latency)
- pt_node->exclusive = true;
+ pt_node->exclusive = false;
list_add_tail(&pt_node->list, &xrs->rgp.pt_node_list);
xrs->rgp.npartition_node++;
@@ -257,6 +300,7 @@ int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg)
struct xrs_action_load load_act;
struct solver_node *snode;
struct solver_state *xrs;
+ u32 dpm_level;
int ret;
xrs = (struct solver_state *)hdl;
@@ -281,6 +325,11 @@ int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg)
if (ret)
goto free_node;
+ ret = set_dpm_level(xrs, req, &dpm_level);
+ if (ret)
+ goto free_node;
+
+ snode->dpm_level = dpm_level;
snode->cb_arg = cb_arg;
drm_dbg(xrs->cfg.ddev, "start col %d ncols %d\n",
diff --git a/drivers/accel/amdxdna/aie2_solver.h b/drivers/accel/amdxdna/aie2_solver.h
index 9b1847bb46a6..a2e3c52229e9 100644
--- a/drivers/accel/amdxdna/aie2_solver.h
+++ b/drivers/accel/amdxdna/aie2_solver.h
@@ -99,6 +99,7 @@ struct clk_list_info {
struct xrs_action_ops {
int (*load)(void *cb_arg, struct xrs_action_load *action);
int (*unload)(void *cb_arg);
+ int (*set_dft_dpm_level)(struct drm_device *ddev, u32 level);
};
/*
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index c3541796d189..6bbd437d48d8 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -160,6 +160,24 @@ static int amdxdna_drm_get_info_ioctl(struct drm_device *dev, void *data, struct
return ret;
}
+static int amdxdna_drm_set_state_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_drm_set_state *args = data;
+ int ret;
+
+ if (!xdna->dev_info->ops->set_aie_state)
+ return -EOPNOTSUPP;
+
+ XDNA_DBG(xdna, "Request parameter %u", args->param);
+ mutex_lock(&xdna->dev_lock);
+ ret = xdna->dev_info->ops->set_aie_state(client, args);
+ mutex_unlock(&xdna->dev_lock);
+
+ return ret;
+}
+
static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
/* Context */
DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, amdxdna_drm_create_hwctx_ioctl, 0),
@@ -173,6 +191,7 @@ static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0),
/* AIE hardware */
DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(AMDXDNA_SET_STATE, amdxdna_drm_set_state_ioctl, DRM_ROOT_ONLY),
};
static const struct file_operations amdxdna_fops = {
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index f5b830fb14bb..e2071e31d949 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -20,6 +20,7 @@ extern const struct drm_driver amdxdna_drm_drv;
struct amdxdna_client;
struct amdxdna_dev;
struct amdxdna_drm_get_info;
+struct amdxdna_drm_set_state;
struct amdxdna_gem_obj;
struct amdxdna_hwctx;
struct amdxdna_sched_job;
@@ -40,6 +41,7 @@ struct amdxdna_dev_ops {
void (*hwctx_resume)(struct amdxdna_hwctx *hwctx);
int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
int (*get_aie_info)(struct amdxdna_client *client, struct amdxdna_drm_get_info *args);
+ int (*set_aie_state)(struct amdxdna_client *client, struct amdxdna_drm_set_state *args);
};
/*
diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
index f00c50461b09..c8f4d1cac65d 100644
--- a/drivers/accel/amdxdna/npu1_regs.c
+++ b/drivers/accel/amdxdna/npu1_regs.c
@@ -44,18 +44,30 @@
#define NPU1_SMU_BAR_BASE MPNPU_APERTURE0_BASE
#define NPU1_SRAM_BAR_BASE MPNPU_APERTURE1_BASE
-#define NPU1_RT_CFG_TYPE_PDI_LOAD 2
-#define NPU1_RT_CFG_VAL_PDI_LOAD_MGMT 0
-#define NPU1_RT_CFG_VAL_PDI_LOAD_APP 1
+const struct rt_config npu1_default_rt_cfg[] = {
+ { 2, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
+ { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 0 },
+};
-#define NPU1_MPNPUCLK_FREQ_MAX 600
-#define NPU1_HCLK_FREQ_MAX 1024
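+/* Each entry is { MP-NPU clock MHz, H clock MHz } for one DPM level; zero-terminated */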
+const struct dpm_clk_freq npu1_dpm_clk_table[] = {
+ {400, 800},
+ {600, 1024},
+ {600, 1024},
+ {600, 1024},
+ {600, 1024},
+ {720, 1309},
+ {720, 1309},
+ {847, 1600},
+ { 0 }
+};
const struct amdxdna_dev_priv npu1_dev_priv = {
.fw_path = "amdnpu/1502_00/npu.sbin",
.protocol_major = 0x5,
.protocol_minor = 0x1,
- .rt_config = {NPU1_RT_CFG_TYPE_PDI_LOAD, NPU1_RT_CFG_VAL_PDI_LOAD_APP},
+ .rt_config = npu1_default_rt_cfg,
+ .dpm_clk_tbl = npu1_dpm_clk_table,
.col_align = COL_ALIGN_NONE,
.mbox_dev_addr = NPU1_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
@@ -80,8 +92,9 @@ const struct amdxdna_dev_priv npu1_dev_priv = {
DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU1_SMU, MPNPU_PUB_SCRATCH6),
DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU1_SMU, MPNPU_PUB_SCRATCH7),
},
- .smu_mpnpuclk_freq_max = NPU1_MPNPUCLK_FREQ_MAX,
- .smu_hclk_freq_max = NPU1_HCLK_FREQ_MAX,
+ .hw_ops = {
+ .set_dpm = npu1_set_dpm,
+ },
};
const struct amdxdna_dev_info dev_npu1_info = {
diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c
index 00cb381031d2..ac63131f9c7c 100644
--- a/drivers/accel/amdxdna/npu2_regs.c
+++ b/drivers/accel/amdxdna/npu2_regs.c
@@ -61,18 +61,12 @@
#define NPU2_SMU_BAR_BASE MMNPU_APERTURE4_BASE
#define NPU2_SRAM_BAR_BASE MMNPU_APERTURE1_BASE
-#define NPU2_RT_CFG_TYPE_PDI_LOAD 5
-#define NPU2_RT_CFG_VAL_PDI_LOAD_MGMT 0
-#define NPU2_RT_CFG_VAL_PDI_LOAD_APP 1
-
-#define NPU2_MPNPUCLK_FREQ_MAX 1267
-#define NPU2_HCLK_FREQ_MAX 1800
-
const struct amdxdna_dev_priv npu2_dev_priv = {
.fw_path = "amdnpu/17f0_00/npu.sbin",
.protocol_major = 0x6,
.protocol_minor = 0x1,
- .rt_config = {NPU2_RT_CFG_TYPE_PDI_LOAD, NPU2_RT_CFG_VAL_PDI_LOAD_APP},
+ .rt_config = npu4_default_rt_cfg,
+ .dpm_clk_tbl = npu4_dpm_clk_table,
.col_align = COL_ALIGN_NATURE,
.mbox_dev_addr = NPU2_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
@@ -97,8 +91,9 @@ const struct amdxdna_dev_priv npu2_dev_priv = {
DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU2_SMU, MP1_C2PMSG_61),
DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU2_SMU, MP1_C2PMSG_60),
},
- .smu_mpnpuclk_freq_max = NPU2_MPNPUCLK_FREQ_MAX,
- .smu_hclk_freq_max = NPU2_HCLK_FREQ_MAX,
+ .hw_ops = {
+ .set_dpm = npu4_set_dpm,
+ },
};
const struct amdxdna_dev_info dev_npu2_info = {
diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
index b6dae9667cca..a713ac18adfc 100644
--- a/drivers/accel/amdxdna/npu4_regs.c
+++ b/drivers/accel/amdxdna/npu4_regs.c
@@ -61,18 +61,33 @@
#define NPU4_SMU_BAR_BASE MMNPU_APERTURE4_BASE
#define NPU4_SRAM_BAR_BASE MMNPU_APERTURE1_BASE
-#define NPU4_RT_CFG_TYPE_PDI_LOAD 5
-#define NPU4_RT_CFG_VAL_PDI_LOAD_MGMT 0
-#define NPU4_RT_CFG_VAL_PDI_LOAD_APP 1
+const struct rt_config npu4_default_rt_cfg[] = {
+ { 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
+ { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+ { 0 },
+};
-#define NPU4_MPNPUCLK_FREQ_MAX 1267
-#define NPU4_HCLK_FREQ_MAX 1800
+const struct dpm_clk_freq npu4_dpm_clk_table[] = {
+ {396, 792},
+ {600, 1056},
+ {792, 1152},
+ {975, 1267},
+ {975, 1267},
+ {1056, 1408},
+ {1152, 1584},
+ {1267, 1800},
+ { 0 }
+};
const struct amdxdna_dev_priv npu4_dev_priv = {
.fw_path = "amdnpu/17f0_10/npu.sbin",
.protocol_major = 0x6,
.protocol_minor = 0x1,
- .rt_config = {NPU4_RT_CFG_TYPE_PDI_LOAD, NPU4_RT_CFG_VAL_PDI_LOAD_APP},
+ .rt_config = npu4_default_rt_cfg,
+ .dpm_clk_tbl = npu4_dpm_clk_table,
.col_align = COL_ALIGN_NATURE,
.mbox_dev_addr = NPU4_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
@@ -97,8 +112,9 @@ const struct amdxdna_dev_priv npu4_dev_priv = {
DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61),
DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU4_SMU, MP1_C2PMSG_60),
},
- .smu_mpnpuclk_freq_max = NPU4_MPNPUCLK_FREQ_MAX,
- .smu_hclk_freq_max = NPU4_HCLK_FREQ_MAX,
+ .hw_ops = {
+ .set_dpm = npu4_set_dpm,
+ },
};
const struct amdxdna_dev_info dev_npu4_info = {
diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c
index bed1baf8e160..67a5d5bc8a49 100644
--- a/drivers/accel/amdxdna/npu5_regs.c
+++ b/drivers/accel/amdxdna/npu5_regs.c
@@ -61,18 +61,12 @@
#define NPU5_SMU_BAR_BASE MMNPU_APERTURE4_BASE
#define NPU5_SRAM_BAR_BASE MMNPU_APERTURE1_BASE
-#define NPU5_RT_CFG_TYPE_PDI_LOAD 5
-#define NPU5_RT_CFG_VAL_PDI_LOAD_MGMT 0
-#define NPU5_RT_CFG_VAL_PDI_LOAD_APP 1
-
-#define NPU5_MPNPUCLK_FREQ_MAX 1267
-#define NPU5_HCLK_FREQ_MAX 1800
-
const struct amdxdna_dev_priv npu5_dev_priv = {
.fw_path = "amdnpu/17f0_11/npu.sbin",
.protocol_major = 0x6,
.protocol_minor = 0x1,
- .rt_config = {NPU5_RT_CFG_TYPE_PDI_LOAD, NPU5_RT_CFG_VAL_PDI_LOAD_APP},
+ .rt_config = npu4_default_rt_cfg,
+ .dpm_clk_tbl = npu4_dpm_clk_table,
.col_align = COL_ALIGN_NATURE,
.mbox_dev_addr = NPU5_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
@@ -97,8 +91,9 @@ const struct amdxdna_dev_priv npu5_dev_priv = {
DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU5_SMU, MP1_C2PMSG_61),
DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU5_SMU, MP1_C2PMSG_60),
},
- .smu_mpnpuclk_freq_max = NPU5_MPNPUCLK_FREQ_MAX,
- .smu_hclk_freq_max = NPU5_HCLK_FREQ_MAX,
+ .hw_ops = {
+ .set_dpm = npu4_set_dpm,
+ },
};
const struct amdxdna_dev_info dev_npu5_info = {
diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c
index d1168fc55533..f46c760cefc7 100644
--- a/drivers/accel/amdxdna/npu6_regs.c
+++ b/drivers/accel/amdxdna/npu6_regs.c
@@ -61,23 +61,12 @@
#define NPU6_SMU_BAR_BASE MMNPU_APERTURE4_BASE
#define NPU6_SRAM_BAR_BASE MMNPU_APERTURE1_BASE
-#define NPU6_RT_CFG_TYPE_PDI_LOAD 5
-#define NPU6_RT_CFG_TYPE_DEBUG_BO 10
-
-#define NPU6_RT_CFG_VAL_PDI_LOAD_MGMT 0
-#define NPU6_RT_CFG_VAL_PDI_LOAD_APP 1
-
-#define NPU6_RT_CFG_VAL_DEBUG_BO_DEFAULT 0
-#define NPU6_RT_CFG_VAL_DEBUG_BO_LARGE 1
-
-#define NPU6_MPNPUCLK_FREQ_MAX 1267
-#define NPU6_HCLK_FREQ_MAX 1800
-
const struct amdxdna_dev_priv npu6_dev_priv = {
.fw_path = "amdnpu/17f0_10/npu.sbin",
.protocol_major = 0x6,
.protocol_minor = 12,
- .rt_config = {NPU6_RT_CFG_TYPE_PDI_LOAD, NPU6_RT_CFG_VAL_PDI_LOAD_APP},
+ .rt_config = npu4_default_rt_cfg,
+ .dpm_clk_tbl = npu4_dpm_clk_table,
.col_align = COL_ALIGN_NATURE,
.mbox_dev_addr = NPU6_MBOX_BAR_BASE,
.mbox_size = 0, /* Use BAR size */
@@ -102,6 +91,10 @@ const struct amdxdna_dev_priv npu6_dev_priv = {
DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU6_SMU, MP1_C2PMSG_61),
DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU6_SMU, MP1_C2PMSG_60),
},
+ .hw_ops = {
+ .set_dpm = npu4_set_dpm,
+ },
+
};
const struct amdxdna_dev_info dev_npu6_info = {
diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
index 4f15e53a548d..9af9302baf90 100644
--- a/include/uapi/drm/amdxdna_accel.h
+++ b/include/uapi/drm/amdxdna_accel.h
@@ -33,6 +33,7 @@ enum amdxdna_drm_ioctl_id {
DRM_AMDXDNA_SYNC_BO,
DRM_AMDXDNA_EXEC_CMD,
DRM_AMDXDNA_GET_INFO,
+ DRM_AMDXDNA_SET_STATE,
};
/**
@@ -375,6 +376,24 @@ struct amdxdna_drm_query_hwctx {
__u64 errors;
};
+enum amdxdna_power_mode_type {
+ POWER_MODE_DEFAULT, /* Fallback to calculated DPM */
+ POWER_MODE_LOW, /* Set frequency to lowest DPM */
+ POWER_MODE_MEDIUM, /* Set frequency to medium DPM */
+ POWER_MODE_HIGH, /* Set frequency to highest DPM */
+ POWER_MODE_TURBO, /* Maximum power */
+};
+
+/**
+ * struct amdxdna_drm_get_power_mode - Get the configured power mode
+ * @power_mode: The mode type from enum amdxdna_power_mode_type
+ * @pad: MBZ.
+ */
+struct amdxdna_drm_get_power_mode {
+ __u8 power_mode;
+ __u8 pad[7];
+};
+
/**
* struct amdxdna_drm_query_firmware_version - Query the firmware version
* @major: The major version number
@@ -397,6 +416,7 @@ enum amdxdna_drm_get_param {
DRM_AMDXDNA_QUERY_SENSORS,
DRM_AMDXDNA_QUERY_HW_CONTEXTS,
DRM_AMDXDNA_QUERY_FIRMWARE_VERSION = 8,
+ DRM_AMDXDNA_GET_POWER_MODE,
};
/**
@@ -411,6 +431,34 @@ struct amdxdna_drm_get_info {
__u64 buffer; /* in/out */
};
+enum amdxdna_drm_set_param {
+ DRM_AMDXDNA_SET_POWER_MODE,
+ DRM_AMDXDNA_WRITE_AIE_MEM,
+ DRM_AMDXDNA_WRITE_AIE_REG,
+};
+
+/**
+ * struct amdxdna_drm_set_state - Set the state of the AIE hardware.
+ * @param: Value in enum amdxdna_drm_set_param.
+ * @buffer_size: Size of the input param.
+ * @buffer: Input param.
+ */
+struct amdxdna_drm_set_state {
+ __u32 param; /* in */
+ __u32 buffer_size; /* in */
+ __u64 buffer; /* in */
+};
+
+/**
+ * struct amdxdna_drm_set_power_mode - Set the power mode of the AIE hardware
+ * @power_mode: The target power mode from enum amdxdna_power_mode_type
+ * @pad: MBZ.
+ */
+struct amdxdna_drm_set_power_mode {
+ __u8 power_mode;
+ __u8 pad[7];
+};
+
#define DRM_IOCTL_AMDXDNA_CREATE_HWCTX \
DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CREATE_HWCTX, \
struct amdxdna_drm_create_hwctx)
@@ -443,6 +491,10 @@ struct amdxdna_drm_get_info {
DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_INFO, \
struct amdxdna_drm_get_info)
+#define DRM_IOCTL_AMDXDNA_SET_STATE \
+ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SET_STATE, \
+ struct amdxdna_drm_set_state)
+
#if defined(__cplusplus)
} /* extern c end */
#endif
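
A minimal userspace sketch of the new SET_STATE path, for illustration only. The
/dev/accel/accel0 node name is an assumption, and the call is root-only per the
DRM_ROOT_ONLY flag above:

  #include <fcntl.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <sys/ioctl.h>
  #include <unistd.h>
  #include <drm/amdxdna_accel.h>

  int main(void)
  {
      /* Ask for the highest DPM level with clock gating kept enabled */
      struct amdxdna_drm_set_power_mode power = { .power_mode = POWER_MODE_HIGH };
      struct amdxdna_drm_set_state args = {
          .param = DRM_AMDXDNA_SET_POWER_MODE,
          .buffer_size = sizeof(power),
          .buffer = (uintptr_t)&power,
      };
      int fd = open("/dev/accel/accel0", O_RDWR); /* assumed device node */

      if (fd < 0)
          return 1;
      if (ioctl(fd, DRM_IOCTL_AMDXDNA_SET_STATE, &args))
          perror("DRM_IOCTL_AMDXDNA_SET_STATE");
      close(fd);
      return 0;
  }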