The hardware can be shared among multiple user applications. The hardware resources are allocated/freed based on the request from user application via driver IOCTLs. DRM_IOCTL_AMDXDNA_CREATE_HWCTX Allocate tile columns and create a hardware context structure to track the usage and status of the resources. A hardware context ID is returned for XDNA command execution. DRM_IOCTL_AMDXDNA_DESTROY_HWCTX Release hardware context based on its ID. The tile columns belong to this hardware context will be reclaimed. DRM_IOCTL_AMDXDNA_CONFIG_HWCTX Config hardware context. Bind the hardware context to the required resources. Co-developed-by: Min Ma <min.ma@xxxxxxx> Signed-off-by: Min Ma <min.ma@xxxxxxx> Signed-off-by: Lizhi Hou <lizhi.hou@xxxxxxx> --- drivers/accel/amdxdna/Makefile | 2 + drivers/accel/amdxdna/aie2_ctx.c | 181 ++++++++++++++++++++ drivers/accel/amdxdna/aie2_message.c | 87 ++++++++++ drivers/accel/amdxdna/aie2_pci.c | 41 +++++ drivers/accel/amdxdna/aie2_pci.h | 12 ++ drivers/accel/amdxdna/amdxdna_ctx.c | 210 ++++++++++++++++++++++++ drivers/accel/amdxdna/amdxdna_ctx.h | 41 +++++ drivers/accel/amdxdna/amdxdna_drm.c | 107 +++++++++++- drivers/accel/amdxdna/amdxdna_drm.h | 21 +++ drivers/accel/amdxdna/amdxdna_pci_drv.c | 15 ++ include/uapi/drm/amdxdna_accel.h | 128 +++++++++++++++ 11 files changed, 844 insertions(+), 1 deletion(-) create mode 100644 drivers/accel/amdxdna/aie2_ctx.c create mode 100644 drivers/accel/amdxdna/amdxdna_ctx.c create mode 100644 drivers/accel/amdxdna/amdxdna_ctx.h diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile index b44de7fe0c9e..9cf90e92564d 100644 --- a/drivers/accel/amdxdna/Makefile +++ b/drivers/accel/amdxdna/Makefile @@ -1,11 +1,13 @@ # SPDX-License-Identifier: GPL-2.0-only amdxdna-y := \ + aie2_ctx.o \ aie2_message.o \ aie2_pci.o \ aie2_psp.o \ aie2_smu.o \ aie2_solver.o \ + amdxdna_ctx.o \ amdxdna_drm.o \ amdxdna_mailbox.o \ amdxdna_mailbox_helper.o \ diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c new file mode 100644 index 000000000000..8843bc210cb2 --- /dev/null +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024, Advanced Micro Devices, Inc. + */ + +#include "aie2_pci.h" +#include "aie2_solver.h" + +static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + struct amdxdna_dev_hdl *ndev; + int start, end, first, last; + u32 width = 1, entries = 0; + int i; + + if (!hwctx->num_tiles) { + XDNA_ERR(xdna, "Number of tiles is zero"); + return -EINVAL; + } + + ndev = xdna->dev_handle; + if (unlikely(!ndev->metadata.core.row_count)) { + XDNA_WARN(xdna, "Core tile row count is zero"); + return -EINVAL; + } + + hwctx->num_col = hwctx->num_tiles / ndev->metadata.core.row_count; + if (!hwctx->num_col || hwctx->num_col > ndev->total_col) { + XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col); + return -EINVAL; + } + + if (ndev->priv->col_align == COL_ALIGN_NATURE) + width = hwctx->num_col; + + /* + * In range [start, end], find out columns that is multiple of width. + * 'first' is the first column, + * 'last' is the last column, + * 'entries' is the total number of columns. + */ + start = xdna->dev_info->first_col; + end = ndev->total_col - hwctx->num_col; + if (start > 0 && end == 0) { + XDNA_DBG(xdna, "Force start from col 0"); + start = 0; + } + first = start + (width - start % width) % width; + last = end - end % width; + if (last >= first) + entries = (last - first) / width + 1; + XDNA_DBG(xdna, "start %d end %d first %d last %d", + start, end, first, last); + + if (unlikely(!entries)) { + XDNA_ERR(xdna, "Start %d end %d width %d", + start, end, width); + return -EINVAL; + } + + hwctx->col_list = kmalloc_array(entries, sizeof(*hwctx->col_list), GFP_KERNEL); + if (!hwctx->col_list) + return -ENOMEM; + + hwctx->col_list_len = entries; + hwctx->col_list[0] = first; + for (i = 1; i < entries; i++) + hwctx->col_list[i] = hwctx->col_list[i - 1] + width; + + print_hex_dump_debug("col_list: ", DUMP_PREFIX_OFFSET, 16, 4, hwctx->col_list, + entries * sizeof(*hwctx->col_list), false); + return 0; +} + +static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + struct alloc_requests *xrs_req; + int ret; + + xrs_req = kzalloc(sizeof(*xrs_req), GFP_KERNEL); + if (!xrs_req) + return -ENOMEM; + + xrs_req->cdo.start_cols = hwctx->col_list; + xrs_req->cdo.cols_len = hwctx->col_list_len; + xrs_req->cdo.ncols = hwctx->num_col; + xrs_req->cdo.qos_cap.opc = hwctx->max_opc; + + xrs_req->rqos.gops = hwctx->qos.gops; + xrs_req->rqos.fps = hwctx->qos.fps; + xrs_req->rqos.dma_bw = hwctx->qos.dma_bandwidth; + xrs_req->rqos.latency = hwctx->qos.latency; + xrs_req->rqos.exec_time = hwctx->qos.frame_exec_time; + xrs_req->rqos.priority = hwctx->qos.priority; + + xrs_req->rid = (uintptr_t)hwctx; + + ret = xrs_allocate_resource(xdna->xrs_hdl, xrs_req, hwctx); + if (ret) + XDNA_ERR(xdna, "Allocate AIE resource failed, ret %d", ret); + + kfree(xrs_req); + return ret; +} + +static void aie2_release_resource(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + int ret; + + ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx); + if (ret) + XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret); +} + +int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_client *client = hwctx->client; + struct amdxdna_dev *xdna = client->xdna; + struct amdxdna_hwctx_priv *priv; + int ret; + + priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + hwctx->priv = priv; + + ret = aie2_hwctx_col_list(hwctx); + if (ret) { + XDNA_ERR(xdna, "Create col list failed, ret %d", ret); + goto free_priv; + } + + ret = aie2_alloc_resource(hwctx); + if (ret) { + XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret); + goto free_col_list; + } + + hwctx->status = HWCTX_STAT_INIT; + + XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name); + + return 0; + +free_col_list: + kfree(hwctx->col_list); +free_priv: + kfree(priv); + return ret; +} + +void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_dev *xdna; + + xdna = hwctx->client->xdna; + aie2_release_resource(hwctx); + + kfree(hwctx->col_list); + kfree(hwctx->priv); + kfree(hwctx->cus); +} + +int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); + switch (type) { + case DRM_AMDXDNA_HWCTX_CONFIG_CU: + case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF: + case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF: + return -EOPNOTSUPP; + default: + XDNA_DBG(xdna, "Not supported type %d", type); + return -EOPNOTSUPP; + } +} diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index 71df005b7408..54fb0f68587a 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -185,3 +185,90 @@ int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev, return 0; } + +int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx) +{ + DECLARE_AIE2_MSG(create_ctx, MSG_OP_CREATE_CONTEXT); + struct amdxdna_dev *xdna = ndev->xdna; + struct xdna_mailbox_chann_res x2i; + struct xdna_mailbox_chann_res i2x; + struct cq_pair *cq_pair; + u32 intr_reg; + int ret; + + req.aie_type = 1; + req.start_col = hwctx->start_col; + req.num_col = hwctx->num_col; + req.num_cq_pairs_requested = 1; + req.pasid = hwctx->client->pasid; + req.context_priority = 2; + + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) + return ret; + + hwctx->fw_ctx_id = resp.context_id; + WARN_ONCE(hwctx->fw_ctx_id == -1, "Unexpected context id"); + + cq_pair = &resp.cq_pair[0]; + x2i.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.head_addr); + x2i.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.tail_addr); + x2i.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->x2i_q.buf_addr); + x2i.rb_size = cq_pair->x2i_q.buf_size; + + i2x.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.head_addr); + i2x.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.tail_addr); + i2x.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->i2x_q.buf_addr); + i2x.rb_size = cq_pair->i2x_q.buf_size; + + ret = pci_irq_vector(to_pci_dev(xdna->ddev.dev), resp.msix_id); + if (ret == -EINVAL) { + XDNA_ERR(xdna, "not able to create channel"); + goto out_destroy_context; + } + + intr_reg = i2x.mb_head_ptr_reg + 4; + hwctx->priv->mbox_chann = xdna_mailbox_create_channel(ndev->mbox, &x2i, &i2x, + intr_reg, ret); + if (!hwctx->priv->mbox_chann) { + XDNA_ERR(xdna, "not able to create channel"); + ret = -EINVAL; + goto out_destroy_context; + } + + XDNA_DBG(xdna, "%s mailbox channel irq: %d, msix_id: %d", + hwctx->name, ret, resp.msix_id); + XDNA_DBG(xdna, "%s created fw ctx %d pasid %d", hwctx->name, + hwctx->fw_ctx_id, hwctx->client->pasid); + + return 0; + +out_destroy_context: + aie2_destroy_context(ndev, hwctx); + return ret; +} + +int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx) +{ + DECLARE_AIE2_MSG(destroy_ctx, MSG_OP_DESTROY_CONTEXT); + struct amdxdna_dev *xdna = ndev->xdna; + int ret; + + if (hwctx->fw_ctx_id == -1) + return 0; + + xdna_mailbox_stop_channel(hwctx->priv->mbox_chann); + + req.context_id = hwctx->fw_ctx_id; + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) + XDNA_WARN(xdna, "%s destroy context failed, ret %d", hwctx->name, ret); + + xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann); + XDNA_DBG(xdna, "%s destroyed fw ctx %d", hwctx->name, + hwctx->fw_ctx_id); + hwctx->priv->mbox_chann = NULL; + hwctx->fw_ctx_id = -1; + + return ret; +} diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c index 7fd5e4497189..5fe0db3462d4 100644 --- a/drivers/accel/amdxdna/aie2_pci.c +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -204,6 +204,43 @@ static void aie2_mgmt_fw_fini(struct amdxdna_dev_hdl *ndev) XDNA_DBG(ndev->xdna, "Firmware suspended"); } +static int aie2_xrs_load(void *cb_arg, struct xrs_action_load *action) +{ + struct amdxdna_hwctx *hwctx = cb_arg; + struct amdxdna_dev *xdna; + int ret; + + xdna = hwctx->client->xdna; + + hwctx->start_col = action->part.start_col; + hwctx->num_col = action->part.ncols; + ret = aie2_create_context(xdna->dev_handle, hwctx); + if (ret) + XDNA_ERR(xdna, "create context failed, ret %d", ret); + + return ret; +} + +static int aie2_xrs_unload(void *cb_arg) +{ + struct amdxdna_hwctx *hwctx = cb_arg; + struct amdxdna_dev *xdna; + int ret; + + xdna = hwctx->client->xdna; + + ret = aie2_destroy_context(xdna->dev_handle, hwctx); + if (ret) + XDNA_ERR(xdna, "destroy context failed, ret %d", ret); + + return ret; +} + +static struct xrs_action_ops aie2_xrs_actions = { + .load = aie2_xrs_load, + .unload = aie2_xrs_unload, +}; + static void aie2_hw_stop(struct amdxdna_dev *xdna) { struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev); @@ -416,6 +453,7 @@ static int aie2_init(struct amdxdna_dev *xdna) xrs_cfg.clk_list.cu_clk_list[2] = 1000; xrs_cfg.sys_eff_factor = 1; xrs_cfg.dev = xdna->ddev.dev; + xrs_cfg.actions = &aie2_xrs_actions; xrs_cfg.total_col = ndev->total_col; xdna->xrs_hdl = xrsm_init(&xrs_cfg); @@ -452,4 +490,7 @@ static void aie2_fini(struct amdxdna_dev *xdna) const struct amdxdna_dev_ops aie2_ops = { .init = aie2_init, .fini = aie2_fini, + .hwctx_init = aie2_hwctx_init, + .hwctx_fini = aie2_hwctx_fini, + .hwctx_config = aie2_hwctx_config, }; diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h index a6b734f24955..818fc52dabbf 100644 --- a/drivers/accel/amdxdna/aie2_pci.h +++ b/drivers/accel/amdxdna/aie2_pci.h @@ -128,6 +128,10 @@ struct rt_config { u32 value; }; +struct amdxdna_hwctx_priv { + void *mbox_chann; +}; + struct amdxdna_dev_hdl { struct amdxdna_dev *xdna; const struct amdxdna_dev_priv *priv; @@ -200,4 +204,12 @@ int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *ver int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata); int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev, struct amdxdna_fw_ver *fw_ver); +int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx); +int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx); + +/* aie2_hwctx.c */ +int aie2_hwctx_init(struct amdxdna_hwctx *hwctx); +void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx); +int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size); + #endif /* _AIE2_PCI_H_ */ diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c new file mode 100644 index 000000000000..3daf7065acf4 --- /dev/null +++ b/drivers/accel/amdxdna/amdxdna_ctx.c @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. + */ +#include "amdxdna_drm.h" + +#define MAX_HWCTX_ID 255 + +static void amdxdna_hwctx_destroy(struct amdxdna_hwctx *hwctx) +{ + struct amdxdna_dev *xdna = hwctx->client->xdna; + + /* At this point, user is not able to submit new commands */ + mutex_lock(&xdna->dev_lock); + xdna->dev_info->ops->hwctx_fini(hwctx); + mutex_unlock(&xdna->dev_lock); + + kfree(hwctx->name); + kfree(hwctx); +} + +/* + * This should be called in close() and remove(). DO NOT call in other syscalls. + * This guarantee that when hwctx and resources will be released, if user + * doesn't call amdxdna_drm_destroy_hwctx_ioctl. + */ +void amdxdna_hwctx_remove_all(struct amdxdna_client *client) +{ + struct amdxdna_hwctx *hwctx; + int next = 0; + + mutex_lock(&client->hwctx_lock); + idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) { + XDNA_DBG(client->xdna, "PID %d close HW context %d", + client->pid, hwctx->id); + idr_remove(&client->hwctx_idr, hwctx->id); + mutex_unlock(&client->hwctx_lock); + amdxdna_hwctx_destroy(hwctx); + mutex_lock(&client->hwctx_lock); + } + mutex_unlock(&client->hwctx_lock); +} + +int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_drm_create_hwctx *args = data; + struct amdxdna_dev *xdna = to_xdna_dev(dev); + struct amdxdna_hwctx *hwctx; + int ret, idx; + + if (args->ext_flags) + return -EINVAL; + + if (!drm_dev_enter(dev, &idx)) + return -ENODEV; + + hwctx = kzalloc(sizeof(*hwctx), GFP_KERNEL); + if (!hwctx) { + ret = -ENOMEM; + goto exit; + } + + if (copy_from_user(&hwctx->qos, u64_to_user_ptr(args->qos_p), sizeof(hwctx->qos))) { + XDNA_ERR(xdna, "Access QoS info failed"); + ret = -EFAULT; + goto free_hwctx; + } + + hwctx->client = client; + hwctx->fw_ctx_id = -1; + hwctx->num_tiles = args->num_tiles; + hwctx->mem_size = args->mem_size; + hwctx->max_opc = args->max_opc; + mutex_lock(&client->hwctx_lock); + ret = idr_alloc_cyclic(&client->hwctx_idr, hwctx, 0, MAX_HWCTX_ID, GFP_KERNEL); + if (ret < 0) { + mutex_unlock(&client->hwctx_lock); + XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret); + goto free_hwctx; + } + hwctx->id = ret; + mutex_unlock(&client->hwctx_lock); + + hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->id); + if (!hwctx->name) { + ret = -ENOMEM; + goto rm_id; + } + + mutex_lock(&xdna->dev_lock); + ret = xdna->dev_info->ops->hwctx_init(hwctx); + if (ret) { + mutex_unlock(&xdna->dev_lock); + XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret); + goto free_name; + } + args->handle = hwctx->id; + mutex_unlock(&xdna->dev_lock); + + XDNA_DBG(xdna, "PID %d create HW context %d, ret %d", client->pid, args->handle, ret); + drm_dev_exit(idx); + return 0; + +free_name: + kfree(hwctx->name); +rm_id: + mutex_lock(&client->hwctx_lock); + idr_remove(&client->hwctx_idr, hwctx->id); + mutex_unlock(&client->hwctx_lock); +free_hwctx: + kfree(hwctx); +exit: + drm_dev_exit(idx); + return ret; +} + +int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_drm_destroy_hwctx *args = data; + struct amdxdna_dev *xdna = to_xdna_dev(dev); + struct amdxdna_hwctx *hwctx; + int ret = 0, idx; + + if (!drm_dev_enter(dev, &idx)) + return -ENODEV; + + mutex_lock(&client->hwctx_lock); + hwctx = idr_find(&client->hwctx_idr, args->handle); + if (!hwctx) { + mutex_unlock(&client->hwctx_lock); + ret = -EINVAL; + XDNA_DBG(xdna, "PID %d HW context %d not exist", + client->pid, args->handle); + goto out; + } + idr_remove(&client->hwctx_idr, hwctx->id); + mutex_unlock(&client->hwctx_lock); + + amdxdna_hwctx_destroy(hwctx); + + XDNA_DBG(xdna, "PID %d destroyed HW context %d", client->pid, args->handle); +out: + drm_dev_exit(idx); + return ret; +} + +int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_drm_config_hwctx *args = data; + struct amdxdna_dev *xdna = to_xdna_dev(dev); + struct amdxdna_hwctx *hwctx; + u32 buf_size; + void *buf; + u64 val; + int ret; + + if (!xdna->dev_info->ops->hwctx_config) + return -EOPNOTSUPP; + + val = args->param_val; + buf_size = args->param_val_size; + + switch (args->param_type) { + case DRM_AMDXDNA_HWCTX_CONFIG_CU: + /* For those types that param_val is pointer */ + if (buf_size > PAGE_SIZE) { + XDNA_ERR(xdna, "Config CU param buffer too large"); + return -E2BIG; + } + + /* Hwctx needs to keep buf */ + buf = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, u64_to_user_ptr(val), buf_size)) { + kfree(buf); + return -EFAULT; + } + + break; + case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF: + case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF: + /* For those types that param_val is a value */ + buf = NULL; + buf_size = 0; + break; + default: + XDNA_DBG(xdna, "Unknown HW context config type %d", args->param_type); + return -EINVAL; + } + + mutex_lock(&xdna->dev_lock); + hwctx = idr_find(&client->hwctx_idr, args->handle); + if (!hwctx) { + XDNA_DBG(xdna, "PID %d failed to get hwctx %d", client->pid, args->handle); + ret = -EINVAL; + goto unlock; + } + + ret = xdna->dev_info->ops->hwctx_config(hwctx, args->param_type, val, buf, buf_size); + +unlock: + mutex_unlock(&xdna->dev_lock); + kfree(buf); + return ret; +} diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h new file mode 100644 index 000000000000..ceaebe3daa03 --- /dev/null +++ b/drivers/accel/amdxdna/amdxdna_ctx.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. + */ + +#ifndef _AMDXDNA_CTX_H_ +#define _AMDXDNA_CTX_H_ + +#include <drm/drm_drv.h> +#include <linux/kref.h> + +struct amdxdna_hwctx { + struct amdxdna_client *client; + struct amdxdna_hwctx_priv *priv; + char *name; + + u32 id; + u32 max_opc; + u32 num_tiles; + u32 mem_size; + u32 fw_ctx_id; + u32 col_list_len; + u32 *col_list; + u32 start_col; + u32 num_col; +#define HWCTX_STAT_INIT 0 +#define HWCTX_STAT_READY 1 +#define HWCTX_STAT_STOP 2 + u32 status; + u32 old_status; + + struct amdxdna_qos_info qos; + struct amdxdna_hwctx_param_config_cu *cus; +}; + +void amdxdna_hwctx_remove_all(struct amdxdna_client *client); +int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); + +#endif /* _AMDXDNA_CTX_H_ */ diff --git a/drivers/accel/amdxdna/amdxdna_drm.c b/drivers/accel/amdxdna/amdxdna_drm.c index 91e4f9c9dac9..2365c064bad6 100644 --- a/drivers/accel/amdxdna/amdxdna_drm.c +++ b/drivers/accel/amdxdna/amdxdna_drm.c @@ -3,12 +3,113 @@ * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. */ +#include <linux/iommu.h> #include <drm/drm_ioctl.h> #include <drm/drm_accel.h> #include "amdxdna_drm.h" -DEFINE_DRM_ACCEL_FOPS(amdxdna_fops); +static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp) +{ + struct amdxdna_dev *xdna = to_xdna_dev(ddev); + struct amdxdna_client *client; + int ret; + + client = kzalloc(sizeof(*client), GFP_KERNEL); + if (!client) + return -ENOMEM; + + client->pid = pid_nr(filp->pid); + client->xdna = xdna; + + client->sva = iommu_sva_bind_device(xdna->ddev.dev, current->mm); + if (IS_ERR(client->sva)) { + ret = PTR_ERR(client->sva); + XDNA_ERR(xdna, "SVA bind device failed, ret %d", ret); + goto failed; + } + client->pasid = iommu_sva_get_pasid(client->sva); + if (client->pasid == IOMMU_PASID_INVALID) { + XDNA_ERR(xdna, "SVA get pasid failed"); + ret = -ENODEV; + goto unbind_sva; + } + mutex_init(&client->hwctx_lock); + idr_init_base(&client->hwctx_idr, AMDXDNA_INVALID_CTX_HANDLE + 1); + + mutex_lock(&xdna->dev_lock); + list_add_tail(&client->node, &xdna->client_list); + mutex_unlock(&xdna->dev_lock); + + filp->driver_priv = client; + client->filp = filp; + + XDNA_DBG(xdna, "pid %d opened", client->pid); + return 0; + +unbind_sva: + iommu_sva_unbind_device(client->sva); +failed: + kfree(client); + + return ret; +} + +static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp) +{ + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_dev *xdna = to_xdna_dev(ddev); + + XDNA_DBG(xdna, "closing pid %d", client->pid); + + idr_destroy(&client->hwctx_idr); + mutex_destroy(&client->hwctx_lock); + + iommu_sva_unbind_device(client->sva); + + XDNA_DBG(xdna, "pid %d closed", client->pid); + kfree(client); +} + +static int amdxdna_flush(struct file *f, fl_owner_t id) +{ + struct drm_file *filp = f->private_data; + struct amdxdna_client *client = filp->driver_priv; + struct amdxdna_dev *xdna = client->xdna; + int idx; + + XDNA_DBG(xdna, "PID %d flushing...", client->pid); + if (!drm_dev_enter(&xdna->ddev, &idx)) + return 0; + + mutex_lock(&xdna->dev_lock); + list_del_init(&client->node); + mutex_unlock(&xdna->dev_lock); + amdxdna_hwctx_remove_all(client); + + drm_dev_exit(idx); + return 0; +} + +static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = { + /* Context */ + DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, amdxdna_drm_create_hwctx_ioctl, 0), + DRM_IOCTL_DEF_DRV(AMDXDNA_DESTROY_HWCTX, amdxdna_drm_destroy_hwctx_ioctl, 0), + DRM_IOCTL_DEF_DRV(AMDXDNA_CONFIG_HWCTX, amdxdna_drm_config_hwctx_ioctl, 0), +}; + +static const struct file_operations amdxdna_fops = { + .owner = THIS_MODULE, + .open = accel_open, + .release = drm_release, + .flush = amdxdna_flush, + .unlocked_ioctl = drm_ioctl, + .compat_ioctl = drm_compat_ioctl, + .poll = drm_poll, + .read = drm_read, + .llseek = noop_llseek, + .mmap = drm_gem_mmap, +}; const struct drm_driver amdxdna_drm_drv = { .driver_features = DRIVER_GEM | DRIVER_COMPUTE_ACCEL, @@ -17,4 +118,8 @@ const struct drm_driver amdxdna_drm_drv = { .desc = "AMD XDNA DRM implementation", .major = AMDXDNA_DRIVER_MAJOR, .minor = AMDXDNA_DRIVER_MINOR, + .open = amdxdna_drm_open, + .postclose = amdxdna_drm_close, + .ioctls = amdxdna_drm_ioctls, + .num_ioctls = ARRAY_SIZE(amdxdna_drm_ioctls), }; diff --git a/drivers/accel/amdxdna/amdxdna_drm.h b/drivers/accel/amdxdna/amdxdna_drm.h index bf4b0b786606..08331c51a130 100644 --- a/drivers/accel/amdxdna/amdxdna_drm.h +++ b/drivers/accel/amdxdna/amdxdna_drm.h @@ -13,6 +13,8 @@ #include <drm/drm_print.h> #include <drm/drm_file.h> +#include "amdxdna_ctx.h" + #define XDNA_INFO(xdna, fmt, args...) drm_info(&(xdna)->ddev, fmt, ##args) #define XDNA_WARN(xdna, fmt, args...) drm_warn(&(xdna)->ddev, "%s: "fmt, __func__, ##args) #define XDNA_ERR(xdna, fmt, args...) drm_err(&(xdna)->ddev, "%s: "fmt, __func__, ##args) @@ -32,6 +34,9 @@ struct amdxdna_dev; struct amdxdna_dev_ops { int (*init)(struct amdxdna_dev *xdna); void (*fini)(struct amdxdna_dev *xdna); + int (*hwctx_init)(struct amdxdna_hwctx *hwctx); + void (*hwctx_fini)(struct amdxdna_hwctx *hwctx); + int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size); }; /* @@ -68,7 +73,23 @@ struct amdxdna_dev { void *xrs_hdl; struct mutex dev_lock; /* per device lock */ + struct list_head client_list; struct amdxdna_fw_ver fw_ver; }; +/* + * struct amdxdna_client - amdxdna client + * A per fd data structure for managing context and other user process stuffs. + */ +struct amdxdna_client { + struct list_head node; + pid_t pid; + struct mutex hwctx_lock; /* protect hwctx */ + struct idr hwctx_idr; + struct amdxdna_dev *xdna; + struct drm_file *filp; + struct iommu_sva *sva; + int pasid; +}; + #endif /* _AMDXDNA_DRM_H_ */ diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c index 7d0cfd918b0e..924ee8b96ebb 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c @@ -58,6 +58,7 @@ static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id) return -ENODEV; drmm_mutex_init(&xdna->ddev, &xdna->dev_lock); + INIT_LIST_HEAD(&xdna->client_list); pci_set_drvdata(pdev, xdna); mutex_lock(&xdna->dev_lock); @@ -94,11 +95,25 @@ static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id) static void amdxdna_remove(struct pci_dev *pdev) { struct amdxdna_dev *xdna = pci_get_drvdata(pdev); + struct amdxdna_client *client; drm_dev_unplug(&xdna->ddev); amdxdna_sysfs_fini(xdna); mutex_lock(&xdna->dev_lock); + client = list_first_entry_or_null(&xdna->client_list, + struct amdxdna_client, node); + while (client) { + list_del_init(&client->node); + mutex_unlock(&xdna->dev_lock); + + amdxdna_hwctx_remove_all(client); + + mutex_lock(&xdna->dev_lock); + client = list_first_entry_or_null(&xdna->client_list, + struct amdxdna_client, node); + } + xdna->dev_info->ops->fini(xdna); mutex_unlock(&xdna->dev_lock); } diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h index 1b699464150e..5c1d9707d6da 100644 --- a/include/uapi/drm/amdxdna_accel.h +++ b/include/uapi/drm/amdxdna_accel.h @@ -6,6 +6,7 @@ #ifndef _UAPI_AMDXDNA_ACCEL_H_ #define _UAPI_AMDXDNA_ACCEL_H_ +#include <linux/stddef.h> #include "drm.h" #if defined(__cplusplus) @@ -15,11 +16,138 @@ extern "C" { #define AMDXDNA_DRIVER_MAJOR 1 #define AMDXDNA_DRIVER_MINOR 0 +#define AMDXDNA_INVALID_CTX_HANDLE 0 + enum amdxdna_device_type { AMDXDNA_DEV_TYPE_UNKNOWN = -1, AMDXDNA_DEV_TYPE_KMQ, }; +enum amdxdna_drm_ioctl_id { + DRM_AMDXDNA_CREATE_HWCTX, + DRM_AMDXDNA_DESTROY_HWCTX, + DRM_AMDXDNA_CONFIG_HWCTX, +}; + +/** + * struct qos_info - QoS information for driver. + * @gops: Giga operations per second. + * @fps: Frames per second. + * @dma_bandwidth: DMA bandwidtha. + * @latency: Frame response latency. + * @frame_exec_time: Frame execution time. + * @priority: Request priority. + * + * User program can provide QoS hints to driver. + */ +struct amdxdna_qos_info { + __u32 gops; + __u32 fps; + __u32 dma_bandwidth; + __u32 latency; + __u32 frame_exec_time; + __u32 priority; +}; + +/** + * struct amdxdna_drm_create_hwctx - Create hardware context. + * @ext: MBZ. + * @ext_flags: MBZ. + * @qos_p: Address of QoS info. + * @umq_bo: BO handle for user mode queue(UMQ). + * @log_buf_bo: BO handle for log buffer. + * @max_opc: Maximum operations per cycle. + * @num_tiles: Number of AIE tiles. + * @mem_size: Size of AIE tile memory. + * @umq_doorbell: Returned offset of doorbell associated with UMQ. + * @handle: Returned hardware context handle. + */ +struct amdxdna_drm_create_hwctx { + __u64 ext; + __u64 ext_flags; + __u64 qos_p; + __u32 umq_bo; + __u32 log_buf_bo; + __u32 max_opc; + __u32 num_tiles; + __u32 mem_size; + __u32 umq_doorbell; + __u32 handle; +}; + +/** + * struct amdxdna_drm_destroy_hwctx - Destroy hardware context. + * @handle: Hardware context handle. + * @pad: MBZ. + */ +struct amdxdna_drm_destroy_hwctx { + __u32 handle; + __u32 pad; +}; + +/** + * struct amdxdna_cu_config - configuration for one CU + * @cu_bo: CU configuration buffer bo handle + * @cu_func: Functional of a CU + * @pad: MBZ + */ +struct amdxdna_cu_config { + __u32 cu_bo; + __u8 cu_func; + __u8 pad[3]; +}; + +/** + * struct amdxdna_hwctx_param_config_cu - configuration for CUs in hardware context + * @num_cus: Number of CUs to configure + * @pad: MBZ + * @cu_configs: Array of CU configurations of struct amdxdna_cu_config + */ +struct amdxdna_hwctx_param_config_cu { + __u16 num_cus; + __u16 pad[3]; + struct amdxdna_cu_config cu_configs[] __counted_by(num_cus); +}; + +enum amdxdna_drm_config_hwctx_param { + DRM_AMDXDNA_HWCTX_CONFIG_CU, + DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF, + DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF, + DRM_AMDXDNA_HWCTX_CONFIG_NUM +}; + +/** + * struct amdxdna_drm_config_hwctx - Configure hardware context. + * @handle: hardware context handle. + * @param_type: Value in enum amdxdna_drm_config_hwctx_param. Specifies the + * structure passed in via param_val. + * @param_val: A structure specified by the param_type struct member. + * @param_val_size: Size of the parameter buffer pointed to by the param_val. + * If param_val is not a pointer, driver can ignore this. + * + * Note: if the param_val is a pointer pointing to a buffer, the maximum size + * of the buffer is 4KiB(PAGE_SIZE). + */ +struct amdxdna_drm_config_hwctx { + __u32 handle; + __u32 param_type; + __u64 param_val; + __u32 param_val_size; + __u32 pad; +}; + +#define DRM_IOCTL_AMDXDNA_CREATE_HWCTX \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CREATE_HWCTX, \ + struct amdxdna_drm_create_hwctx) + +#define DRM_IOCTL_AMDXDNA_DESTROY_HWCTX \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_DESTROY_HWCTX, \ + struct amdxdna_drm_destroy_hwctx) + +#define DRM_IOCTL_AMDXDNA_CONFIG_HWCTX \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CONFIG_HWCTX, \ + struct amdxdna_drm_config_hwctx) + #if defined(__cplusplus) } /* extern c end */ #endif -- 2.34.1