The remote queue is a hardware queue based ring buffer service between the
management PF and the PCIe device firmware. It is used for programming the
FPGA Programmable Logic, loading Versal firmware and checking card health
status.

Co-developed-by: Nishad Saraf <nishads@xxxxxxx>
Signed-off-by: Nishad Saraf <nishads@xxxxxxx>
Co-developed-by: Prapul Krishnamurthy <prapulk@xxxxxxx>
Signed-off-by: Prapul Krishnamurthy <prapulk@xxxxxxx>
Signed-off-by: Yidong Zhang <yidong.zhang@xxxxxxx>
---
 drivers/fpga/amd/Makefile                |   3 +-
 drivers/fpga/amd/versal-pci-rm-queue.c   | 326 +++++++++++++++++++++++
 drivers/fpga/amd/versal-pci-rm-queue.h   |  21 ++
 drivers/fpga/amd/versal-pci-rm-service.h | 209 +++++++++++++++
 4 files changed, 558 insertions(+), 1 deletion(-)
 create mode 100644 drivers/fpga/amd/versal-pci-rm-queue.c
 create mode 100644 drivers/fpga/amd/versal-pci-rm-queue.h
 create mode 100644 drivers/fpga/amd/versal-pci-rm-service.h
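
A minimal usage sketch of the queue API added by this patch, for reviewers
(illustrative only, not part of the diff to apply). rm_cmd_alloc() and
rm_cmd_free() are hypothetical helpers that would assign a unique
sq_msg.hdr.id and call init_completion() on cmd->executed; this patch only
provides rm_queue_send_cmd() and rm_queue_withdraw_cmd():

static int rm_example_identify(struct rm_device *rdev)
{
	struct rm_cmd *cmd;
	int ret;

	/* Hypothetical helper: allocate and fill an IDENTIFY command */
	cmd = rm_cmd_alloc(rdev, RM_QUEUE_OP_IDENTIFY);
	if (!cmd)
		return -ENOMEM;

	/*
	 * Copies sq_msg into the next free SQ slot, bumps the SQ PIDX and
	 * sleeps until rm_check_msg() completes the command from the CQ
	 * or the timeout expires.
	 */
	ret = rm_queue_send_cmd(cmd, RM_CMD_WAIT_CONFIG_TIMEOUT);

	/* On success, cmd->cq_msg.data.identify holds the firmware version */
	rm_cmd_free(cmd);	/* hypothetical helper */
	return ret;
}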
diff --git a/drivers/fpga/amd/Makefile b/drivers/fpga/amd/Makefile
index 7a604785e5f9..b2ffdbf23400 100644
--- a/drivers/fpga/amd/Makefile
+++ b/drivers/fpga/amd/Makefile
@@ -3,4 +3,5 @@
 obj-$(CONFIG_AMD_VERSAL_PCI) += versal-pci.o
 
 versal-pci-$(CONFIG_AMD_VERSAL_PCI) := versal-pci-main.o \
-			versal-pci-comm-chan.o
+			versal-pci-comm-chan.o \
+			versal-pci-rm-queue.o
diff --git a/drivers/fpga/amd/versal-pci-rm-queue.c b/drivers/fpga/amd/versal-pci-rm-queue.c
new file mode 100644
index 000000000000..92f2e1165052
--- /dev/null
+++ b/drivers/fpga/amd/versal-pci-rm-queue.c
@@ -0,0 +1,326 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for Versal PCIe device
+ *
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ */
+
+#include <linux/pci.h>
+
+#include "versal-pci.h"
+#include "versal-pci-rm-queue.h"
+#include "versal-pci-rm-service.h"
+
+static inline struct rm_device *to_rdev_msg_monitor(struct work_struct *w)
+{
+	return container_of(w, struct rm_device, msg_monitor);
+}
+
+static inline struct rm_device *to_rdev_msg_timer(struct timer_list *t)
+{
+	return container_of(t, struct rm_device, msg_timer);
+}
+
+static inline u32 rm_io_read(struct rm_device *rdev, u32 offset)
+{
+	/* TODO */
+	return 0;
+}
+
+static inline int rm_io_write(struct rm_device *rdev, u32 offset, u32 value)
+{
+	/* TODO */
+	return 0;
+}
+
+static inline u32 rm_queue_read(struct rm_device *rdev, u32 offset)
+{
+	/* TODO */
+	return 0;
+}
+
+static inline void rm_queue_write(struct rm_device *rdev, u32 offset, u32 value)
+{
+	/* TODO */
+}
+
+static inline void rm_queue_bulk_read(struct rm_device *rdev, u32 offset,
+				      u32 *value, u32 size)
+{
+	/* TODO */
+}
+
+static inline void rm_queue_bulk_write(struct rm_device *rdev, u32 offset,
+				       u32 *value, u32 size)
+{
+	/* TODO */
+}
+
+static inline u32 rm_queue_get_cidx(struct rm_device *rdev, enum rm_queue_type type)
+{
+	u32 off;
+
+	if (type == RM_QUEUE_SQ)
+		off = offsetof(struct rm_queue_header, sq_cidx);
+	else
+		off = offsetof(struct rm_queue_header, cq_cidx);
+
+	return rm_queue_read(rdev, off);
+}
+
+static inline void rm_queue_set_cidx(struct rm_device *rdev, enum rm_queue_type type,
+				     u32 value)
+{
+	u32 off;
+
+	if (type == RM_QUEUE_SQ)
+		off = offsetof(struct rm_queue_header, sq_cidx);
+	else
+		off = offsetof(struct rm_queue_header, cq_cidx);
+
+	rm_queue_write(rdev, off, value);
+}
+
+static inline u32 rm_queue_get_pidx(struct rm_device *rdev, enum rm_queue_type type)
+{
+	if (type == RM_QUEUE_SQ)
+		return rm_io_read(rdev, RM_IO_SQ_PIDX_OFF);
+	else
+		return rm_io_read(rdev, RM_IO_CQ_PIDX_OFF);
+}
+
+static inline int rm_queue_set_pidx(struct rm_device *rdev,
+				    enum rm_queue_type type, u32 value)
+{
+	if (type == RM_QUEUE_SQ)
+		return rm_io_write(rdev, RM_IO_SQ_PIDX_OFF, value);
+	else
+		return rm_io_write(rdev, RM_IO_CQ_PIDX_OFF, value);
+}
+
+static inline u32 rm_queue_get_sq_slot_offset(struct rm_device *rdev)
+{
+	u32 index;
+
+	if ((rdev->sq.pidx - rdev->sq.cidx) >= rdev->queue_size)
+		return RM_INVALID_SLOT;
+
+	index = rdev->sq.pidx & (rdev->queue_size - 1);
+	return rdev->sq.offset + RM_CMD_SQ_SLOT_SIZE * index;
+}
+
+static inline u32 rm_queue_get_cq_slot_offset(struct rm_device *rdev)
+{
+	u32 index;
+
+	index = rdev->cq.cidx & (rdev->queue_size - 1);
+	return rdev->cq.offset + RM_CMD_CQ_SLOT_SIZE * index;
+}
+
+static int rm_queue_submit_cmd(struct rm_cmd *cmd)
+{
+	struct versal_pci_device *vdev = cmd->rdev->vdev;
+	struct rm_device *rdev = cmd->rdev;
+	u32 offset;
+	int ret;
+
+	mutex_lock(&rdev->queue);
+
+	offset = rm_queue_get_sq_slot_offset(rdev);
+	if (offset == RM_INVALID_SLOT) {
+		vdev_err(vdev, "No SQ slot available");
+		ret = -ENOSPC;
+		goto exit;
+	}
+
+	rm_queue_bulk_write(rdev, offset, (u32 *)&cmd->sq_msg,
+			    sizeof(cmd->sq_msg));
+
+	ret = rm_queue_set_pidx(rdev, RM_QUEUE_SQ, ++rdev->sq.pidx);
+	if (ret) {
+		vdev_err(vdev, "Failed to update PIDX, ret %d", ret);
+		goto exit;
+	}
+
+	list_add_tail(&cmd->list, &rdev->submitted_cmds);
+exit:
+	mutex_unlock(&rdev->queue);
+	return ret;
+}
+
+void rm_queue_withdraw_cmd(struct rm_cmd *cmd)
+{
+	mutex_lock(&cmd->rdev->queue);
+	list_del(&cmd->list);
+	mutex_unlock(&cmd->rdev->queue);
+}
+
+static int rm_queue_wait_cmd_timeout(struct rm_cmd *cmd, unsigned long timeout)
+{
+	struct versal_pci_device *vdev = cmd->rdev->vdev;
+	int ret;
+
+	if (wait_for_completion_timeout(&cmd->executed, timeout)) {
+		ret = cmd->cq_msg.data.rcode;
+		if (!ret)
+			return 0;
+
+		vdev_err(vdev, "CMD returned with a failure: %d", ret);
+		return ret;
+	}
+
+	/*
+	 * Each command is expected to be completed before it times out.
+	 * If we reach here, withdraw the command; a hot reset should
+	 * be issued to recover the card.
+	 */
+	vdev_err(vdev, "cmd timed out, please reset the card");
+	rm_queue_withdraw_cmd(cmd);
+	return -ETIME;
+}
+
+int rm_queue_send_cmd(struct rm_cmd *cmd, unsigned long timeout)
+{
+	int ret;
+
+	ret = rm_queue_submit_cmd(cmd);
+	if (ret)
+		return ret;
+
+	return rm_queue_wait_cmd_timeout(cmd, timeout);
+}
+
+static int rm_process_msg(struct rm_device *rdev)
+{
+	struct versal_pci_device *vdev = rdev->vdev;
+	struct rm_cmd *cmd, *next;
+	struct rm_cmd_cq_hdr header;
+	u32 offset;
+
+	offset = rm_queue_get_cq_slot_offset(rdev);
+	if (!offset) {
+		vdev_err(vdev, "Invalid CQ offset");
+		return -EINVAL;
+	}
+
+	rm_queue_bulk_read(rdev, offset, (u32 *)&header, sizeof(header));
+
+	list_for_each_entry_safe(cmd, next, &rdev->submitted_cmds, list) {
+		u32 value = 0;
+
+		if (cmd->sq_msg.hdr.id != header.id)
+			continue;
+
+		rm_queue_bulk_read(rdev, offset + sizeof(cmd->cq_msg.hdr),
+				   (u32 *)&cmd->cq_msg.data,
+				   sizeof(cmd->cq_msg.data));
+
+		rm_queue_write(rdev, offset, value);
+
+		list_del(&cmd->list);
+		complete(&cmd->executed);
+		return 0;
+	}
+
+	vdev_err(vdev, "Unknown cmd ID %d found in CQ", header.id);
+	return -EFAULT;
+}
+
+static void rm_check_msg(struct work_struct *w)
+{
+	struct rm_device *rdev = to_rdev_msg_monitor(w);
+	int ret;
+
+	mutex_lock(&rdev->queue);
+
+	rdev->sq.cidx = rm_queue_get_cidx(rdev, RM_QUEUE_SQ);
+	rdev->cq.pidx = rm_queue_get_pidx(rdev, RM_QUEUE_CQ);
+
+	while (rdev->cq.cidx < rdev->cq.pidx) {
+		ret = rm_process_msg(rdev);
+		if (ret)
+			break;
+
+		rdev->cq.cidx++;
+
+		rm_queue_set_cidx(rdev, RM_QUEUE_CQ, rdev->cq.cidx);
+	}
+
+	mutex_unlock(&rdev->queue);
+}
+
+static void rm_sched_work(struct timer_list *t)
+{
+	struct rm_device *rdev = to_rdev_msg_timer(t);
+
+	/* Schedule a work in the general workqueue */
+	schedule_work(&rdev->msg_monitor);
+	/* Periodic timer */
+	mod_timer(&rdev->msg_timer, jiffies + RM_COMPLETION_TIMER);
+}
+
+void rm_queue_fini(struct rm_device *rdev)
+{
+	del_timer_sync(&rdev->msg_timer);
+	cancel_work_sync(&rdev->msg_monitor);
+	mutex_destroy(&rdev->queue);
+}
+
+int rm_queue_init(struct rm_device *rdev)
+{
+	struct versal_pci_device *vdev = rdev->vdev;
+	struct rm_queue_header header = {0};
+	int ret;
+
+	INIT_LIST_HEAD(&rdev->submitted_cmds);
+	mutex_init(&rdev->queue);
+
+	rm_queue_bulk_read(rdev, RM_HDR_OFF, (u32 *)&header, sizeof(header));
+
+	if (header.magic != RM_QUEUE_HDR_MAGIC_NUM) {
+		vdev_err(vdev, "Invalid RM queue header");
+		ret = -ENODEV;
+		goto error;
+	}
+
+	if (!header.version) {
+		vdev_err(vdev, "Invalid RM queue header version");
+		ret = -ENODEV;
+		goto error;
+	}
+
+	sema_init(&rdev->sq.data_lock, 1);
+	sema_init(&rdev->cq.data_lock, 1);
+	rdev->queue_size = header.size;
+	rdev->sq.offset = header.sq_off;
+	rdev->cq.offset = header.cq_off;
+	rdev->sq.type = RM_QUEUE_SQ;
+	rdev->cq.type = RM_QUEUE_CQ;
+	rdev->sq.data_size = rdev->queue_buffer_size - RM_CMD_CQ_BUFFER_SIZE;
+	rdev->cq.data_size = RM_CMD_CQ_BUFFER_SIZE;
+	rdev->sq.data_offset = rdev->queue_buffer_start +
+			       RM_CMD_CQ_BUFFER_OFFSET + RM_CMD_CQ_BUFFER_SIZE;
+	rdev->cq.data_offset = rdev->queue_buffer_start +
+			       RM_CMD_CQ_BUFFER_OFFSET;
+	rdev->sq.cidx = header.sq_cidx;
+	rdev->cq.cidx = header.cq_cidx;
+
+	rdev->sq.pidx = rm_queue_get_pidx(rdev, RM_QUEUE_SQ);
+	rdev->cq.pidx = rm_queue_get_pidx(rdev, RM_QUEUE_CQ);
+
+	if (rdev->cq.cidx != rdev->cq.pidx) {
+		vdev_warn(vdev, "Clearing stale completions");
+		rdev->cq.cidx = rdev->cq.pidx;
+		rm_queue_set_cidx(rdev, RM_QUEUE_CQ, rdev->cq.cidx);
+	}
+
+	/* Create and schedule timer to do recurring work */
+	INIT_WORK(&rdev->msg_monitor, &rm_check_msg);
+	timer_setup(&rdev->msg_timer, &rm_sched_work, 0);
+	mod_timer(&rdev->msg_timer, jiffies + RM_COMPLETION_TIMER);
+
+	return 0;
+error:
+	mutex_destroy(&rdev->queue);
+	return ret;
+}
diff --git a/drivers/fpga/amd/versal-pci-rm-queue.h b/drivers/fpga/amd/versal-pci-rm-queue.h
new file mode 100644
index 000000000000..3bc7102c6a58
--- /dev/null
+++ b/drivers/fpga/amd/versal-pci-rm-queue.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Driver for Versal PCIe device
+ *
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ */
+
+#ifndef __RM_QUEUE_H
+#define __RM_QUEUE_H
+
+struct rm_device;
+
+/* rm queue hardware setup */
+int rm_queue_init(struct rm_device *rdev);
+void rm_queue_fini(struct rm_device *rdev);
+
+/* rm queue common API */
+int rm_queue_send_cmd(struct rm_cmd *cmd, unsigned long timeout);
+void rm_queue_withdraw_cmd(struct rm_cmd *cmd);
+
+#endif /* __RM_QUEUE_H */
diff --git a/drivers/fpga/amd/versal-pci-rm-service.h b/drivers/fpga/amd/versal-pci-rm-service.h
new file mode 100644
index 000000000000..85a78257770a
--- /dev/null
+++ b/drivers/fpga/amd/versal-pci-rm-service.h
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Driver for Versal PCIe device
+ *
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ */
+
+#ifndef __RM_SERVICE_H
+#define __RM_SERVICE_H
+
+#define RM_HDR_OFF			0x0
+#define RM_HDR_MAGIC_NUM		0x564D5230
+#define RM_QUEUE_HDR_MAGIC_NUM		0x5847513F
+#define RM_PCI_IO_BAR_OFF		0x2010000
+#define RM_PCI_IO_SIZE			SZ_4K
+#define RM_PCI_SHMEM_BAR_OFF		0x8000000
+#define RM_PCI_SHMEM_SIZE		SZ_128M
+#define RM_PCI_SHMEM_HDR_SIZE		0x28
+
+#define RM_QUEUE_HDR_MAGIC_NUM_OFF	0x0
+#define RM_IO_SQ_PIDX_OFF		0x0
+#define RM_IO_CQ_PIDX_OFF		0x100
+
+#define RM_CMD_ID_MIN			1
+#define RM_CMD_ID_MAX			(BIT(17) - 1)
+#define RM_CMD_SQ_HDR_OPS_MSK		GENMASK(15, 0)
+#define RM_CMD_SQ_HDR_SIZE_MSK		GENMASK(14, 0)
+#define RM_CMD_SQ_SLOT_SIZE		SZ_512
+#define RM_CMD_CQ_SLOT_SIZE		SZ_16
+#define RM_CMD_CQ_BUFFER_SIZE		(1024 * 1024)
+#define RM_CMD_CQ_BUFFER_OFFSET		0x0
+#define RM_CMD_LOG_PAGE_TYPE_MASK	GENMASK(15, 0)
+#define RM_CMD_VMR_CONTROL_MSK		GENMASK(10, 8)
+#define RM_CMD_VMR_CONTROL_PS_MASK	BIT(9)
+
+#define RM_CMD_WAIT_CONFIG_TIMEOUT	msecs_to_jiffies(10 * 1000)
+#define RM_CMD_WAIT_DOWNLOAD_TIMEOUT	msecs_to_jiffies(300 * 1000)
+
+#define RM_COMPLETION_TIMER		(HZ / 10)
+#define RM_HEALTH_CHECK_TIMER		(HZ)
+
+#define RM_INVALID_SLOT			0
+
+enum rm_queue_opcode {
+	RM_QUEUE_OP_LOAD_XCLBIN		= 0x0,
+	RM_QUEUE_OP_GET_LOG_PAGE	= 0x8,
+	RM_QUEUE_OP_LOAD_FW		= 0xA,
+	RM_QUEUE_OP_LOAD_APU_FW		= 0xD,
+	RM_QUEUE_OP_VMR_CONTROL		= 0xE,
+	RM_QUEUE_OP_IDENTIFY		= 0x202,
+};
+
+struct rm_cmd_sq_hdr {
+	__u16 opcode;
+	__u16 msg_size;
+	__u16 id;
+	__u16 reserved;
+} __packed;
+
+struct rm_cmd_cq_hdr {
+	__u16 id;
+	__u16 reserved;
+} __packed;
+
+struct rm_cmd_sq_bin {
+	__u64 address;
+	__u32 size;
+	__u32 reserved1;
+	__u32 reserved2;
+	__u32 reserved3;
+	__u64 reserved4;
+} __packed;
+
+struct rm_cmd_sq_log_page {
+	__u64 address;
+	__u32 size;
+	__u32 reserved1;
+	__u32 type;
+	__u32 reserved2;
+} __packed;
+
+struct rm_cmd_sq_ctrl {
+	__u32 status;
+} __packed;
+
+struct rm_cmd_sq_data {
+	union {
+		struct rm_cmd_sq_log_page page;
+		struct rm_cmd_sq_bin bin;
+		struct rm_cmd_sq_ctrl ctrl;
+	};
+} __packed;
+
+struct rm_cmd_cq_identify {
+	__u16 major;
+	__u16 minor;
+	__u32 reserved;
+} __packed;
+
+struct rm_cmd_cq_log_page {
+	__u32 len;
+	__u32 reserved;
+} __packed;
+
+struct rm_cmd_cq_control {
+	__u16 status;
+	__u16 reserved1;
+	__u32 reserved2;
+} __packed;
+
+struct rm_cmd_cq_data {
+	union {
+		struct rm_cmd_cq_identify identify;
+		struct rm_cmd_cq_log_page page;
+		struct rm_cmd_cq_control ctrl;
+		__u32 reserved[2];
+	};
+	__u32 rcode;
+} __packed;
+
+struct rm_cmd_sq_msg {
+	struct rm_cmd_sq_hdr hdr;
+	struct rm_cmd_sq_data data;
+} __packed;
+
+struct rm_cmd_cq_msg {
+	struct rm_cmd_cq_hdr hdr;
+	struct rm_cmd_cq_data data;
+} __packed;
+
+struct rm_cmd {
+	struct rm_device *rdev;
+	struct list_head list;
+	struct completion executed;
+	struct rm_cmd_sq_msg sq_msg;
+	struct rm_cmd_cq_msg cq_msg;
+	enum rm_queue_opcode opcode;
+	__u8 *buffer;
+	ssize_t size;
+};
+
+enum rm_queue_type {
+	RM_QUEUE_SQ,
+	RM_QUEUE_CQ
+};
+
+enum rm_cmd_log_page_type {
+	RM_CMD_LOG_PAGE_AXI_TRIP_STATUS	= 0x0,
+	RM_CMD_LOG_PAGE_FW_ID		= 0xA,
+};
+
+struct rm_queue {
+	enum rm_queue_type type;
+	__u32 pidx;
+	__u32 cidx;
+	__u32 offset;
+	__u32 data_offset;
+	__u32 data_size;
+	struct semaphore data_lock;
+};
+
+struct rm_queue_header {
+	__u32 magic;
+	__u32 version;
+	__u32 size;
+	__u32 sq_off;
+	__u32 sq_slot_size;
+	__u32 cq_off;
+	__u32 sq_cidx;
+	__u32 cq_cidx;
+};
+
+struct rm_header {
+	__u32 magic;
+	__u32 queue_base;
+	__u32 queue_size;
+	__u32 status_off;
+	__u32 status_len;
+	__u32 log_index;
+	__u32 log_off;
+	__u32 log_size;
+	__u32 data_start;
+	__u32 data_end;
+};
+
+struct rm_device {
+	struct versal_pci_device *vdev;
+
+	struct rm_header rm_metadata;
+	__u32 queue_buffer_start;
+	__u32 queue_buffer_size;
+	__u32 queue_base;
+
+	/* Lock to queue access */
+	struct mutex queue;
+	struct rm_queue sq;
+	struct rm_queue cq;
+	__u32 queue_size;
+
+	struct timer_list msg_timer;
+	struct work_struct msg_monitor;
+	struct timer_list health_timer;
+	struct work_struct health_monitor;
+	struct list_head submitted_cmds;
+
+	__u32 firewall_tripped;
+};
+
+#endif /* __RM_SERVICE_H */
-- 
2.34.1
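
Note for reviewers (below the signature, not part of the patch): the
rm_io_*() and rm_queue_*() accessors are left as TODO stubs here. One
possible shape for them, assuming a later patch ioremaps the IO BAR and
the shared-memory queue region into hypothetical rdev->io_base and
rdev->shmem_base __iomem pointers (neither exists in this patch), would
be:

static inline u32 rm_io_read(struct rm_device *rdev, u32 offset)
{
	/* rdev->io_base: assumed __iomem mapping of the RM IO region */
	return readl(rdev->io_base + offset);
}

static inline u32 rm_queue_read(struct rm_device *rdev, u32 offset)
{
	/* rdev->shmem_base: assumed __iomem mapping of the shared memory */
	return readl(rdev->shmem_base + rdev->queue_base + offset);
}

static inline void rm_queue_bulk_read(struct rm_device *rdev, u32 offset,
				      u32 *value, u32 size)
{
	/* Copy a whole SQ/CQ slot out of device memory */
	memcpy_fromio(value, rdev->shmem_base + rdev->queue_base + offset,
		      size);
}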