Hi, On 13.03.2023 18:37, Jeffrey Hugo wrote: > On 3/13/2023 7:21 AM, Jacek Lawrynowicz wrote: >> Hi, >> >> On 06.03.2023 22:33, Jeffrey Hugo wrote: >>> Add the QAIC driver uapi file and core driver file that binds to the PCIe >>> device. The core driver file also creates the accel device and manages >>> all the interconnections between the different parts of the driver. >>> >>> The driver can be built as a module. If so, it will be called "qaic.ko". >>> >>> Signed-off-by: Jeffrey Hugo <quic_jhugo@xxxxxxxxxxx> >>> Reviewed-by: Carl Vanderlip <quic_carlv@xxxxxxxxxxx> >>> Reviewed-by: Pranjal Ramajor Asha Kanojiya <quic_pkanojiy@xxxxxxxxxxx> >>> --- >>> drivers/accel/qaic/qaic.h | 282 ++++++++++++++++++ >>> drivers/accel/qaic/qaic_drv.c | 648 ++++++++++++++++++++++++++++++++++++++++++ >>> include/uapi/drm/qaic_accel.h | 397 ++++++++++++++++++++++++++ >>> 3 files changed, 1327 insertions(+) >>> create mode 100644 drivers/accel/qaic/qaic.h >>> create mode 100644 drivers/accel/qaic/qaic_drv.c >>> create mode 100644 include/uapi/drm/qaic_accel.h >>> >>> diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h >>> new file mode 100644 >>> index 0000000..996a68f >>> --- /dev/null >>> +++ b/drivers/accel/qaic/qaic.h >>> @@ -0,0 +1,282 @@ >>> +/* SPDX-License-Identifier: GPL-2.0-only >>> + * >>> + * Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. >>> + * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. >>> + */ >>> + >>> +#ifndef _QAIC_H_ >>> +#define _QAIC_H_ >>> + >>> +#include <linux/interrupt.h> >>> +#include <linux/kref.h> >>> +#include <linux/mhi.h> >>> +#include <linux/mutex.h> >>> +#include <linux/pci.h> >>> +#include <linux/spinlock.h> >>> +#include <linux/srcu.h> >>> +#include <linux/wait.h> >>> +#include <linux/workqueue.h> >>> +#include <drm/drm_device.h> >>> +#include <drm/drm_gem.h> >>> + >>> +#define QAIC_DBC_BASE SZ_128K >>> +#define QAIC_DBC_SIZE SZ_4K >>> + >>> +#define QAIC_NO_PARTITION -1 >>> + >>> +#define QAIC_DBC_OFF(i) ((i) * QAIC_DBC_SIZE + QAIC_DBC_BASE) >>> + >>> +#define to_qaic_bo(obj) container_of(obj, struct qaic_bo, base) >>> + >>> +extern bool poll_datapath; >>> + >>> +struct qaic_user { >>> + /* Uniquely identifies this user for the device */ >>> + int handle; >>> + struct kref ref_count; >>> + /* Char device opened by this user */ >>> + struct qaic_drm_device *qddev; >>> + /* Node in list of users that opened this drm device */ >>> + struct list_head node; >>> + /* SRCU used to synchronize this user during cleanup */ >>> + struct srcu_struct qddev_lock; >>> + atomic_t chunk_id; >>> +}; >>> + >>> +struct dma_bridge_chan { >>> + /* Pointer to device strcut maintained by driver */ >>> + struct qaic_device *qdev; >>> + /* ID of this DMA bridge channel(DBC) */ >>> + unsigned int id; >>> + /* Synchronizes access to xfer_list */ >>> + spinlock_t xfer_lock; >>> + /* Base address of request queue */ >>> + void *req_q_base; >>> + /* Base address of response queue */ >>> + void *rsp_q_base; >>> + /* >>> + * Base bus address of request queue. Response queue bus address can be >>> + * calculated by adding request queue size to this variable >>> + */ >>> + dma_addr_t dma_addr; >>> + /* Total size of request and response queue in byte */ >>> + u32 total_size; >>> + /* Capacity of request/response queue */ >>> + u32 nelem; >>> + /* The user that opened this DBC */ >>> + struct qaic_user *usr; >>> + /* >>> + * Request ID of next memory handle that goes in request queue. One >>> + * memory handle can enqueue more than one request elements, all >>> + * this requests that belong to same memory handle have same request ID >>> + */ >>> + u16 next_req_id; >>> + /* true: DBC is in use; false: DBC not in use */ >>> + bool in_use; >>> + /* >>> + * Base address of device registers. Used to read/write request and >>> + * response queue's head and tail pointer of this DBC. >>> + */ >>> + void __iomem *dbc_base; >>> + /* Head of list where each node is a memory handle queued in request queue */ >>> + struct list_head xfer_list; >>> + /* Synchronizes DBC readers during cleanup */ >>> + struct srcu_struct ch_lock; >>> + /* >>> + * When this DBC is released, any thread waiting on this wait queue is >>> + * woken up >>> + */ >>> + wait_queue_head_t dbc_release; >>> + /* Head of list where each node is a bo associated with this DBC */ >>> + struct list_head bo_lists; >>> + /* The irq line for this DBC. Used for polling */ >>> + unsigned int irq; >>> + /* Polling work item to simulate interrupts */ >>> + struct work_struct poll_work; >>> +}; >>> + >>> +struct qaic_device { >>> + /* Pointer to base PCI device struct of our physical device */ >>> + struct pci_dev *pdev; >>> + /* Req. ID of request that will be queued next in MHI control device */ >>> + u32 next_seq_num; >>> + /* Base address of bar 0 */ >>> + void __iomem *bar_0; >>> + /* Base address of bar 2 */ >>> + void __iomem *bar_2; >>> + /* Controller structure for MHI devices */ >>> + struct mhi_controller *mhi_cntl; >>> + /* MHI control channel device */ >>> + struct mhi_device *cntl_ch; >>> + /* List of requests queued in MHI control device */ >>> + struct list_head cntl_xfer_list; >>> + /* Synchronizes MHI control device transactions and its xfer list */ >>> + struct mutex cntl_mutex; >>> + /* Array of DBC struct of this device */ >>> + struct dma_bridge_chan *dbc; >>> + /* Work queue for tasks related to MHI control device */ >>> + struct workqueue_struct *cntl_wq; >>> + /* Synchronizes all the users of device during cleanup */ >>> + struct srcu_struct dev_lock; >>> + /* true: Device under reset; false: Device not under reset */ >>> + bool in_reset; >>> + /* >>> + * true: A tx MHI transaction has failed and a rx buffer is still queued >>> + * in control device. Such a buffer is considered lost rx buffer >>> + * false: No rx buffer is lost in control device >>> + */ >>> + bool cntl_lost_buf; >>> + /* Maximum number of DBC supported by this device */ >>> + u32 num_dbc; >>> + /* Reference to the drm_device for this device when it is created */ >>> + struct qaic_drm_device *qddev; >>> + /* Generate the CRC of a control message */ >>> + u32 (*gen_crc)(void *msg); >>> + /* Validate the CRC of a control message */ >>> + bool (*valid_crc)(void *msg); >>> +}; >>> + >>> +struct qaic_drm_device { >>> + /* Pointer to the root device struct driven by this driver */ >>> + struct qaic_device *qdev; >>> + /* >>> + * The physical device can be partition in number of logical devices. >>> + * And each logical device is given a partition id. This member stores >>> + * that id. QAIC_NO_PARTITION is a sentinel used to mark that this drm >>> + * device is the actual physical device >>> + */ >>> + s32 partition_id; >>> + /* Pointer to the drm device struct of this drm device */ >>> + struct drm_device *ddev; >>> + /* Head in list of users who have opened this drm device */ >>> + struct list_head users; >>> + /* Synchronizes access to users list */ >>> + struct mutex users_mutex; >>> +}; >>> + >>> +struct qaic_bo { >>> + struct drm_gem_object base; >>> + /* Scatter/gather table for allocate/imported BO */ >>> + struct sg_table *sgt; >>> + /* BO size requested by user. GEM object might be bigger in size. */ >>> + u64 size; >>> + /* Head in list of slices of this BO */ >>> + struct list_head slices; >>> + /* Total nents, for all slices of this BO */ >>> + int total_slice_nents; >>> + /* >>> + * Direction of transfer. It can assume only two value DMA_TO_DEVICE and >>> + * DMA_FROM_DEVICE. >>> + */ >>> + int dir; >>> + /* The pointer of the DBC which operates on this BO */ >>> + struct dma_bridge_chan *dbc; >>> + /* Number of slice that belongs to this buffer */ >>> + u32 nr_slice; >>> + /* Number of slice that have been transferred by DMA engine */ >>> + u32 nr_slice_xfer_done; >>> + /* true = BO is queued for execution, true = BO is not queued */ >>> + bool queued; >>> + /* >>> + * If true then user has attached slicing information to this BO by >>> + * calling DRM_IOCTL_QAIC_ATTACH_SLICE_BO ioctl. >>> + */ >>> + bool sliced; >>> + /* Request ID of this BO if it is queued for execution */ >>> + u16 req_id; >>> + /* Handle assigned to this BO */ >>> + u32 handle; >>> + /* Wait on this for completion of DMA transfer of this BO */ >>> + struct completion xfer_done; >>> + /* >>> + * Node in linked list where head is dbc->xfer_list. >>> + * This link list contain BO's that are queued for DMA transfer. >>> + */ >>> + struct list_head xfer_list; >>> + /* >>> + * Node in linked list where head is dbc->bo_lists. >>> + * This link list contain BO's that are associated with the DBC it is >>> + * linked to. >>> + */ >>> + struct list_head bo_list; >>> + struct { >>> + /* >>> + * Latest timestamp(ns) at which kernel received a request to >>> + * execute this BO >>> + */ >>> + u64 req_received_ts; >>> + /* >>> + * Latest timestamp(ns) at which kernel enqueued requests of >>> + * this BO for execution in DMA queue >>> + */ >>> + u64 req_submit_ts; >>> + /* >>> + * Latest timestamp(ns) at which kernel received a completion >>> + * interrupt for requests of this BO >>> + */ >>> + u64 req_processed_ts; >>> + /* >>> + * Number of elements already enqueued in DMA queue before >>> + * enqueuing requests of this BO >>> + */ >>> + u32 queue_level_before; >>> + } perf_stats; >>> + >>> +}; >>> + >>> +struct bo_slice { >>> + /* Mapped pages */ >>> + struct sg_table *sgt; >>> + /* Number of requests required to queue in DMA queue */ >>> + int nents; >>> + /* See enum dma_data_direction */ >>> + int dir; >>> + /* Actual requests that will be copied in DMA queue */ >>> + struct dbc_req *reqs; >>> + struct kref ref_count; >>> + /* true: No DMA transfer required */ >>> + bool no_xfer; >>> + /* Pointer to the parent BO handle */ >>> + struct qaic_bo *bo; >>> + /* Node in list of slices maintained by parent BO */ >>> + struct list_head slice; >>> + /* Size of this slice in bytes */ >>> + u64 size; >>> + /* Offset of this slice in buffer */ >>> + u64 offset; >>> +}; >>> + >>> +int get_dbc_req_elem_size(void); >>> +int get_dbc_rsp_elem_size(void); >>> +int get_cntl_version(struct qaic_device *qdev, struct qaic_user *usr, u16 *major, u16 *minor); >>> +int qaic_manage_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); >>> +void qaic_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result); >>> + >>> +void qaic_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result); >>> + >>> +int qaic_control_open(struct qaic_device *qdev); >>> +void qaic_control_close(struct qaic_device *qdev); >>> +void qaic_release_usr(struct qaic_device *qdev, struct qaic_user *usr); >>> + >>> +irqreturn_t dbc_irq_threaded_fn(int irq, void *data); >>> +irqreturn_t dbc_irq_handler(int irq, void *data); >>> +int disable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr); >>> +void enable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr); >>> +void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id); >>> +void release_dbc(struct qaic_device *qdev, u32 dbc_id); >>> + >>> +void wake_all_cntl(struct qaic_device *qdev); >>> +void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool exit_reset); >>> + >>> +struct drm_gem_object *qaic_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf); >>> + >>> +int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); >>> +int qaic_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); >>> +int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); >>> +int qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); >>> +int qaic_partial_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); >>> +int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); >>> +int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); >>> +void irq_polling_work(struct work_struct *work); >>> + >>> +#endif /* _QAIC_H_ */ >>> diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c >>> new file mode 100644 >>> index 0000000..9172799 >>> --- /dev/null >>> +++ b/drivers/accel/qaic/qaic_drv.c >>> @@ -0,0 +1,648 @@ >>> +// SPDX-License-Identifier: GPL-2.0-only >>> + >>> +/* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. */ >>> +/* Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. */ >>> + >>> +#include <linux/delay.h> >>> +#include <linux/dma-mapping.h> >>> +#include <linux/idr.h> >>> +#include <linux/interrupt.h> >>> +#include <linux/list.h> >>> +#include <linux/kref.h> >>> +#include <linux/mhi.h> >>> +#include <linux/module.h> >>> +#include <linux/msi.h> >>> +#include <linux/mutex.h> >>> +#include <linux/pci.h> >>> +#include <linux/spinlock.h> >>> +#include <linux/workqueue.h> >>> +#include <linux/wait.h> >>> +#include <drm/drm_accel.h> >>> +#include <drm/drm_drv.h> >>> +#include <drm/drm_file.h> >>> +#include <drm/drm_gem.h> >>> +#include <drm/drm_ioctl.h> >>> +#include <uapi/drm/qaic_accel.h> >>> + >>> +#include "mhi_controller.h" >>> +#include "mhi_qaic_ctrl.h" >>> +#include "qaic.h" >>> + >>> +MODULE_IMPORT_NS(DMA_BUF); >>> + >>> +#define PCI_DEV_AIC100 0xa100 >>> +#define QAIC_NAME "qaic" >>> +#define QAIC_DESC "Qualcomm Cloud AI Accelerators" >>> +#define CNTL_MAJOR 5 >>> +#define CNTL_MINOR 0 >>> + >>> +static unsigned int datapath_polling; >>> +module_param(datapath_polling, uint, 0400); >>> +bool poll_datapath; >> >> You can define this param as: >> static bool poll_datapath; >> module_param_named(poll_datapath, datapath_polling, bool, 0400); >> >> You woudn't have to set poll_datapath manually. >> I would also consider using a single name for the param and adding documentation using MODULE_PARM_DESC(). > > poll_datapath is referenced in a different file, therefore it cannot be static. > > Having two variables is intentional. The parameter only takes effect at module load. This file reads datapath_polling at module load, copies the value, and the driver uses the cached value (patch 5) during operation. If during operation, the module parameter changes value, that doesn't take effect. > > Your suggestion appears to make the cached variable, and the module parameter the same. Which means if the module parameter changes value during driver operation, it will take effect immediately. This seems like a "footgun", giving the user an oppertunity to break things. > > I think datapath_polling can be defined as a bool, and the module_param call can use the bool type, but the two variables remain. > > I could make poll_datapath device local, but then we'd have N copies of it, instead of just 1, which feels like a waste of memory. > > Using MODULE_PARAM_DESC feels like a good idea. Will do. The value of the poll_datapath cannot be changed at runtime as have read-only permissions (0400) set in sysfs.