[+cc Peter, Ingo, Arnaldo, Alexander, Christoph] On Thu, Feb 02, 2017 at 11:06:00AM -0700, Logan Gunthorpe wrote: > Microsemi's "Switchtec" line of PCI switch devices is already well > supported by the kernel with standard PCI switch drivers. However, the > Switchtec device advertises a special management endpoint with a separate > PCI function address and class code. This endpoint enables some additional > functionality which includes: > > * Packet and Byte Counters > * Switch Firmware Upgrades > * Event and Error logs > * Querying port link status > * Custom user firmware commands Would it make any sense to integrate this with the perf tool? It looks like switchtec-user measures bandwidth and latency, which sounds sort of perf-ish. > This patch introduces the switchtec kernel module which provides > PCI driver that exposes a char device. The char device provides > userspace access to this interface through read, write and (optionally) > poll calls. > > A userspace tool and library which utilizes this interface is available > at [1]. This tool takes inspiration (and borrows some code) from > nvme-cli [2]. The tool is largely complete at this time but additional > features may be added in the future. > > [1] https://github.com/sbates130272/switchtec-user > [2] https://github.com/linux-nvme/nvme-cli > > Signed-off-by: Logan Gunthorpe <logang@xxxxxxxxxxxx> > Signed-off-by: Stephen Bates <stephen.bates@xxxxxxxxxxxxx> > --- > MAINTAINERS | 8 + > drivers/pci/Kconfig | 1 + > drivers/pci/Makefile | 1 + > drivers/pci/switch/Kconfig | 13 + > drivers/pci/switch/Makefile | 1 + > drivers/pci/switch/switchtec.c | 1028 ++++++++++++++++++++++++++++++++++++++++ > 6 files changed, 1052 insertions(+) > create mode 100644 drivers/pci/switch/Kconfig > create mode 100644 drivers/pci/switch/Makefile > create mode 100644 drivers/pci/switch/switchtec.c > > diff --git a/MAINTAINERS b/MAINTAINERS > index 5f10c28..9c35198 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -9507,6 +9507,14 @@ S: Maintained > F: Documentation/devicetree/bindings/pci/aardvark-pci.txt > F: drivers/pci/host/pci-aardvark.c > > +PCI DRIVER FOR MICROSEMI SWITCHTEC > +M: Kurt Schwemmer <kurt.schwemmer@xxxxxxxxxxxxx> > +M: Stephen Bates <stephen.bates@xxxxxxxxxxxxx> > +M: Logan Gunthorpe <logang@xxxxxxxxxxxx> > +L: linux-pci@xxxxxxxxxxxxxxx > +S: Maintained > +F: drivers/pci/switch/switchtec* > + > PCI DRIVER FOR NVIDIA TEGRA > M: Thierry Reding <thierry.reding@xxxxxxxxx> > L: linux-tegra@xxxxxxxxxxxxxxx > diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig > index 6555eb7..f72e8c5 100644 > --- a/drivers/pci/Kconfig > +++ b/drivers/pci/Kconfig > @@ -133,3 +133,4 @@ config PCI_HYPERV > > source "drivers/pci/hotplug/Kconfig" > source "drivers/pci/host/Kconfig" > +source "drivers/pci/switch/Kconfig" > diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile > index 8db5079..15b46dd 100644 > --- a/drivers/pci/Makefile > +++ b/drivers/pci/Makefile > @@ -68,3 +68,4 @@ ccflags-$(CONFIG_PCI_DEBUG) := -DDEBUG > > # PCI host controller drivers > obj-y += host/ > +obj-y += switch/ > diff --git a/drivers/pci/switch/Kconfig b/drivers/pci/switch/Kconfig > new file mode 100644 > index 0000000..4c49648 > --- /dev/null > +++ b/drivers/pci/switch/Kconfig > @@ -0,0 +1,13 @@ > +menu "PCI switch controller drivers" > + depends on PCI > + > +config PCI_SW_SWITCHTEC > + tristate "MicroSemi Switchtec PCIe Switch Management Driver" > + help > + Enables support for the management interface for the MicroSemi > + Switchtec series of PCIe switches. Supports userspace access > + to submit MRPC commands to the switch via /dev/switchtecX > + devices. See <file:Documentation/switchtec.txt> for more > + information. > + > +endmenu > diff --git a/drivers/pci/switch/Makefile b/drivers/pci/switch/Makefile > new file mode 100644 > index 0000000..37d8cfb > --- /dev/null > +++ b/drivers/pci/switch/Makefile > @@ -0,0 +1 @@ > +obj-$(CONFIG_PCI_SW_SWITCHTEC) += switchtec.o > diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c > new file mode 100644 > index 0000000..e4a4294 > --- /dev/null > +++ b/drivers/pci/switch/switchtec.c > @@ -0,0 +1,1028 @@ > +/* > + * Microsemi Switchtec(tm) PCIe Management Driver > + * Copyright (c) 2017, Microsemi Corporation > + * > + * This program is free software; you can redistribute it and/or modify it > + * under the terms and conditions of the GNU General Public License, > + * version 2, as published by the Free Software Foundation. > + * > + * This program is distributed in the hope it will be useful, but WITHOUT > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for > + * more details. > + * > + */ > + > +#include <linux/interrupt.h> > +#include <linux/module.h> > +#include <linux/fs.h> > +#include <linux/uaccess.h> > +#include <linux/poll.h> > +#include <linux/pci.h> > +#include <linux/cdev.h> > +#include <linux/wait.h> > + > +MODULE_DESCRIPTION("Microsemi Switchtec(tm) PCI-E Management Driver"); Nit: s/PCI-E/PCIe/ > +MODULE_VERSION("0.1"); > +MODULE_LICENSE("GPL"); > +MODULE_AUTHOR("Microsemi Corporation"); > + > +static int max_devices = 16; This static initialization is slightly misleading because switchtec_init() immediately sets max_devices to at least 256. > +module_param(max_devices, int, 0644); > +MODULE_PARM_DESC(max_devices, "max number of switchtec device instances"); > + > +static dev_t switchtec_devt; > +static struct class *switchtec_class; > +static DEFINE_IDA(switchtec_minor_ida); > + > +#define MICROSEMI_VENDOR_ID 0x11f8 > +#define MICROSEMI_NTB_CLASSCODE 0x068000 > +#define MICROSEMI_MGMT_CLASSCODE 0x058000 > + > +#define SWITCHTEC_MRPC_PAYLOAD_SIZE 1024 > +#define SWITCHTEC_MAX_PFF_CSR 48 > + > +#define SWITCHTEC_EVENT_OCCURRED BIT(0) > +#define SWITCHTEC_EVENT_CLEAR BIT(0) > +#define SWITCHTEC_EVENT_EN_LOG BIT(1) > +#define SWITCHTEC_EVENT_EN_CLI BIT(2) > +#define SWITCHTEC_EVENT_EN_IRQ BIT(3) > +#define SWITCHTEC_EVENT_FATAL BIT(4) > + > +enum { > + SWITCHTEC_GAS_MRPC_OFFSET = 0x0000, > + SWITCHTEC_GAS_TOP_CFG_OFFSET = 0x1000, > + SWITCHTEC_GAS_SW_EVENT_OFFSET = 0x1800, > + SWITCHTEC_GAS_SYS_INFO_OFFSET = 0x2000, > + SWITCHTEC_GAS_FLASH_INFO_OFFSET = 0x2200, > + SWITCHTEC_GAS_PART_CFG_OFFSET = 0x4000, > + SWITCHTEC_GAS_NTB_OFFSET = 0x10000, > + SWITCHTEC_GAS_PFF_CSR_OFFSET = 0x134000, > +}; > + > +struct mrpc_regs { > + u8 input_data[SWITCHTEC_MRPC_PAYLOAD_SIZE]; > + u8 output_data[SWITCHTEC_MRPC_PAYLOAD_SIZE]; > + u32 cmd; > + u32 status; > + u32 ret_value; > +} __packed; > + > +enum mrpc_status { > + SWITCHTEC_MRPC_STATUS_INPROGRESS = 1, > + SWITCHTEC_MRPC_STATUS_DONE = 2, > + SWITCHTEC_MRPC_STATUS_ERROR = 0xFF, > + SWITCHTEC_MRPC_STATUS_INTERRUPTED = 0x100, > +}; > + > +struct sw_event_regs { > + u64 event_report_ctrl; > + u64 reserved1; > + u64 part_event_bitmap; > + u64 reserved2; > + u32 global_summary; > + u32 reserved3[3]; > + u32 stack_error_event_hdr; > + u32 stack_error_event_data; > + u32 reserved4[4]; > + u32 ppu_error_event_hdr; > + u32 ppu_error_event_data; > + u32 reserved5[4]; > + u32 isp_error_event_hdr; > + u32 isp_error_event_data; > + u32 reserved6[4]; > + u32 sys_reset_event_hdr; > + u32 reserved7[5]; > + u32 fw_exception_hdr; > + u32 reserved8[5]; > + u32 fw_nmi_hdr; > + u32 reserved9[5]; > + u32 fw_non_fatal_hdr; > + u32 reserved10[5]; > + u32 fw_fatal_hdr; > + u32 reserved11[5]; > + u32 twi_mrpc_comp_hdr; > + u32 twi_mrpc_comp_data; > + u32 reserved12[4]; > + u32 twi_mrpc_comp_async_hdr; > + u32 twi_mrpc_comp_async_data; > + u32 reserved13[4]; > + u32 cli_mrpc_comp_hdr; > + u32 cli_mrpc_comp_data; > + u32 reserved14[4]; > + u32 cli_mrpc_comp_async_hdr; > + u32 cli_mrpc_comp_async_data; > + u32 reserved15[4]; > + u32 gpio_interrupt_hdr; > + u32 gpio_interrupt_data; > + u32 reserved16[4]; > +} __packed; > + > +struct sys_info_regs { > + u32 device_id; > + u32 device_version; > + u32 firmware_version; > + u32 reserved1; > + u32 vendor_table_revision; > + u32 table_format_version; > + u32 partition_id; > + u32 cfg_file_fmt_version; > + u32 reserved2[58]; > + char vendor_id[8]; > + char product_id[16]; > + char product_revision[4]; > + char component_vendor[8]; > + u16 component_id; > + u8 component_revision; > +} __packed; > + > +struct flash_info_regs { > + u32 flash_part_map_upd_idx; > + > + struct active_partition_info { > + u32 address; > + u32 build_version; > + u32 build_string; > + } active_img; > + > + struct active_partition_info active_cfg; > + struct active_partition_info inactive_img; > + struct active_partition_info inactive_cfg; > + > + u32 flash_length; > + > + struct partition_info { > + u32 address; > + u32 length; > + } cfg0; > + > + struct partition_info cfg1; > + struct partition_info img0; > + struct partition_info img1; > + struct partition_info nvlog; > + struct partition_info vendor[8]; > +}; > + > +struct ntb_info_regs { > + u8 partition_count; > + u8 partition_id; > + u16 reserved1; > + u64 ep_map; > + u16 requester_id; > +} __packed; > + > +struct part_cfg_regs { > + u32 status; > + u32 state; > + u32 port_cnt; > + u32 usp_port_mode; > + u32 usp_pff_inst_id; > + u32 vep_pff_inst_id; > + u32 dsp_pff_inst_id[47]; > + u32 reserved1[11]; > + u16 vep_vector_number; > + u16 usp_vector_number; > + u32 port_event_bitmap; > + u32 reserved2[3]; > + u32 part_event_summary; > + u32 reserved3[3]; > + u32 part_reset_hdr; > + u32 part_reset_data[5]; > + u32 mrpc_comp_hdr; > + u32 mrpc_comp_data[5]; > + u32 mrpc_comp_async_hdr; > + u32 mrpc_comp_async_data[5]; > + u32 dyn_binding_hdr; > + u32 dyn_binding_data[5]; > + u32 reserved4[159]; > +} __packed; > + > +enum { > + SWITCHTEC_PART_CFG_EVENT_RESET = 1 << 0, > + SWITCHTEC_PART_CFG_EVENT_MRPC_CMP = 1 << 1, > + SWITCHTEC_PART_CFG_EVENT_MRPC_ASYNC_CMP = 1 << 2, > + SWITCHTEC_PART_CFG_EVENT_DYN_PART_CMP = 1 << 3, > +}; > + > +struct pff_csr_regs { > + u16 vendor_id; > + u16 device_id; > + u32 pci_cfg_header[15]; > + u32 pci_cap_region[48]; > + u32 pcie_cap_region[448]; > + u32 indirect_gas_window[128]; > + u32 indirect_gas_window_off; > + u32 reserved[127]; > + u32 pff_event_summary; > + u32 reserved2[3]; > + u32 aer_in_p2p_hdr; > + u32 aer_in_p2p_data[5]; > + u32 aer_in_vep_hdr; > + u32 aer_in_vep_data[5]; > + u32 dpc_hdr; > + u32 dpc_data[5]; > + u32 cts_hdr; > + u32 cts_data[5]; > + u32 reserved3[6]; > + u32 hotplug_hdr; > + u32 hotplug_data[5]; > + u32 ier_hdr; > + u32 ier_data[5]; > + u32 threshold_hdr; > + u32 threshold_data[5]; > + u32 power_mgmt_hdr; > + u32 power_mgmt_data[5]; > + u32 tlp_throttling_hdr; > + u32 tlp_throttling_data[5]; > + u32 force_speed_hdr; > + u32 force_speed_data[5]; > + u32 credit_timeout_hdr; > + u32 credit_timeout_data[5]; > + u32 link_state_hdr; > + u32 link_state_data[5]; > + u32 reserved4[174]; > +} __packed; > + > +struct switchtec_dev { > + struct pci_dev *pdev; > + struct msix_entry msix[4]; > + struct device dev; > + struct cdev cdev; > + unsigned int event_irq; > + > + int partition; > + int partition_count; > + int pff_csr_count; > + char pff_local[SWITCHTEC_MAX_PFF_CSR]; > + > + void __iomem *mmio; > + struct mrpc_regs __iomem *mmio_mrpc; > + struct sw_event_regs __iomem *mmio_sw_event; > + struct sys_info_regs __iomem *mmio_sys_info; > + struct flash_info_regs __iomem *mmio_flash_info; > + struct ntb_info_regs __iomem *mmio_ntb; > + struct part_cfg_regs __iomem *mmio_part_cfg; > + struct part_cfg_regs __iomem *mmio_part_cfg_all; > + struct pff_csr_regs __iomem *mmio_pff_csr; > + > + /* > + * The mrpc mutex must be held when accessing the other > + * mrpc fields > + */ > + struct mutex mrpc_mutex; > + struct list_head mrpc_queue; > + int mrpc_busy; > + struct work_struct mrpc_work; > + struct delayed_work mrpc_timeout; > + wait_queue_head_t event_wq; > + atomic_t event_cnt; Why is this atomic_t? You only do an atomic_set() (in stdev_create()) and atomic_reads() -- no writes other than the initial one. So I'm not sure what value atomic_t brings. > +}; > + > +static struct switchtec_dev *to_stdev(struct device *dev) > +{ > + return container_of(dev, struct switchtec_dev, dev); > +} > + > +struct switchtec_user { > + struct switchtec_dev *stdev; > + > + enum mrpc_state { > + MRPC_IDLE = 0, > + MRPC_QUEUED, > + MRPC_RUNNING, > + MRPC_DONE, > + } state; > + > + struct completion comp; > + struct kref kref; > + struct list_head list; > + > + u32 cmd; > + u32 status; > + u32 return_code; > + size_t data_len; > + unsigned char data[SWITCHTEC_MRPC_PAYLOAD_SIZE]; > + int event_cnt; > +}; > + > +static struct switchtec_user *stuser_create(struct switchtec_dev *stdev) > +{ > + struct switchtec_user *stuser; > + > + stuser = kzalloc(sizeof(*stuser), GFP_KERNEL); > + if (!stuser) > + return ERR_PTR(-ENOMEM); > + > + get_device(&stdev->dev); > + stuser->stdev = stdev; > + kref_init(&stuser->kref); > + INIT_LIST_HEAD(&stuser->list); > + init_completion(&stuser->comp); > + stuser->event_cnt = atomic_read(&stdev->event_cnt); > + > + dev_dbg(&stdev->dev, "%s: %p\n", __func__, stuser); > + > + return stuser; > +} > + > +static void stuser_free(struct kref *kref) > +{ > + struct switchtec_user *stuser; > + > + stuser = container_of(kref, struct switchtec_user, kref); > + > + dev_dbg(&stuser->stdev->dev, "%s: %p\n", __func__, stuser); > + > + put_device(&stuser->stdev->dev); > + kfree(stuser); > +} > + > +static void stuser_put(struct switchtec_user *stuser) > +{ > + kref_put(&stuser->kref, stuser_free); > +} > + > +static void stuser_set_state(struct switchtec_user *stuser, > + enum mrpc_state state) > +{ > + const char * const state_names[] = { > + [MRPC_IDLE] = "IDLE", > + [MRPC_QUEUED] = "QUEUED", > + [MRPC_RUNNING] = "RUNNING", > + [MRPC_DONE] = "DONE", > + }; > + > + stuser->state = state; > + > + dev_dbg(&stuser->stdev->dev, "stuser state %p -> %s", > + stuser, state_names[state]); > +} > + > +static int stdev_is_alive(struct switchtec_dev *stdev) > +{ > + return stdev->mmio != NULL; > +} > + > +static void mrpc_complete_cmd(struct switchtec_dev *stdev); > + > +static void mrpc_cmd_submit(struct switchtec_dev *stdev) > +{ > + /* requires the mrpc_mutex to already be held when called */ > + > + struct switchtec_user *stuser; > + > + if (stdev->mrpc_busy) > + return; > + > + if (list_empty(&stdev->mrpc_queue)) > + return; > + > + stuser = list_entry(stdev->mrpc_queue.next, struct switchtec_user, > + list); > + > + stuser_set_state(stuser, MRPC_RUNNING); > + stdev->mrpc_busy = 1; > + memcpy_toio(&stdev->mmio_mrpc->input_data, > + stuser->data, stuser->data_len); > + iowrite32(stuser->cmd, &stdev->mmio_mrpc->cmd); > + > + stuser->status = ioread32(&stdev->mmio_mrpc->status); > + if (stuser->status != SWITCHTEC_MRPC_STATUS_INPROGRESS) > + mrpc_complete_cmd(stdev); > + > + schedule_delayed_work(&stdev->mrpc_timeout, > + msecs_to_jiffies(500)); > +} > + > +static void mrpc_queue_cmd(struct switchtec_user *stuser) > +{ > + /* requires the mrpc_mutex to already be held when called */ > + > + struct switchtec_dev *stdev = stuser->stdev; > + > + kref_get(&stuser->kref); > + stuser_set_state(stuser, MRPC_QUEUED); > + init_completion(&stuser->comp); > + list_add_tail(&stuser->list, &stdev->mrpc_queue); > + > + mrpc_cmd_submit(stdev); > +} > + > +static void mrpc_complete_cmd(struct switchtec_dev *stdev) > +{ > + /* requires the mrpc_mutex to already be held when called */ > + struct switchtec_user *stuser; > + > + if (list_empty(&stdev->mrpc_queue)) > + return; > + > + stuser = list_entry(stdev->mrpc_queue.next, struct switchtec_user, > + list); > + > + stuser->status = ioread32(&stdev->mmio_mrpc->status); > + if (stuser->status == SWITCHTEC_MRPC_STATUS_INPROGRESS) > + return; > + > + stuser_set_state(stuser, MRPC_DONE); > + stuser->return_code = 0; > + > + if (stuser->status != SWITCHTEC_MRPC_STATUS_DONE) > + goto out; > + > + stuser->return_code = ioread32(&stdev->mmio_mrpc->ret_value); > + if (stuser->return_code != 0) > + goto out; > + > + memcpy_fromio(stuser->data, &stdev->mmio_mrpc->output_data, > + sizeof(stuser->data)); Apparently you always get 1K of data back, no matter what? > + > +out: > + complete_all(&stuser->comp); > + list_del_init(&stuser->list); > + stuser_put(stuser); > + stdev->mrpc_busy = 0; > + > + mrpc_cmd_submit(stdev); > +} > + > +static void mrpc_event_work(struct work_struct *work) > +{ > + struct switchtec_dev *stdev; > + > + stdev = container_of(work, struct switchtec_dev, mrpc_work); > + > + dev_dbg(&stdev->dev, "%s\n", __func__); > + > + mutex_lock(&stdev->mrpc_mutex); > + cancel_delayed_work(&stdev->mrpc_timeout); > + mrpc_complete_cmd(stdev); > + mutex_unlock(&stdev->mrpc_mutex); > +} > + > +static void mrpc_timeout_work(struct work_struct *work) > +{ > + struct switchtec_dev *stdev; > + u32 status; > + > + stdev = container_of(work, struct switchtec_dev, mrpc_timeout.work); > + > + dev_dbg(&stdev->dev, "%s\n", __func__); > + > + mutex_lock(&stdev->mrpc_mutex); > + > + if (stdev_is_alive(stdev)) { > + status = ioread32(&stdev->mmio_mrpc->status); > + if (status == SWITCHTEC_MRPC_STATUS_INPROGRESS) { > + schedule_delayed_work(&stdev->mrpc_timeout, > + msecs_to_jiffies(500)); > + goto out; > + } > + } > + > + mrpc_complete_cmd(stdev); > + > +out: > + mutex_unlock(&stdev->mrpc_mutex); > +} > + > +static int switchtec_dev_open(struct inode *inode, struct file *filp) > +{ > + struct switchtec_dev *stdev; > + struct switchtec_user *stuser; > + > + stdev = container_of(inode->i_cdev, struct switchtec_dev, cdev); > + > + stuser = stuser_create(stdev); > + if (!stuser) > + return PTR_ERR(stuser); > + > + filp->private_data = stuser; > + nonseekable_open(inode, filp); > + > + dev_dbg(&stdev->dev, "%s: %p\n", __func__, stuser); > + > + return 0; > +} > + > +static int switchtec_dev_release(struct inode *inode, struct file *filp) > +{ > + struct switchtec_user *stuser = filp->private_data; > + > + stuser_put(stuser); > + > + return 0; > +} > + > +static ssize_t switchtec_dev_write(struct file *filp, const char __user *data, > + size_t size, loff_t *off) > +{ > + struct switchtec_user *stuser = filp->private_data; > + struct switchtec_dev *stdev = stuser->stdev; > + int rc; > + > + if (!stdev_is_alive(stdev)) > + return -ENXIO; What exactly does this protect against? Is it OK if stdev_is_alive() becomes false immediately after we check? > + > + if (size < sizeof(stuser->cmd) || > + size > sizeof(stuser->cmd) + SWITCHTEC_MRPC_PAYLOAD_SIZE) I think I would use "sizeof(stuser->data)" instead of SWITCHTEC_MRPC_PAYLOAD_SIZE so it matches what mrpc_complete_cmd() does. Same below in switchtec_dev_read(). mrpc_mutex doesn't protect the stuser things, does it? If not, you could do this without the gotos. And I think it's a little easier to be confident there are no buffer overruns: if (stuser->state != MRPC_IDLE) return -EBADE; if (size < sizeof(stuser->cmd)) return -EINVAL; rc = copy_from_user(&stuser->cmd, data, sizeof(stuser->cmd)); if (rc) return -EFAULT; size -= sizeof(stuser->cmd); data += sizeof(stuser->cmd); if (size > sizeof(stuser->data)) return -EINVAL; rc = copy_from_user(&stuser->data, data, size); if (rc) return -EFAULT; stuser->data_len = size; if (mutex_lock_interruptible(&stdev->mrpc_mutex)) return -EINTR; mrpc_queue_cmd(stuser); mutex_unlock(&stdev->mrpc_mutex); return size; > + return -EINVAL; > + > + if (mutex_lock_interruptible(&stdev->mrpc_mutex)) > + return -EINTR; > + > + if (stuser->state != MRPC_IDLE) { > + rc = -EBADE; > + goto out; > + } > + > + rc = copy_from_user(&stuser->cmd, data, sizeof(stuser->cmd)); > + if (rc) { > + rc = -EFAULT; > + goto out; > + } > + > + data += sizeof(stuser->cmd); > + rc = copy_from_user(&stuser->data, data, size - sizeof(stuser->cmd)); > + if (rc) { > + rc = -EFAULT; > + goto out; > + } > + > + stuser->data_len = size - sizeof(stuser->cmd); > + > + mrpc_queue_cmd(stuser); > + > + rc = size; > + > +out: > + mutex_unlock(&stdev->mrpc_mutex); > + > + return rc; > +} > + > +static ssize_t switchtec_dev_read(struct file *filp, char __user *data, > + size_t size, loff_t *off) > +{ > + struct switchtec_user *stuser = filp->private_data; > + struct switchtec_dev *stdev = stuser->stdev; > + int rc; > + > + if (!stdev_is_alive(stdev)) > + return -ENXIO; > + > + if (size < sizeof(stuser->cmd) || > + size > sizeof(stuser->cmd) + SWITCHTEC_MRPC_PAYLOAD_SIZE) > + return -EINVAL; > + > + if (stuser->state == MRPC_IDLE) > + return -EBADE; > + > + if (filp->f_flags & O_NONBLOCK) { > + if (!try_wait_for_completion(&stuser->comp)) > + return -EAGAIN; > + } else { > + rc = wait_for_completion_interruptible(&stuser->comp); > + if (rc < 0) > + return rc; > + } > + > + if (mutex_lock_interruptible(&stdev->mrpc_mutex)) > + return -EINTR; > + > + if (stuser->state != MRPC_DONE) > + return -EBADE; > + > + rc = copy_to_user(data, &stuser->return_code, > + sizeof(stuser->return_code)); > + if (rc) { > + rc = -EFAULT; > + goto out; > + } > + > + data += sizeof(stuser->return_code); > + rc = copy_to_user(data, &stuser->data, > + size - sizeof(stuser->return_code)); > + if (rc) { > + rc = -EFAULT; > + goto out; > + } > + > + stuser_set_state(stuser, MRPC_IDLE); > + > + if (stuser->status == SWITCHTEC_MRPC_STATUS_DONE) > + rc = size; > + else if (stuser->status == SWITCHTEC_MRPC_STATUS_INTERRUPTED) > + rc = -ENXIO; > + else > + rc = -EBADMSG; > + > +out: > + mutex_unlock(&stdev->mrpc_mutex); > + > + return rc; > +} > + > +static unsigned int switchtec_dev_poll(struct file *filp, poll_table *wait) > +{ > + struct switchtec_user *stuser = filp->private_data; > + struct switchtec_dev *stdev = stuser->stdev; > + int ret = 0; > + > + poll_wait(filp, &stuser->comp.wait, wait); > + poll_wait(filp, &stdev->event_wq, wait); > + > + if (!stdev_is_alive(stdev)) > + return POLLERR; > + > + if (try_wait_for_completion(&stuser->comp)) > + ret |= POLLIN | POLLRDNORM; > + > + if (stuser->event_cnt != atomic_read(&stdev->event_cnt)) > + ret |= POLLPRI | POLLRDBAND; > + > + return ret; > +} > + > +static const struct file_operations switchtec_fops = { > + .owner = THIS_MODULE, > + .open = switchtec_dev_open, > + .release = switchtec_dev_release, > + .write = switchtec_dev_write, > + .read = switchtec_dev_read, > + .poll = switchtec_dev_poll, > +}; > + > +static void stdev_release(struct device *dev) > +{ > + struct switchtec_dev *stdev = to_stdev(dev); > + > + ida_simple_remove(&switchtec_minor_ida, > + MINOR(dev->devt)); > + kfree(stdev); > +} > + > +static void stdev_unregister(struct switchtec_dev *stdev) > +{ > + cdev_del(&stdev->cdev); > + device_unregister(&stdev->dev); > +} > + > +static struct switchtec_dev *stdev_create(struct pci_dev *pdev) > +{ > + struct switchtec_dev *stdev; > + int minor; > + struct device *dev; > + struct cdev *cdev; > + int rc; > + > + stdev = kzalloc_node(sizeof(*stdev), GFP_KERNEL, > + dev_to_node(&pdev->dev)); > + if (!stdev) > + return ERR_PTR(-ENOMEM); > + > + stdev->pdev = pdev; > + INIT_LIST_HEAD(&stdev->mrpc_queue); > + mutex_init(&stdev->mrpc_mutex); > + stdev->mrpc_busy = 0; > + INIT_WORK(&stdev->mrpc_work, mrpc_event_work); > + INIT_DELAYED_WORK(&stdev->mrpc_timeout, mrpc_timeout_work); > + init_waitqueue_head(&stdev->event_wq); > + atomic_set(&stdev->event_cnt, 0); > + > + minor = ida_simple_get(&switchtec_minor_ida, 0, 0, > + GFP_KERNEL); > + if (minor < 0) > + return ERR_PTR(minor); > + > + dev = &stdev->dev; > + device_initialize(dev); > + dev->devt = MKDEV(MAJOR(switchtec_devt), minor); > + dev->class = switchtec_class; > + dev->parent = &pdev->dev; > + dev->release = stdev_release; > + dev_set_name(dev, "switchtec%d", minor); > + > + cdev = &stdev->cdev; > + cdev_init(cdev, &switchtec_fops); > + cdev->owner = THIS_MODULE; > + cdev->kobj.parent = &dev->kobj; > + > + rc = cdev_add(&stdev->cdev, dev->devt, 1); > + if (rc) > + goto err_cdev; > + > + rc = device_add(dev); > + if (rc) { > + cdev_del(&stdev->cdev); > + put_device(dev); > + return ERR_PTR(rc); > + } > + > + return stdev; > + > +err_cdev: > + ida_simple_remove(&switchtec_minor_ida, minor); > + > + return ERR_PTR(rc); > +} > + > +static irqreturn_t switchtec_event_isr(int irq, void *dev) > +{ > + struct switchtec_dev *stdev = dev; > + u32 reg; > + irqreturn_t ret = IRQ_NONE; > + > + reg = ioread32(&stdev->mmio_part_cfg->mrpc_comp_hdr); > + if (reg & SWITCHTEC_EVENT_OCCURRED) { > + dev_dbg(&stdev->dev, "%s: mrpc comp\n", __func__); > + ret = IRQ_HANDLED; > + schedule_work(&stdev->mrpc_work); > + iowrite32(reg, &stdev->mmio_part_cfg->mrpc_comp_hdr); > + } > + > + return ret; > +} > + > +static int switchtec_init_msix_isr(struct switchtec_dev *stdev) > +{ > + struct pci_dev *pdev = stdev->pdev; > + int rc, i, msix_count, node; > + > + node = dev_to_node(&pdev->dev); Unused? > + for (i = 0; i < ARRAY_SIZE(stdev->msix); ++i) > + stdev->msix[i].entry = i; > + > + msix_count = pci_enable_msix_range(pdev, stdev->msix, 1, > + ARRAY_SIZE(stdev->msix)); > + if (msix_count < 0) > + return msix_count; Maybe this could be simplified by using pci_alloc_irq_vectors()? > + stdev->event_irq = ioread32(&stdev->mmio_part_cfg->vep_vector_number); > + if (stdev->event_irq < 0 || stdev->event_irq >= msix_count) { > + rc = -EFAULT; > + goto err_msix_request; > + } Not sure what this is doing, but you immediately overwrite stdev->event_irq below. If you're using it as just a temporary above for doing some validation, I would just use a local variable to avoid the connection with stdev->event_irq. > + stdev->event_irq = stdev->msix[stdev->event_irq].vector; Oh, I guess you allocate several MSI-X vectors, but you only actually use one of them? Why is that? I was confused about why you allocate several vectors, but there's only one devm_request_irq() call below. > + dev_dbg(&stdev->dev, "Using msix interrupts: event_irq=%d\n", > + stdev->event_irq); > + > + return 0; > + > +err_msix_request: > + pci_disable_msix(pdev); > + return rc; > +} > + > +static int switchtec_init_msi_isr(struct switchtec_dev *stdev) > +{ > + int rc; > + struct pci_dev *pdev = stdev->pdev; > + > + /* Try to set up msi irq */ > + rc = pci_enable_msi_range(pdev, 1, 4); > + if (rc < 0) > + return rc; > + > + stdev->event_irq = ioread32(&stdev->mmio_part_cfg->vep_vector_number); > + if (stdev->event_irq < 0 || stdev->event_irq >= 4) { > + rc = -EFAULT; > + goto err_msi_request; > + } > + > + stdev->event_irq = pdev->irq + stdev->event_irq; > + dev_dbg(&stdev->dev, "Using msi interrupts: event_irq=%d\n", > + stdev->event_irq); > + > + return 0; > + > +err_msi_request: > + pci_disable_msi(pdev); > + return rc; > +} > + > +static int switchtec_init_isr(struct switchtec_dev *stdev) > +{ > + int rc; > + > + rc = switchtec_init_msix_isr(stdev); > + if (rc) > + rc = switchtec_init_msi_isr(stdev); > + > + if (rc) > + return rc; > + > + rc = devm_request_irq(&stdev->pdev->dev, stdev->event_irq, > + switchtec_event_isr, 0, KBUILD_MODNAME, stdev); > + > + return rc; > +} > + > +static void switchtec_deinit_isr(struct switchtec_dev *stdev) > +{ > + devm_free_irq(&stdev->pdev->dev, stdev->event_irq, stdev); > + pci_disable_msix(stdev->pdev); > + pci_disable_msi(stdev->pdev); > +} > + > +static void init_pff(struct switchtec_dev *stdev) > +{ > + int i; > + u32 reg; > + struct part_cfg_regs *pcfg = stdev->mmio_part_cfg; > + > + for (i = 0; i < SWITCHTEC_MAX_PFF_CSR; i++) { > + reg = ioread16(&stdev->mmio_pff_csr[i].vendor_id); > + if (reg != MICROSEMI_VENDOR_ID) > + break; > + } > + > + stdev->pff_csr_count = i; > + > + reg = ioread32(&pcfg->usp_pff_inst_id); > + if (reg < SWITCHTEC_MAX_PFF_CSR) > + stdev->pff_local[reg] = 1; > + > + reg = ioread32(&pcfg->vep_pff_inst_id); > + if (reg < SWITCHTEC_MAX_PFF_CSR) > + stdev->pff_local[reg] = 1; > + > + for (i = 0; i < ARRAY_SIZE(pcfg->dsp_pff_inst_id); i++) { > + reg = ioread32(&pcfg->dsp_pff_inst_id[i]); > + if (reg < SWITCHTEC_MAX_PFF_CSR) > + stdev->pff_local[reg] = 1; > + } > +} > + > +static int switchtec_init_pci(struct switchtec_dev *stdev, > + struct pci_dev *pdev) > +{ > + int rc; > + > + rc = pcim_enable_device(pdev); > + if (rc) > + return rc; > + > + rc = pcim_iomap_regions(pdev, 0x1, KBUILD_MODNAME); > + if (rc) > + return rc; > + > + pci_set_master(pdev); > + > + stdev->mmio = pcim_iomap_table(pdev)[0]; > + stdev->mmio_mrpc = stdev->mmio + SWITCHTEC_GAS_MRPC_OFFSET; > + stdev->mmio_sw_event = stdev->mmio + SWITCHTEC_GAS_SW_EVENT_OFFSET; > + stdev->mmio_sys_info = stdev->mmio + SWITCHTEC_GAS_SYS_INFO_OFFSET; > + stdev->mmio_flash_info = stdev->mmio + SWITCHTEC_GAS_FLASH_INFO_OFFSET; > + stdev->mmio_ntb = stdev->mmio + SWITCHTEC_GAS_NTB_OFFSET; > + stdev->partition = ioread8(&stdev->mmio_ntb->partition_id); > + stdev->partition_count = ioread8(&stdev->mmio_ntb->partition_count); > + stdev->mmio_part_cfg_all = stdev->mmio + SWITCHTEC_GAS_PART_CFG_OFFSET; > + stdev->mmio_part_cfg = &stdev->mmio_part_cfg_all[stdev->partition]; > + stdev->mmio_pff_csr = stdev->mmio + SWITCHTEC_GAS_PFF_CSR_OFFSET; > + > + init_pff(stdev); > + > + iowrite32(SWITCHTEC_EVENT_CLEAR | > + SWITCHTEC_EVENT_EN_IRQ, Does this enable the device to generate IRQs? You haven't connected the ISR yet (but I guess you probably also haven't told the device to do anything that would actually generate an IRQ). > + &stdev->mmio_part_cfg->mrpc_comp_hdr); > + > + pci_set_drvdata(pdev, stdev); > + > + return 0; > +} > + > +static void switchtec_deinit_pci(struct switchtec_dev *stdev) > +{ > + struct pci_dev *pdev = stdev->pdev; > + > + stdev->mmio = NULL; > + pci_clear_master(pdev); > + pci_set_drvdata(pdev, NULL); > +} > + > +static int switchtec_pci_probe(struct pci_dev *pdev, > + const struct pci_device_id *id) > +{ > + struct switchtec_dev *stdev; > + int rc; > + > + stdev = stdev_create(pdev); > + if (!stdev) > + return PTR_ERR(stdev); > + > + rc = switchtec_init_pci(stdev, pdev); > + if (rc) > + goto err_init_pci; > + > + rc = switchtec_init_isr(stdev); > + if (rc) { > + dev_err(&stdev->dev, "failed to init isr.\n"); > + goto err_init_isr; > + } > + > + dev_info(&stdev->dev, "Management device registered.\n"); > + > + return 0; > + > +err_init_isr: > + switchtec_deinit_pci(stdev); > +err_init_pci: > + stdev_unregister(stdev); > + return rc; > +} > + > +static void switchtec_pci_remove(struct pci_dev *pdev) > +{ > + struct switchtec_dev *stdev = pci_get_drvdata(pdev); > + > + switchtec_deinit_isr(stdev); > + switchtec_deinit_pci(stdev); > + stdev_unregister(stdev); > +} > + > +#define SWITCHTEC_PCI_DEVICE(device_id) \ > + { \ > + .vendor = MICROSEMI_VENDOR_ID, \ > + .device = device_id, \ > + .subvendor = PCI_ANY_ID, \ > + .subdevice = PCI_ANY_ID, \ > + .class = MICROSEMI_MGMT_CLASSCODE, \ > + .class_mask = 0xFFFFFFFF, \ > + }, \ > + { \ > + .vendor = MICROSEMI_VENDOR_ID, \ > + .device = device_id, \ > + .subvendor = PCI_ANY_ID, \ > + .subdevice = PCI_ANY_ID, \ > + .class = MICROSEMI_NTB_CLASSCODE, \ > + .class_mask = 0xFFFFFFFF, \ > + } > + > +static const struct pci_device_id switchtec_pci_tbl[] = { > + SWITCHTEC_PCI_DEVICE(0x8531), //PFX 24xG3 > + SWITCHTEC_PCI_DEVICE(0x8532), //PFX 32xG3 > + SWITCHTEC_PCI_DEVICE(0x8533), //PFX 48xG3 > + SWITCHTEC_PCI_DEVICE(0x8534), //PFX 64xG3 > + SWITCHTEC_PCI_DEVICE(0x8535), //PFX 80xG3 > + SWITCHTEC_PCI_DEVICE(0x8536), //PFX 96xG3 > + SWITCHTEC_PCI_DEVICE(0x8543), //PSX 48xG3 > + SWITCHTEC_PCI_DEVICE(0x8544), //PSX 64xG3 > + SWITCHTEC_PCI_DEVICE(0x8545), //PSX 80xG3 > + SWITCHTEC_PCI_DEVICE(0x8546), //PSX 96xG3 > + {0} > +}; > +MODULE_DEVICE_TABLE(pci, switchtec_pci_tbl); > + > +static struct pci_driver switchtec_pci_driver = { > + .name = KBUILD_MODNAME, > + .id_table = switchtec_pci_tbl, > + .probe = switchtec_pci_probe, > + .remove = switchtec_pci_remove, > +}; > + > +static int __init switchtec_init(void) > +{ > + int rc; > + > + max_devices = max(max_devices, 256); > + rc = alloc_chrdev_region(&switchtec_devt, 0, max_devices, > + "switchtec"); > + if (rc) > + return rc; > + > + switchtec_class = class_create(THIS_MODULE, "switchtec"); > + if (IS_ERR(switchtec_class)) { > + rc = PTR_ERR(switchtec_class); > + goto err_create_class; > + } > + > + rc = pci_register_driver(&switchtec_pci_driver); > + if (rc) > + goto err_pci_register; > + > + pr_info(KBUILD_MODNAME ": loaded.\n"); > + > + return 0; > + > +err_pci_register: > + class_destroy(switchtec_class); > + > +err_create_class: > + unregister_chrdev_region(switchtec_devt, max_devices); > + > + return rc; > +} > +module_init(switchtec_init); > + > +static void __exit switchtec_exit(void) > +{ > + pci_unregister_driver(&switchtec_pci_driver); > + class_destroy(switchtec_class); > + unregister_chrdev_region(switchtec_devt, max_devices); > + ida_destroy(&switchtec_minor_ida); > + > + pr_info(KBUILD_MODNAME ": unloaded.\n"); > +} > +module_exit(switchtec_exit); > -- > 2.1.4