Sample driver creates mdev device that simulates serial port over PCI card. Signed-off-by: Kirti Wankhede <kwankhede@xxxxxxxxxx> Signed-off-by: Neo Jia <cjia@xxxxxxxxxx> Change-Id: I857f8f12f8b275f2498dfe8c628a5cdc7193b1b2 --- Documentation/mdev/Makefile | 14 + Documentation/mdev/mtty.c | 1202 +++++++++++++++++++++ Documentation/{ => mdev}/vfio-mediated-device.txt | 61 ++ 3 files changed, 1277 insertions(+) create mode 100644 Documentation/mdev/Makefile create mode 100644 Documentation/mdev/mtty.c rename Documentation/{ => mdev}/vfio-mediated-device.txt (78%) diff --git a/Documentation/mdev/Makefile b/Documentation/mdev/Makefile new file mode 100644 index 000000000000..ff6f8a324c85 --- /dev/null +++ b/Documentation/mdev/Makefile @@ -0,0 +1,14 @@ +# +# Makefile for mtty.c file +# +KDIR:=/lib/modules/$(shell uname -r)/build + +obj-m:=mtty.o + +default: + $(MAKE) -C $(KDIR) SUBDIRS=$(PWD) modules + +clean: + @rm -rf .*.cmd *.mod.c *.o *.ko .tmp* + @rm -rf Module.* Modules.* modules.* .tmp_versions + diff --git a/Documentation/mdev/mtty.c b/Documentation/mdev/mtty.c new file mode 100644 index 000000000000..ce29d54b4275 --- /dev/null +++ b/Documentation/mdev/mtty.c @@ -0,0 +1,1202 @@ +/* + * Mediated virtual PCI serial host device driver + * + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * Author: Neo Jia <cjia@xxxxxxxxxx> + * Kirti Wankhede <kwankhede@xxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Sample driver that creates mdev device that simulates serial port over PCI + * card. + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/poll.h> +#include <linux/slab.h> +#include <linux/cdev.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/uuid.h> +#include <linux/vfio.h> +#include <linux/iommu.h> +#include <linux/sysfs.h> +#include <linux/ctype.h> +#include <linux/file.h> +#include <linux/mdev.h> +#include <linux/pci.h> +#include <linux/serial.h> +#include <uapi/linux/serial_reg.h> +/* + * #defines + */ + +#define VERSION_STRING "0.1" +#define DRIVER_AUTHOR "NVIDIA Corporation" + +#define MTTY_CLASS_NAME "mtty" + +#define MTTY_NAME "mtty" + +#define MTTY_CONFIG_SPACE_SIZE 0xff +#define MTTY_IO_BAR_SIZE 0x8 +#define MTTY_MMIO_BAR_SIZE 0x100000 + +#define STORE_LE16(addr, val) (*(u16 *)addr = val) +#define STORE_LE32(addr, val) (*(u32 *)addr = val) + +#define MAX_FIFO_SIZE 16 + +#define CIRCULAR_BUF_INC_IDX(idx) (idx = (idx + 1) & (MAX_FIFO_SIZE - 1)) + +#define MTTY_VFIO_PCI_OFFSET_SHIFT 40 + +#define MTTY_VFIO_PCI_OFFSET_TO_INDEX(off) (off >> MTTY_VFIO_PCI_OFFSET_SHIFT) +#define MTTY_VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << MTTY_VFIO_PCI_OFFSET_SHIFT) +#define MTTY_VFIO_PCI_OFFSET_MASK (((u64)(1) << MTTY_VFIO_PCI_OFFSET_SHIFT) - 1) + + +/* + * Global Structures + */ + +struct mtty_dev { + dev_t vd_devt; + struct class *vd_class; + struct cdev vd_cdev; + struct idr vd_idr; + struct device dev; +} mtty_dev; + +struct mdev_region_info { + u64 start; + u64 phys_start; + u32 size; + u64 vfio_offset; +}; + +#if defined(DEBUG_REGS) +const char *wr_reg[] = { + "TX", + "IER", + "FCR", + "LCR", + "MCR", + "LSR", + "MSR", + "SCR" +}; + +const char *rd_reg[] = { + "RX", + "IER", + "IIR", + "LCR", + "MCR", + "LSR", + "MSR", + "SCR" +}; +#endif + +// loop back buffer +struct rxtx { + u8 fifo[MAX_FIFO_SIZE]; + u8 head, tail; + u8 count; +}; + +struct serial_port { + u8 uart_reg[8]; /* 8 registers */ + struct rxtx rxtx; /* loop back buffer */ + bool dlab; + bool overrun; + u16 divisor; + u8 fcr; /* FIFO control register */ + u8 max_fifo_size; + u8 intr_trigger_level; /* interrupt trigger level */ +}; + +/* State of each mdev device */ +struct mdev_state { + int irq_fd; + struct file *intx_file; + struct file *msi_file; + int irq_index; + u8 *vconfig; + struct mutex ops_lock; + struct mdev_device *mdev; + struct mdev_region_info region_info[VFIO_PCI_NUM_REGIONS]; + u32 bar_mask[VFIO_PCI_NUM_REGIONS]; + struct list_head next; + struct serial_port s[2]; + struct mutex rxtx_lock; +}; + +struct mutex mdev_list_lock; +struct list_head mdev_devices_list; + +static struct file_operations vd_fops = { + .owner = THIS_MODULE, +}; + +/* function prototypes */ + +static int mtty_dev_mdev_trigger_interrupt(uuid_le uuid); + +/* Helper functions */ +static struct mdev_state *find_mdev_state_by_uuid(uuid_le uuid) +{ + struct mdev_state *mds; + + list_for_each_entry(mds, &mdev_devices_list, next) { + if (uuid_le_cmp(mds->mdev->uuid, uuid) == 0) + return mds; + } + + return NULL; +} + +void dump_buffer(char *buf, uint32_t count) +{ +#if defined(DEBUG) + int i; + + pr_info("Buffer: \n"); + for (i = 0; i < count; i++) { + printk(KERN_INFO "%2x ", *(buf + i)); + if ((i + 1) % 16 == 0) + pr_info("\n"); + } +#endif +} + +static void mtty_create_config_space(struct mdev_state *mdev_state) +{ + /* PCI dev ID */ + STORE_LE32((u32 *) &mdev_state->vconfig[0x0], 0x32534348); + + /* Control: I/O+, Mem-, BusMaster- */ + STORE_LE16((u16 *) &mdev_state->vconfig[0x4], 0x0001); + + /* Status: capabilities list absent */ + STORE_LE16((u16 *) &mdev_state->vconfig[0x6], 0x0200); + + /* Rev ID */ + mdev_state->vconfig[0x8] = 0x10; + + /* programming interface class : 16550-compatible serial controller */ + mdev_state->vconfig[0x9] = 0x02; + + /* Sub class : 00 */ + mdev_state->vconfig[0xa] = 0x00; + + /* Base class : Simple Communication controllers */ + mdev_state->vconfig[0xb] = 0x07; + + /* base address registers */ + /* BAR0: IO space */ + STORE_LE32((u32 *) &mdev_state->vconfig[0x10], 0x000001); + mdev_state->bar_mask[0] = ~(MTTY_IO_BAR_SIZE) + 1; + + /* BAR1: IO space */ + STORE_LE32((u32 *) &mdev_state->vconfig[0x14], 0x000001); + mdev_state->bar_mask[1] = ~(MTTY_IO_BAR_SIZE) + 1; + + /* Subsystem ID */ + STORE_LE32((u32 *) &mdev_state->vconfig[0x2c], 0x32534348); + + mdev_state->vconfig[0x34] = 0x00; /* Cap Ptr */ + mdev_state->vconfig[0x3d] = 0x01; /* interrupt pin (INTA#) */ + + /* Vendor specific data */ + mdev_state->vconfig[0x40] = 0x23; + mdev_state->vconfig[0x43] = 0x80; + mdev_state->vconfig[0x44] = 0x23; + mdev_state->vconfig[0x48] = 0x23; + mdev_state->vconfig[0x4c] = 0x23; + + mdev_state->vconfig[0x60] = 0x50; + mdev_state->vconfig[0x61] = 0x43; + mdev_state->vconfig[0x62] = 0x49; + mdev_state->vconfig[0x63] = 0x20; + mdev_state->vconfig[0x64] = 0x53; + mdev_state->vconfig[0x65] = 0x65; + mdev_state->vconfig[0x66] = 0x72; + mdev_state->vconfig[0x67] = 0x69; + mdev_state->vconfig[0x68] = 0x61; + mdev_state->vconfig[0x69] = 0x6c; + mdev_state->vconfig[0x6a] = 0x2f; + mdev_state->vconfig[0x6b] = 0x55; + mdev_state->vconfig[0x6c] = 0x41; + mdev_state->vconfig[0x6d] = 0x52; + mdev_state->vconfig[0x6e] = 0x54; +} + +static void handle_pci_cfg_write(struct mdev_state *mdev_state, u16 offset, + char *buf, u32 count) +{ + u32 cfg_addr, bar_mask, bar_index = 0; + + switch (offset) { + case 0x04: /* device control */ + case 0x06: /* device status */ + /* do nothing */ + break; + case 0x3c: /* interrupt line */ + mdev_state->vconfig[0x3c] = buf[0]; + break; + case 0x3d: + /* + * Interrupt Pin is hardwired to INTA. + * This field is write protected by hardware + */ + break; + case 0x10: /* BAR0 */ + case 0x14: /* BAR1 */ + if (offset == 0x10) + bar_index = 0; + else if (offset == 0x14) + bar_index = 1; + + cfg_addr = *(u32 *)buf; + pr_info("BAR%d addr 0x%x\n", bar_index, cfg_addr); + + if (cfg_addr == 0xffffffff) { + bar_mask = mdev_state->bar_mask[bar_index]; + cfg_addr = (cfg_addr & bar_mask); + } + + cfg_addr |= (mdev_state->vconfig[offset] & 0x3ul); + STORE_LE32(&mdev_state->vconfig[offset], cfg_addr); + break; + case 0x18: /* BAR2 */ + case 0x1c: /* BAR3 */ + case 0x20: /* BAR4 */ + STORE_LE32(&mdev_state->vconfig[offset], 0); + break; + default: + pr_info("PCI config write @0x%x of %d bytes not handled \n", + offset, count); + break; + } +} + +static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, + u16 offset, char *buf, u32 count) +{ + u8 data = *buf; + + /* Handle data written by guest */ + switch (offset) { + case UART_TX: + /* if DLAB set, data is LSB of divisor */ + if (mdev_state->s[index].dlab) { + mdev_state->s[index].divisor |= data; + break; + } + + mutex_lock(&mdev_state->rxtx_lock); + + /* save in TX buffer */ + if (mdev_state->s[index].rxtx.count < mdev_state->s[index].max_fifo_size) { + mdev_state->s[index].rxtx.fifo[mdev_state->s[index].rxtx.head] = data; + mdev_state->s[index].rxtx.count++; + CIRCULAR_BUF_INC_IDX(mdev_state->s[index].rxtx.head); + mdev_state->s[index].overrun = false; + + /* trigger interrupt if receive data interrupt is enabled and fifo + reached trigger level */ + if ((mdev_state->s[index].uart_reg[UART_IER] & UART_IER_RDI) && + (mdev_state->s[index].rxtx.count == mdev_state->s[index].intr_trigger_level)) { + /* trigger interrupt */ +#if defined(DEBUG_INTR) + pr_err("Serial port %d: Fifo level trigger\n", index); +#endif + mtty_dev_mdev_trigger_interrupt(mdev_state->mdev->uuid); + } + } else { +#if defined(DEBUG_INTR) + pr_err("Serial port %d: Buffer Overflow\n", index); +#endif + mdev_state->s[index].overrun = true; + + /* trigger interrupt if receiver line status interrupt is enabled */ + if (mdev_state->s[index].uart_reg[UART_IER] & UART_IER_RLSI) + mtty_dev_mdev_trigger_interrupt(mdev_state->mdev->uuid); + } + + mutex_unlock(&mdev_state->rxtx_lock); + break; + + case UART_IER: + /* if DLAB set, data is MSB of divisor */ + if (mdev_state->s[index].dlab) + mdev_state->s[index].divisor |= (u16)data << 8; + else { + mdev_state->s[index].uart_reg[offset] = data; + + mutex_lock(&mdev_state->rxtx_lock); + if ((data & UART_IER_THRI) && + (mdev_state->s[index].rxtx.head == mdev_state->s[index].rxtx.tail)) { +#if defined(DEBUG_INTR) + pr_err("Serial port %d: IER_THRI write\n", index); +#endif + mtty_dev_mdev_trigger_interrupt(mdev_state->mdev->uuid); + } + + mutex_unlock(&mdev_state->rxtx_lock); + } + + break; + + case UART_FCR: + mdev_state->s[index].fcr = data; + + mutex_lock(&mdev_state->rxtx_lock); + if (data & (UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT)) { + /* clear loop back FIFO */ + mdev_state->s[index].rxtx.count = 0; + mdev_state->s[index].rxtx.head = 0; + mdev_state->s[index].rxtx.tail = 0; + } + mutex_unlock(&mdev_state->rxtx_lock); + + switch (data & UART_FCR_TRIGGER_MASK) { + case UART_FCR_TRIGGER_1: + mdev_state->s[index].intr_trigger_level = 1; + break; + + case UART_FCR_TRIGGER_4: + mdev_state->s[index].intr_trigger_level = 4; + break; + + case UART_FCR_TRIGGER_8: + mdev_state->s[index].intr_trigger_level = 8; + break; + + case UART_FCR_TRIGGER_14: + mdev_state->s[index].intr_trigger_level = 14; + break; + } + + /* Set trigger level to 1 otherwise or implement timer with timeout of + * 4 characters and on expiring that timer set Recevice data timeout in + * IIR register */ + mdev_state->s[index].intr_trigger_level = 1; + if (data & UART_FCR_ENABLE_FIFO) + mdev_state->s[index].max_fifo_size = MAX_FIFO_SIZE; + else { + mdev_state->s[index].max_fifo_size = 1; + mdev_state->s[index].intr_trigger_level = 1; + } + + break; + + case UART_LCR: + if (data & UART_LCR_DLAB) { + mdev_state->s[index].dlab = true; + mdev_state->s[index].divisor = 0; + } else + mdev_state->s[index].dlab = false; + + mdev_state->s[index].uart_reg[offset] = data; + break; + + case UART_MCR: + mdev_state->s[index].uart_reg[offset] = data; + + if ((mdev_state->s[index].uart_reg[UART_IER] & UART_IER_MSI) && + (data & UART_MCR_OUT2)) { +#if defined(DEBUG_INTR) + pr_err("Serial port %d: MCR_OUT2 write\n", index); +#endif + mtty_dev_mdev_trigger_interrupt(mdev_state->mdev->uuid); + } + + if ((mdev_state->s[index].uart_reg[UART_IER] & UART_IER_MSI) && + (data & (UART_MCR_RTS | UART_MCR_DTR))) { +#if defined(DEBUG_INTR) + pr_err("Serial port %d: MCR RTS/DTR write\n", index); +#endif + mtty_dev_mdev_trigger_interrupt(mdev_state->mdev->uuid); + } + break; + + case UART_LSR: + case UART_MSR: + /* do nothing */ + break; + + case UART_SCR: + mdev_state->s[index].uart_reg[offset] = data; + break; + + default: + break; + } +} + +static void handle_bar_read(unsigned int index, struct mdev_state *mdev_state, + u16 offset, char *buf, u32 count) +{ + /* Handle read requests by guest */ + switch (offset) { + case UART_RX: + /* if DLAB set, data is LSB of divisor */ + if (mdev_state->s[index].dlab) { + *buf = (u8)mdev_state->s[index].divisor; + break; + } + + mutex_lock(&mdev_state->rxtx_lock); + /* return data in tx buffer */ + if (mdev_state->s[index].rxtx.head != mdev_state->s[index].rxtx.tail) { + *buf = mdev_state->s[index].rxtx.fifo[mdev_state->s[index].rxtx.tail]; + mdev_state->s[index].rxtx.count--; + CIRCULAR_BUF_INC_IDX(mdev_state->s[index].rxtx.tail); + } + + if (mdev_state->s[index].rxtx.head == mdev_state->s[index].rxtx.tail) { + /* trigger interrupt if tx buffer empty interrupt is enabled and + * fifo is empty */ +#if defined(DEBUG_INTR) + pr_err("Serial port %d: Buffer Empty\n", index); +#endif + if (mdev_state->s[index].uart_reg[UART_IER] & UART_IER_THRI) + mtty_dev_mdev_trigger_interrupt(mdev_state->mdev->uuid); + + } + mutex_unlock(&mdev_state->rxtx_lock); + + break; + + case UART_IER: + if (mdev_state->s[index].dlab) { + *buf = (u8)(mdev_state->s[index].divisor >> 8); + break; + } + *buf = mdev_state->s[index].uart_reg[offset] & 0x0f; + break; + + case UART_IIR: + { + u8 ier = mdev_state->s[index].uart_reg[UART_IER]; + *buf = 0; + + mutex_lock(&mdev_state->rxtx_lock); + /* Interrupt priority 1: Parity, overrun, or framing error or break + * interrupt */ + if ((ier & UART_IER_RLSI) && mdev_state->s[index].overrun) + *buf |= UART_IIR_RLSI; + + /* Interrupt priority 2: Fifo trigger level reached */ + if ((ier & UART_IER_RDI) && + (mdev_state->s[index].rxtx.count == mdev_state->s[index].intr_trigger_level)) + *buf |= UART_IIR_RDI; + + /* Interrupt priotiry 3: transmitter holding register empty */ + if ((ier & UART_IER_THRI) && + (mdev_state->s[index].rxtx.head == mdev_state->s[index].rxtx.tail)) + *buf |= UART_IIR_THRI; + + /* Interrupt priotiry 4: Modem status: CTS, DSR, RI or DCD */ + if ((ier & UART_IER_MSI) && + (mdev_state->s[index].uart_reg[UART_MCR] & (UART_MCR_RTS | UART_MCR_DTR))) + *buf |= UART_IIR_MSI; + + /* bit0: 0 indicate interrupt pending, 1 indicate no interrupt is pending */ + if (*buf == 0) + *buf = UART_IIR_NO_INT; + + /* set bit 6 & 7 to be 16550 compatible */ + *buf |= 0xC0; + mutex_unlock(&mdev_state->rxtx_lock); + } + + break; + + case UART_LCR: + case UART_MCR: + *buf = mdev_state->s[index].uart_reg[offset]; + break; + + case UART_LSR: + { + u8 lsr = 0; + + mutex_lock(&mdev_state->rxtx_lock); + /* atleast one char in FIFO */ + if (mdev_state->s[index].rxtx.head != mdev_state->s[index].rxtx.tail) + lsr |= UART_LSR_DR; + + /* if FIFO overrun */ + if (mdev_state->s[index].overrun) + lsr |= UART_LSR_OE; + + /* transmit FIFO empty and tramsitter empty */ + if (mdev_state->s[index].rxtx.head == mdev_state->s[index].rxtx.tail) + lsr |= UART_LSR_TEMT | UART_LSR_THRE; + + mutex_unlock(&mdev_state->rxtx_lock); + *buf = lsr; + break; + } + case UART_MSR: + *buf = UART_MSR_DSR | UART_MSR_DDSR | UART_MSR_DCD; + + mutex_lock(&mdev_state->rxtx_lock); + /* if AFE is 1 and FIFO have space, set CTS bit */ + if (mdev_state->s[index].uart_reg[UART_MCR] & UART_MCR_AFE) { + if (mdev_state->s[index].rxtx.count < mdev_state->s[index].max_fifo_size) + *buf |= UART_MSR_CTS | UART_MSR_DCTS; + } else + *buf |= UART_MSR_CTS | UART_MSR_DCTS; + mutex_unlock(&mdev_state->rxtx_lock); + + break; + + case UART_SCR: + *buf = mdev_state->s[index].uart_reg[offset]; + break; + + default: + break; + } +} + +static void mdev_read_base(struct mdev_state *mdev_state) +{ + int index, pos; + u32 start_lo, start_hi; + u32 mem_type; + + pos = PCI_BASE_ADDRESS_0; + + for (index = 0; index <= VFIO_PCI_BAR5_REGION_INDEX; index++) { + + if (!mdev_state->region_info[index].size) + continue; + + start_lo = (*(u32 *)(mdev_state->vconfig + pos)) & + PCI_BASE_ADDRESS_MEM_MASK; + mem_type = (*(u32 *)(mdev_state->vconfig + pos)) & + PCI_BASE_ADDRESS_MEM_TYPE_MASK; + + switch (mem_type) { + case PCI_BASE_ADDRESS_MEM_TYPE_64: + start_hi = (*(u32 *)(mdev_state->vconfig + pos + 4)); + pos += 4; + break; + case PCI_BASE_ADDRESS_MEM_TYPE_32: + case PCI_BASE_ADDRESS_MEM_TYPE_1M: + /* 1M mem BAR treated as 32-bit BAR */ + default: + /* mem unknown type treated as 32-bit BAR */ + start_hi = 0; + break; + } + pos += 4; + mdev_state->region_info[index].start = ((u64)start_hi << 32) | start_lo; + } +} + +static ssize_t mdev_access(struct mdev_device *mdev, char *buf, + size_t count, loff_t pos, bool is_write) +{ + struct mdev_state *mdev_state; + unsigned int index; + loff_t offset; + int ret = 0; + + if (!mdev || !buf) + return -EINVAL; + + mdev_state = mdev_get_drvdata(mdev); + if (!mdev_state) { + pr_err("%s mdev_state not found\n", __func__); + return -EINVAL; + } + + mutex_lock(&mdev_state->ops_lock); + + index = MTTY_VFIO_PCI_OFFSET_TO_INDEX(pos); + offset = pos & MTTY_VFIO_PCI_OFFSET_MASK; + switch (index) { + case VFIO_PCI_CONFIG_REGION_INDEX: + +#if defined(DEBUG) + pr_info("%s: PCI config space %s at offset 0x%llx\n", __func__, + is_write? "write": "read", offset); +#endif + + if (is_write) { + dump_buffer(buf, count); + handle_pci_cfg_write(mdev_state, offset, buf, count); + } + else { + memcpy(buf, (mdev_state->vconfig + offset), count); + dump_buffer(buf, count); + } + + break; + + case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX: + if (!mdev_state->region_info[index].start) + mdev_read_base(mdev_state); + + if (is_write) { + dump_buffer(buf, count); + +#if defined(DEBUG_REGS) + pr_info("%s: BAR%d write at offset 0x%llx %s val: 0x%02x dlab: %d \n", + __func__, index, offset, wr_reg[offset], (u8)*buf, + mdev_state->s[index].dlab); +#endif + handle_bar_write(index, mdev_state, offset, buf, count); + } + else { + handle_bar_read(index, mdev_state, offset, buf, count); + dump_buffer(buf, count); + +#if defined(DEBUG_REGS) + pr_info("%s: BAR%d read at offset 0x%llx %s val: 0x%02x dlab: %d \n", + __func__, index, offset, rd_reg[offset], (u8)*buf, + mdev_state->s[index].dlab); +#endif + } + + break; + + default: + ret = -1; + goto accessfailed; + } + + ret = count; + + +accessfailed: + mutex_unlock(&mdev_state->ops_lock); + + return ret; +} + +/* mdev fops */ + +int mtty_supported_config(struct device *dev, char *config) +{ + pr_info("%s \n", __func__); + return 0; +} + +int mtty_create(struct mdev_device *mdev, char *mdev_params) +{ + struct mdev_state *mdev_state; + + if (!mdev) + return -EINVAL; + + mdev_state = kzalloc(sizeof(struct mdev_state), GFP_KERNEL); + if (mdev_state == NULL) + return -ENOMEM; + + mdev_state->irq_index = -1; + mdev_state->s[0].max_fifo_size = MAX_FIFO_SIZE; + mdev_state->s[1].max_fifo_size = MAX_FIFO_SIZE; + mutex_init(&mdev_state->rxtx_lock); + mdev_state->vconfig = kzalloc(MTTY_CONFIG_SPACE_SIZE, GFP_KERNEL); + + if (mdev_state->vconfig == NULL) { + pr_err("%s failed to allocate config space\n", __func__); + kfree(mdev_state); + return -ENOMEM; + } + + mutex_init(&mdev_state->ops_lock); + mdev_state->mdev = mdev; + mdev_set_drvdata(mdev, mdev_state); + + mtty_create_config_space(mdev_state); + + mutex_lock(&mdev_list_lock); + list_add(&mdev_state->next, &mdev_devices_list); + mutex_unlock(&mdev_list_lock); + + return 0; +} + +int mtty_destroy(struct mdev_device *mdev) +{ + struct mdev_state *mds, *tmp_mds; + struct mdev_state *mdev_state = mdev_get_drvdata(mdev); + int ret = -EINVAL; + + mutex_lock(&mdev_list_lock); + list_for_each_entry_safe(mds, tmp_mds, &mdev_devices_list, next) { + if (mdev_state == mds) { + list_del(&mdev_state->next); + mdev_set_drvdata(mdev, NULL); + kfree(mdev_state->vconfig); + kfree(mdev_state); + ret = 0; + break; + } + } + mutex_unlock(&mdev_list_lock); + + return ret; +} + +int mtty_reset(struct mdev_device *mdev) +{ + struct mdev_state *mdev_state; + + if (!mdev) + return -EINVAL; + + mdev_state = mdev_get_drvdata(mdev); + if (!mdev_state) + return -EINVAL; + + pr_info("%s: called\n", __func__); + + return 0; +} + +int mtty_set_online_status(struct mdev_device *mdev, bool online) +{ + pr_info("%s: called, online: %d\n", __func__, online); + return 0; +} + +int mtty_get_online_status(struct mdev_device *mdev, bool *online) +{ + *online = true; + return 0; +} + +ssize_t mtty_read(struct mdev_device *mdev, char *buf, + size_t count, loff_t pos) +{ + return mdev_access(mdev, buf, count, pos, false); +} + +ssize_t mtty_write(struct mdev_device *mdev, char *buf, + size_t count, loff_t pos) +{ + return mdev_access(mdev, buf, count, pos, true); +} + +static int mtty_set_irqs(struct mdev_device *mdev, uint32_t flags, + unsigned index, unsigned start, unsigned count, + void *data) +{ + int ret = 0; + struct mdev_state *mdev_state; + + if (!mdev) + return -EINVAL; + + mdev_state = mdev_get_drvdata(mdev); + if (!mdev_state) + return -EINVAL; + + mutex_lock(&mdev_state->ops_lock); + switch (index) { + case VFIO_PCI_INTX_IRQ_INDEX: + switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) { + case VFIO_IRQ_SET_ACTION_MASK: + case VFIO_IRQ_SET_ACTION_UNMASK: + break; + case VFIO_IRQ_SET_ACTION_TRIGGER: + { + if (flags & VFIO_IRQ_SET_DATA_NONE) + { + pr_info("%s: disable INTx\n", __func__); + break; + } + + if (flags & VFIO_IRQ_SET_DATA_EVENTFD) + { + int fd = *(int *)data; + if (fd > 0) + { + struct fd irqfd; + + irqfd = fdget(fd); + if (!irqfd.file) { + ret = -EBADF; + break; + } + + mdev_state->intx_file = irqfd.file; + fdput(irqfd); + + mdev_state->irq_fd = fd; + mdev_state->irq_index = index; + break; + } + } + break; + } + } + break; + case VFIO_PCI_MSI_IRQ_INDEX: + switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) { + case VFIO_IRQ_SET_ACTION_MASK: + case VFIO_IRQ_SET_ACTION_UNMASK: + break; + case VFIO_IRQ_SET_ACTION_TRIGGER: + { + if (flags & VFIO_IRQ_SET_DATA_NONE) + { + pr_info("%s: disable MSI\n", __func__); + mdev_state->irq_index = VFIO_PCI_INTX_IRQ_INDEX; + break; + } + + if (flags & VFIO_IRQ_SET_DATA_EVENTFD) + { + int fd = *(int *)data; + if (fd > 0) + { + if (mdev_state->msi_file == NULL) + { + struct fd irqfd; + + irqfd = fdget(fd); + if (!irqfd.file) { + ret = -EBADF; + break; + } + + mdev_state->msi_file = irqfd.file; + fdput(irqfd); + + mdev_state->irq_fd = fd; + mdev_state->irq_index = index; + break; + } + } + } + break; + } + } + break; + case VFIO_PCI_MSIX_IRQ_INDEX: + pr_info("%s: MSIX_IRQ \n", __func__); + break; + case VFIO_PCI_ERR_IRQ_INDEX: + pr_info("%s: ERR_IRQ \n", __func__); + break; + case VFIO_PCI_REQ_IRQ_INDEX: + pr_info("%s: REQ_IRQ \n", __func__); + break; + } + + mutex_unlock(&mdev_state->ops_lock); + return ret; +} + +static int mtty_dev_mdev_trigger_interrupt(uuid_le uuid) +{ + mm_segment_t old_fs; + u64 val = 1; + loff_t offset = 0; + int ret = -1; + struct file *pfile = NULL; + struct mdev_state *mdev_state; + + mdev_state = find_mdev_state_by_uuid(uuid); + + if (!mdev_state) { + pr_info("%s: mdev not found\n", __func__); + return -EINVAL; + } + + if ((mdev_state->irq_index == VFIO_PCI_MSI_IRQ_INDEX) && + (mdev_state->msi_file == NULL)) + return -EINVAL; + else if ((mdev_state->irq_index == VFIO_PCI_INTX_IRQ_INDEX) && + (mdev_state->intx_file == NULL)) + { + pr_info("%s: Intr file not found\n", __func__); + return -EINVAL; + } + + old_fs = get_fs(); + set_fs(KERNEL_DS); + + if (mdev_state->irq_index == VFIO_PCI_MSI_IRQ_INDEX) + pfile = mdev_state->msi_file; + else + pfile = mdev_state->intx_file; + + if (pfile && pfile->f_op && pfile->f_op->write) { + ret = pfile->f_op->write(pfile, (char *)&val, sizeof(val), &offset); +#if defined(DEBUG_INTR) + pr_info("Intx triggered \n"); +#endif + } + else + pr_err("%s: pfile not valid, intr_type = %d\n", __func__, + mdev_state->irq_index); + + set_fs(old_fs); + + if (ret < 0) + pr_err("%s: eventfd write failed (%d)\n", __func__, ret); + + return ret; +} + +int mtty_get_region_info(struct mdev_device *mdev, + struct vfio_region_info *region_info, + u16 *cap_type_id, void **cap_type) +{ + unsigned int size = 0; + struct mdev_state *mdev_state; + int bar_index; + + if (!mdev) + return -EINVAL; + + mdev_state = mdev_get_drvdata(mdev); + if (!mdev_state) + return -EINVAL; + + mutex_lock(&mdev_state->ops_lock); + bar_index = region_info->index; + + switch (bar_index) { + case VFIO_PCI_CONFIG_REGION_INDEX: + size = MTTY_CONFIG_SPACE_SIZE; + break; + case VFIO_PCI_BAR0_REGION_INDEX: + size = MTTY_IO_BAR_SIZE; + break; + case VFIO_PCI_BAR1_REGION_INDEX: + size = MTTY_IO_BAR_SIZE; + break; + default: + size = 0; + break; + } + + mdev_state->region_info[bar_index].size = size; + mdev_state->region_info[bar_index].vfio_offset = MTTY_VFIO_PCI_INDEX_TO_OFFSET(bar_index); + + region_info->size = size; + region_info->offset = MTTY_VFIO_PCI_INDEX_TO_OFFSET(bar_index); + region_info->flags = VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE; + mutex_unlock(&mdev_state->ops_lock); + return 0; +} + +int mtty_validate_map_request(struct mdev_device *mdev, loff_t pos, + u64 *virtaddr, unsigned long *pfn, + unsigned long *size, pgprot_t *prot) +{ + *size = PAGE_SIZE; + *prot = pgprot_noncached(*prot); + return 0; +} + +int mtty_get_irq_info(struct mdev_device *mdev, + struct vfio_irq_info *irq_info) +{ + switch (irq_info->index) { + case VFIO_PCI_INTX_IRQ_INDEX: + case VFIO_PCI_MSI_IRQ_INDEX: + case VFIO_PCI_REQ_IRQ_INDEX: + break; + + default: + return -EINVAL; + } + + irq_info->flags = VFIO_IRQ_INFO_EVENTFD; + irq_info->count = 1; + + if (irq_info->index == VFIO_PCI_INTX_IRQ_INDEX) + irq_info->flags |= (VFIO_IRQ_INFO_MASKABLE | + VFIO_IRQ_INFO_AUTOMASKED); + else + irq_info->flags |= VFIO_IRQ_INFO_NORESIZE; + + return 0; +} + +int mtty_get_device_info(struct mdev_device *mdev, + struct vfio_device_info *dev_info) +{ + dev_info->flags = VFIO_DEVICE_FLAGS_PCI; + dev_info->num_regions = VFIO_PCI_NUM_REGIONS; + dev_info->num_irqs = VFIO_PCI_NUM_IRQS; + + return 0; +} + +static ssize_t +sample_mtty_dev_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "This is phy device \n"); +} + +static DEVICE_ATTR_RO(sample_mtty_dev); + +static struct attribute *mtty_dev_attrs[] = { + &dev_attr_sample_mtty_dev.attr, + NULL, +}; + +static const struct attribute_group mtty_dev_group = { + .name = "mtty_dev", + .attrs = mtty_dev_attrs, +}; + +const struct attribute_group *mtty_dev_groups[] = { + &mtty_dev_group, + NULL, +}; + +static ssize_t +sample_mdev_dev_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct mdev_device *mdev = to_mdev_device(dev); + + if (mdev) + return sprintf(buf, "This is MDEV %s\n", dev_name(&mdev->dev)); + + return sprintf(buf, " \n"); +} + +static DEVICE_ATTR_RO(sample_mdev_dev); + +static struct attribute *mdev_dev_attrs[] = { + &dev_attr_sample_mdev_dev.attr, + NULL, +}; + +static const struct attribute_group mdev_dev_group = { + .name = "vendor", + .attrs = mdev_dev_attrs, +}; + +const struct attribute_group *mdev_dev_groups[] = { + &mdev_dev_group, + NULL, +}; + +struct parent_ops mdev_fops = { + .owner = THIS_MODULE, + .dev_attr_groups = mtty_dev_groups, + .mdev_attr_groups = mdev_dev_groups, + .supported_config = mtty_supported_config, + .create = mtty_create, + .destroy = mtty_destroy, + .reset = mtty_reset, + .set_online_status = mtty_set_online_status, + .get_online_status = mtty_get_online_status, + .read = mtty_read, + .write = mtty_write, + .get_irq_info = mtty_get_irq_info, + .set_irqs = mtty_set_irqs, + .get_region_info = mtty_get_region_info, + .get_device_info = mtty_get_device_info, +}; + +static void mtty_device_release(struct device *dev) +{ + dev_dbg(dev, "mtty: released\n"); +} + +static int __init mtty_dev_init(void) +{ + int ret = 0; + + pr_info("mtty_dev: %s\n", __FUNCTION__); + + memset(&mtty_dev, 0 , sizeof(mtty_dev)); + + idr_init(&mtty_dev.vd_idr); + + ret = alloc_chrdev_region(&mtty_dev.vd_devt, 0, MINORMASK, MTTY_NAME); + + if (ret < 0) { + pr_err("Error: failed to register mtty_dev, err:%d\n", ret); + return ret; + } + + cdev_init(&mtty_dev.vd_cdev, &vd_fops); + cdev_add(&mtty_dev.vd_cdev, mtty_dev.vd_devt, MINORMASK); + + pr_info("major_number:%d \n", MAJOR(mtty_dev.vd_devt)); + + mtty_dev.vd_class = class_create(THIS_MODULE, MTTY_CLASS_NAME); + + if (IS_ERR(mtty_dev.vd_class)) { + printk(KERN_ERR "Error: failed to register mtty_dev class\n"); + goto failed1; + } + + mtty_dev.dev.release = mtty_device_release; + dev_set_name(&mtty_dev.dev, "%s", MTTY_NAME); + + ret = device_register(&mtty_dev.dev); + if (ret) + goto failed2; + + if (mdev_register_device(&mtty_dev.dev, &mdev_fops) != 0) + goto failed3; + + mutex_init(&mdev_list_lock); + INIT_LIST_HEAD(&mdev_devices_list); + + goto all_done; + +failed3: + + device_unregister(&mtty_dev.dev); +failed2: + class_destroy(mtty_dev.vd_class); + +failed1: + cdev_del(&mtty_dev.vd_cdev); + unregister_chrdev_region(mtty_dev.vd_devt, MINORMASK); + +all_done: + return ret; +} + +static void __exit mtty_dev_exit(void) +{ + mtty_dev.dev.bus = NULL; + mdev_unregister_device(&mtty_dev.dev); + + device_unregister(&mtty_dev.dev); + idr_destroy(&mtty_dev.vd_idr); + cdev_del(&mtty_dev.vd_cdev); + unregister_chrdev_region(mtty_dev.vd_devt, MINORMASK); + class_destroy(mtty_dev.vd_class); + mtty_dev.vd_class = NULL; + printk(KERN_INFO "mtty_dev: Unloaded!\n"); +} + +module_init(mtty_dev_init) +module_exit(mtty_dev_exit) + +MODULE_LICENSE("GPL"); +MODULE_INFO(supported, "Test driver that simulate serial port over PCI"); +MODULE_VERSION(VERSION_STRING); +MODULE_AUTHOR(DRIVER_AUTHOR); diff --git a/Documentation/vfio-mediated-device.txt b/Documentation/mdev/vfio-mediated-device.txt similarity index 78% rename from Documentation/vfio-mediated-device.txt rename to Documentation/mdev/vfio-mediated-device.txt index 237d8eb630b7..c67576f1da29 100644 --- a/Documentation/vfio-mediated-device.txt +++ b/Documentation/mdev/vfio-mediated-device.txt @@ -193,6 +193,67 @@ supported in TYPE1 IOMMU module. To enable the same for other IOMMU backend modules, such as PPC64 sPAPR module, they need to provide these two callback functions. +Sample code +------------------------------------------------------------------------------ +mtty.c in this folder is a sample code to demonstrate how to use mediated device +framework. + +Sample driver creates mdev device that simulates serial port over PCI card. + +Build and load mtty.ko module. This creates a dummy device, /sys/devices/mtty +Files in this device directory in sysfs looks like: + +# ls /sys/devices/mtty/ -l +total 0 +--w------- 1 root root 4096 Sep 29 12:18 mdev_create +--w------- 1 root root 4096 Sep 29 12:18 mdev_destroy +-r--r--r-- 1 root root 4096 Sep 29 12:34 mdev_supported_types +drwxr-xr-x 2 root root 0 Sep 29 12:34 mtty_dev +drwxr-xr-x 2 root root 0 Sep 29 12:34 power +-rw-r--r-- 1 root root 4096 Sep 29 12:34 uevent + +Create mediated device using this device: +# echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1001" > /sys/devices/mtty/mdev_create + +Add parameters to qemu-kvm: +-device vfio-pci,\ + sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1001 + +Boot the VM. In Linux guest (no hardware in host), device is seen as below: + +# lspci -s 00:05.0 -xxvv +00:05.0 Serial controller: Device 4348:3253 (rev 10) (prog-if 02 [16550]) + Subsystem: Device 4348:3253 + Physical Slot: 5 + Control: I/O+ Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- +Stepping- SERR- FastB2B- DisINTx- + Status: Cap- 66MHz- UDF- FastB2B- ParErr- DEVSEL=medium >TAbort- +<TAbort- <MAbort- >SERR- <PERR- INTx- + Interrupt: pin A routed to IRQ 10 + Region 0: I/O ports at c150 [size=8] + Region 1: I/O ports at c158 [size=8] + Kernel driver in use: serial +00: 48 43 53 32 01 00 00 02 10 02 00 07 00 00 00 00 +10: 51 c1 00 00 59 c1 00 00 00 00 00 00 00 00 00 00 +20: 00 00 00 00 00 00 00 00 00 00 00 00 48 43 53 32 +30: 00 00 00 00 00 00 00 00 00 00 00 00 0a 01 00 00 + +In guest dmesg: +serial 0000:00:05.0: PCI INT A -> Link[LNKA] -> GSI 10 (level, high) -> IRQ 10 +0000:00:05.0: ttyS1 at I/O 0xc150 (irq = 10) is a 16550A +0000:00:05.0: ttyS2 at I/O 0xc158 (irq = 10) is a 16550A + +Check the serial ports in guest: +# setserial -g /dev/ttyS* +/dev/ttyS0, UART: 16550A, Port: 0x03f8, IRQ: 4 +/dev/ttyS1, UART: 16550A, Port: 0xc150, IRQ: 10 +/dev/ttyS2, UART: 16550A, Port: 0xc158, IRQ: 10 + +Using minicom or any terminal enulation program, open port /dev/ttyS1 or +/dev/ttyS2 with hardware flow control disabled. Type data on minicom terminal or +send data to terminal emulation program and read tha data. Data is loop backed +from hosts mtty driver. + References ------------------------------------------------------------------------------- -- 2.7.0 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html