[PATCH 1/1] Add simple sample driver for mediated device framework

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This sample driver creates an mdev device that simulates a serial port over a PCI card.

Signed-off-by: Kirti Wankhede <kwankhede@xxxxxxxxxx>
Signed-off-by: Neo Jia <cjia@xxxxxxxxxx>
Change-Id: I857f8f12f8b275f2498dfe8c628a5cdc7193b1b2
---
 Documentation/mdev/Makefile                       |   14 +
 Documentation/mdev/mtty.c                         | 1202 +++++++++++++++++++++
 Documentation/{ => mdev}/vfio-mediated-device.txt |   61 ++
 3 files changed, 1277 insertions(+)
 create mode 100644 Documentation/mdev/Makefile
 create mode 100644 Documentation/mdev/mtty.c
 rename Documentation/{ => mdev}/vfio-mediated-device.txt (78%)

diff --git a/Documentation/mdev/Makefile b/Documentation/mdev/Makefile
new file mode 100644
index 000000000000..ff6f8a324c85
--- /dev/null
+++ b/Documentation/mdev/Makefile
@@ -0,0 +1,14 @@
+#
+# Makefile for mtty.c file
+#
+# Builds mtty.ko as an external module against the build tree of the
+# currently running kernel.
+#
+KDIR:=/lib/modules/$(shell uname -r)/build
+
+obj-m:=mtty.o
+
+# Neither target produces a file with its own name.
+.PHONY: default clean
+
+# M= is the documented kbuild variable for external modules; SUBDIRS= is
+# only a deprecated alias for it.
+default:
+	$(MAKE) -C $(KDIR) M=$(PWD) modules
+
+clean:
+	@rm -rf .*.cmd *.mod.c *.o *.ko .tmp*
+	@rm -rf Module.* Modules.* modules.* .tmp_versions
+
diff --git a/Documentation/mdev/mtty.c b/Documentation/mdev/mtty.c
new file mode 100644
index 000000000000..ce29d54b4275
--- /dev/null
+++ b/Documentation/mdev/mtty.c
@@ -0,0 +1,1202 @@
+/*
+ * Mediated virtual PCI serial host device driver
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author: Neo Jia <cjia@xxxxxxxxxx>
+ *             Kirti Wankhede <kwankhede@xxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Sample driver that creates mdev device that simulates serial port over PCI
+ * card.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/cdev.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/uuid.h>
+#include <linux/vfio.h>
+#include <linux/iommu.h>
+#include <linux/sysfs.h>
+#include <linux/ctype.h>
+#include <linux/file.h>
+#include <linux/mdev.h>
+#include <linux/pci.h>
+#include <linux/serial.h>
+#include <uapi/linux/serial_reg.h>
+/*
+ * #defines
+ */
+
+#define VERSION_STRING  "0.1"
+#define DRIVER_AUTHOR   "NVIDIA Corporation"
+
+#define MTTY_CLASS_NAME "mtty"
+
+#define MTTY_NAME       "mtty"
+
+#define MTTY_CONFIG_SPACE_SIZE  0xff
+#define MTTY_IO_BAR_SIZE        0x8
+#define MTTY_MMIO_BAR_SIZE      0x100000
+
+/*
+ * Store a 16-bit/32-bit value at addr.
+ * NOTE: despite the names, no byte swapping is done; the value is stored in
+ * host byte order, which is only little-endian on little-endian hosts.
+ * Arguments are parenthesized so that expressions can be passed safely.
+ */
+#define STORE_LE16(addr, val)   (*(u16 *)(addr) = (val))
+#define STORE_LE32(addr, val)   (*(u32 *)(addr) = (val))
+
+/* Number of entries in the loop back FIFO; must be a power of two. */
+#define MAX_FIFO_SIZE   16
+
+/*
+ * Advance a FIFO index by one, wrapping at MAX_FIFO_SIZE.
+ * idx is evaluated more than once, so it must be a side-effect free lvalue.
+ */
+#define CIRCULAR_BUF_INC_IDX(idx)    \
+    ((idx) = ((idx) + 1) & (MAX_FIFO_SIZE - 1))
+
+/*
+ * The VFIO region index is encoded in the bits of the file offset at and
+ * above MTTY_VFIO_PCI_OFFSET_SHIFT; the lower bits are the offset inside
+ * the region.
+ */
+#define MTTY_VFIO_PCI_OFFSET_SHIFT   40
+
+#define MTTY_VFIO_PCI_OFFSET_TO_INDEX(off)   ((off) >> MTTY_VFIO_PCI_OFFSET_SHIFT)
+#define MTTY_VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << MTTY_VFIO_PCI_OFFSET_SHIFT)
+#define MTTY_VFIO_PCI_OFFSET_MASK    (((u64)(1) << MTTY_VFIO_PCI_OFFSET_SHIFT) - 1)
+
+
+/*
+ * Global Structures
+ */
+
+/*
+ * Driver-wide state; the single instance mtty_dev is defined together with
+ * the type. None of the members are referenced by the functions visible in
+ * this part of the file, so the per-member notes are inferred from the member
+ * types only -- TODO confirm against the module init/exit code.
+ */
+struct mtty_dev {
+    dev_t               vd_devt;    /* presumably the char device number */
+    struct class        *vd_class;  /* presumably the "mtty" device class */
+    struct cdev         vd_cdev;    /* presumably the char device using vd_fops */
+    struct idr          vd_idr;
+    struct device       dev;        /* presumably the parent (physical) device */
+} mtty_dev;
+
+/* Per-region bookkeeping kept in struct mdev_state, one entry per VFIO region */
+struct mdev_region_info {
+    u64 start;          /* base address decoded from the BAR by mdev_read_base() */
+    u64 phys_start;     /* not referenced in the code visible here */
+    u32 size;           /* region size, filled in by mtty_get_region_info() */
+    u64 vfio_offset;    /* file offset of the region, see mtty_get_region_info() */
+};
+
+/*
+ * Register names for DEBUG_REGS logging in mdev_access(). Both tables are
+ * indexed by the register offset inside the BAR and have 8 entries; offsets
+ * 0 and 2 are named differently for writes (TX, FCR) and reads (RX, IIR).
+ */
+#if defined(DEBUG_REGS)
+const char *wr_reg[] = {
+    "TX",
+    "IER",
+    "FCR",
+    "LCR",
+    "MCR",
+    "LSR",
+    "MSR",
+    "SCR"
+};
+
+const char *rd_reg[] = {
+    "RX",
+    "IER",
+    "IIR",
+    "LCR",
+    "MCR",
+    "LSR",
+    "MSR",
+    "SCR"
+};
+#endif
+
+/*
+ * Loop back buffer: bytes written to the TX register are queued here and
+ * handed back on reads of the RX register.
+ */
+struct rxtx {
+    u8 fifo[MAX_FIFO_SIZE];
+    u8 head, tail;      /* head: next slot to write, tail: next slot to read */
+    u8 count;           /* number of bytes currently queued */
+};
+
+/* Emulated state of one serial port; struct mdev_state holds two of them */
+struct serial_port {
+    u8 uart_reg[8];         /* 8 registers */
+    struct rxtx rxtx;       /* loop back buffer */
+    bool dlab;              /* set while the last LCR write had UART_LCR_DLAB */
+    bool overrun;           /* set when a TX write found the FIFO full */
+    u16 divisor;            /* divisor latch, accessed at offsets 0/1 while dlab */
+    u8 fcr;                 /* FIFO control register */
+    u8 max_fifo_size;       /* MAX_FIFO_SIZE with FIFO enabled, otherwise 1 */
+    u8 intr_trigger_level;  /* interrupt trigger level */
+};
+
+/*
+ * State of each mdev device.
+ * Allocated in mtty_create(), linked on mdev_devices_list through 'next' and
+ * freed in mtty_destroy().
+ */
+struct mdev_state {
+    int irq_fd;                 /* last eventfd number passed to mtty_set_irqs() */
+    struct file *intx_file;     /* eventfd file registered for INTx */
+    struct file *msi_file;      /* eventfd file registered for MSI */
+    int irq_index;              /* VFIO IRQ index in use; -1 after mtty_create() */
+    u8 *vconfig;                /* virtual PCI config space, MTTY_CONFIG_SPACE_SIZE bytes */
+    struct mutex ops_lock;      /* held by mdev_access(), mtty_set_irqs(), mtty_get_region_info() */
+    struct mdev_device *mdev;
+    struct mdev_region_info region_info[VFIO_PCI_NUM_REGIONS];
+    u32 bar_mask[VFIO_PCI_NUM_REGIONS];     /* value returned for BAR sizing writes */
+    struct list_head next;
+    struct serial_port s[2];    /* serial ports, indexed by BAR number */
+    struct mutex rxtx_lock;     /* protects the loop back FIFOs in s[] */
+};
+
+/*
+ * List of all struct mdev_state instances and the mutex taken by
+ * mtty_create() and mtty_destroy() while modifying it. Neither is initialized
+ * in the code visible here -- presumably done at module init, TODO confirm.
+ */
+struct mutex mdev_list_lock;
+struct list_head mdev_devices_list;
+
+/* File operations with only the owner set; not used in the code visible here */
+static struct file_operations vd_fops = {
+    .owner          = THIS_MODULE,
+};
+
+/* function prototypes */
+
+static int mtty_dev_mdev_trigger_interrupt(uuid_le uuid);
+
+/* Helper functions */
+/*
+ * Look up the device state whose mdev UUID equals @uuid.
+ *
+ * The list is walked under mdev_list_lock, the same lock mtty_create() and
+ * mtty_destroy() hold while adding/removing entries, so the walk cannot race
+ * with a concurrent list update. The lock is dropped before returning, so the
+ * caller must make sure by other means that the device is not destroyed
+ * while it uses the result. Must not be called with mdev_list_lock held.
+ *
+ * Returns the matching state, or NULL when no device has that UUID.
+ */
+static struct mdev_state *find_mdev_state_by_uuid(uuid_le uuid)
+{
+    struct mdev_state *mds;
+    struct mdev_state *found = NULL;
+
+    mutex_lock(&mdev_list_lock);
+    list_for_each_entry(mds, &mdev_devices_list, next) {
+        if (uuid_le_cmp(mds->mdev->uuid, uuid) == 0) {
+            found = mds;
+            break;
+        }
+    }
+    mutex_unlock(&mdev_list_lock);
+
+    return found;
+}
+
+/*
+ * Debug helper: log @count bytes starting at @buf as hex, 16 bytes per line.
+ * Compiles to an empty function unless DEBUG is defined.
+ *
+ * print_hex_dump() is used instead of one printk() per byte: separate
+ * printk() calls are not guaranteed to end up on the same log line, and
+ * printing a plain (possibly signed) char with %x sign-extends values
+ * above 0x7f.
+ */
+void dump_buffer(char *buf, uint32_t count)
+{
+#if defined(DEBUG)
+    pr_info("Buffer: \n");
+    print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1, buf, count, false);
+#endif
+}
+
+/*
+ * Fill the virtual PCI config space of a new device: IDs, command/status,
+ * class code of a 16550-compatible serial controller, two I/O BARs, the
+ * interrupt pin and some vendor specific bytes. Also records the BAR sizing
+ * masks for BAR0 and BAR1.
+ */
+static void mtty_create_config_space(struct mdev_state *mdev_state)
+{
+    /* ASCII "PCI Serial/UART", stored without the terminating NUL */
+    static const char id_string[] = "PCI Serial/UART";
+    u8 *cfg = mdev_state->vconfig;
+    int bar;
+
+    /* PCI dev ID */
+    STORE_LE32((u32 *) &cfg[0x0], 0x32534348);
+
+    /* Control: I/O+, Mem-, BusMaster- */
+    STORE_LE16((u16 *) &cfg[0x4], 0x0001);
+
+    /* Status: capabilities list absent */
+    STORE_LE16((u16 *) &cfg[0x6], 0x0200);
+
+    cfg[0x8] = 0x10;    /* Rev ID */
+    cfg[0x9] = 0x02;    /* programming interface: 16550-compatible serial controller */
+    cfg[0xa] = 0x00;    /* Sub class : 00 */
+    cfg[0xb] = 0x07;    /* Base class : Simple Communication controllers */
+
+    /* base address registers: BAR0 and BAR1 are both I/O space */
+    for (bar = 0; bar < 2; bar++) {
+        STORE_LE32((u32 *) &cfg[0x10 + bar * 4], 0x000001);
+        mdev_state->bar_mask[bar] = ~(MTTY_IO_BAR_SIZE) + 1;
+    }
+
+    /* Subsystem ID */
+    STORE_LE32((u32 *) &cfg[0x2c], 0x32534348);
+
+    cfg[0x34] = 0x00;   /* Cap Ptr */
+    cfg[0x3d] = 0x01;   /* interrupt pin (INTA#) */
+
+    /* Vendor specific data */
+    cfg[0x40] = 0x23;
+    cfg[0x43] = 0x80;
+    cfg[0x44] = 0x23;
+    cfg[0x48] = 0x23;
+    cfg[0x4c] = 0x23;
+
+    /* bytes 0x60 - 0x6e */
+    memcpy(&cfg[0x60], id_string, sizeof(id_string) - 1);
+}
+
+/*
+ * Emulate a guest write to the virtual PCI config space.
+ *
+ * @mdev_state: device state owning the config space
+ * @offset:     config space offset of the write
+ * @buf:        data written by the guest
+ * @count:      number of valid bytes in @buf
+ *
+ * Only the interrupt line and BAR0/BAR1 are writable; BAR2-BAR4 always read
+ * back as 0 and everything else is ignored.
+ */
+static void handle_pci_cfg_write(struct mdev_state *mdev_state, u16 offset,
+                                 char *buf, u32 count)
+{
+    u32 cfg_addr, bar_mask, bar_index = 0;
+
+    /* nothing to read from buf */
+    if (!count)
+        return;
+
+    switch (offset) {
+    case 0x04: /* device control */
+    case 0x06: /* device status */
+        /* do nothing */
+        break;
+    case 0x3c:  /* interrupt line */
+        mdev_state->vconfig[0x3c] = buf[0];
+        break;
+    case 0x3d:
+        /*
+         * Interrupt Pin is hardwired to INTA.
+         * This field is write protected by hardware
+         */
+        break;
+    case 0x10:  /* BAR0 */
+    case 0x14:  /* BAR1 */
+        if (offset == 0x10)
+            bar_index = 0;
+        else if (offset == 0x14)
+            bar_index = 1;
+
+        /*
+         * Never read more than count bytes from buf: start from the current
+         * BAR value and overlay only the bytes that were actually written.
+         */
+        memcpy(&cfg_addr, &mdev_state->vconfig[offset], sizeof(cfg_addr));
+        memcpy(&cfg_addr, buf, min_t(u32, count, sizeof(cfg_addr)));
+        pr_info("BAR%u addr 0x%x\n", bar_index, cfg_addr);
+
+        /* BAR sizing: all ones written, answer with the size mask */
+        if (cfg_addr == 0xffffffff) {
+            bar_mask = mdev_state->bar_mask[bar_index];
+            cfg_addr = (cfg_addr & bar_mask);
+        }
+
+        /* keep the read-only type bits of the BAR */
+        cfg_addr |= (mdev_state->vconfig[offset] & 0x3ul);
+        STORE_LE32(&mdev_state->vconfig[offset], cfg_addr);
+        break;
+    case 0x18:  /* BAR2 */
+    case 0x1c:  /* BAR3 */
+    case 0x20:  /* BAR4 */
+        STORE_LE32(&mdev_state->vconfig[offset], 0);
+        break;
+    default:
+        pr_info("PCI config write @0x%x of %u bytes not handled \n",
+                 offset, count);
+        break;
+    }
+}
+
+/*
+ * Emulate a guest write of one byte to a UART register.
+ *
+ * @index:      serial port number, used to index mdev_state->s[]
+ * @mdev_state: device state
+ * @offset:     register offset inside the BAR (UART_TX ... UART_SCR)
+ * @buf:        data written by the guest; only the first byte is used
+ * @count:      size of the guest access (not used)
+ *
+ * Bytes written to the TX register are queued in the loop back FIFO.
+ * FIFO occupancy is always judged by rxtx.count: head == tail is true both
+ * for an empty and for a completely full FIFO.
+ */
+static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state,
+                             u16 offset, char *buf, u32 count)
+{
+    struct serial_port *port = &mdev_state->s[index];
+    u8 data = *buf;
+
+    /* Handle data written by guest */
+    switch (offset) {
+    case UART_TX:
+        /* if DLAB set, data is LSB of divisor */
+        if (port->dlab) {
+            /* replace the low byte; OR-ing would keep stale bits */
+            port->divisor = (port->divisor & 0xff00) | data;
+            break;
+        }
+
+        mutex_lock(&mdev_state->rxtx_lock);
+
+        /* save in TX buffer */
+        if (port->rxtx.count < port->max_fifo_size) {
+            port->rxtx.fifo[port->rxtx.head] = data;
+            port->rxtx.count++;
+            CIRCULAR_BUF_INC_IDX(port->rxtx.head);
+            port->overrun = false;
+
+            /*
+             * trigger interrupt if receive data interrupt is enabled and
+             * fifo reached trigger level
+             */
+            if ((port->uart_reg[UART_IER] & UART_IER_RDI) &&
+                (port->rxtx.count == port->intr_trigger_level)) {
+                /* trigger interrupt */
+#if defined(DEBUG_INTR)
+                pr_err("Serial port %d: Fifo level trigger\n", index);
+#endif
+                mtty_dev_mdev_trigger_interrupt(mdev_state->mdev->uuid);
+            }
+        } else {
+#if defined(DEBUG_INTR)
+            pr_err("Serial port %d: Buffer Overflow\n", index);
+#endif
+            port->overrun = true;
+
+            /* trigger interrupt if receiver line status interrupt is enabled */
+            if (port->uart_reg[UART_IER] & UART_IER_RLSI)
+                mtty_dev_mdev_trigger_interrupt(mdev_state->mdev->uuid);
+        }
+
+        mutex_unlock(&mdev_state->rxtx_lock);
+        break;
+
+    case UART_IER:
+        /* if DLAB set, data is MSB of divisor */
+        if (port->dlab) {
+            /* replace the high byte; OR-ing would keep stale bits */
+            port->divisor = (port->divisor & 0x00ff) | ((u16)data << 8);
+        } else {
+            port->uart_reg[offset] = data;
+
+            mutex_lock(&mdev_state->rxtx_lock);
+            /* THRI was just enabled and there is nothing left to send */
+            if ((data & UART_IER_THRI) && (port->rxtx.count == 0)) {
+#if defined(DEBUG_INTR)
+                pr_err("Serial port %d: IER_THRI write\n", index);
+#endif
+                mtty_dev_mdev_trigger_interrupt(mdev_state->mdev->uuid);
+            }
+
+            mutex_unlock(&mdev_state->rxtx_lock);
+        }
+
+        break;
+
+    case UART_FCR:
+        port->fcr = data;
+
+        mutex_lock(&mdev_state->rxtx_lock);
+        if (data & (UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT)) {
+            /* clear loop back FIFO */
+            port->rxtx.count = 0;
+            port->rxtx.head = 0;
+            port->rxtx.tail = 0;
+        }
+        mutex_unlock(&mdev_state->rxtx_lock);
+
+        switch (data & UART_FCR_TRIGGER_MASK) {
+        case UART_FCR_TRIGGER_1:
+            port->intr_trigger_level = 1;
+            break;
+
+        case UART_FCR_TRIGGER_4:
+            port->intr_trigger_level = 4;
+            break;
+
+        case UART_FCR_TRIGGER_8:
+            port->intr_trigger_level = 8;
+            break;
+
+        case UART_FCR_TRIGGER_14:
+            port->intr_trigger_level = 14;
+            break;
+        }
+
+        /*
+         * The level decoded above is deliberately overridden with 1: using
+         * a higher level would need a timer with a timeout of 4 characters
+         * that sets the receive data timeout in the IIR register when it
+         * expires, and no such timer is implemented.
+         */
+        port->intr_trigger_level = 1;
+        if (data & UART_FCR_ENABLE_FIFO)
+            port->max_fifo_size = MAX_FIFO_SIZE;
+        else {
+            port->max_fifo_size = 1;
+            port->intr_trigger_level = 1;
+        }
+
+        break;
+
+    case UART_LCR:
+        if (data & UART_LCR_DLAB) {
+            port->dlab = true;
+            port->divisor = 0;
+        } else
+            port->dlab = false;
+
+        port->uart_reg[offset] = data;
+        break;
+
+    case UART_MCR:
+        port->uart_reg[offset] = data;
+
+        if ((port->uart_reg[UART_IER] & UART_IER_MSI) &&
+            (data & UART_MCR_OUT2)) {
+#if defined(DEBUG_INTR)
+            pr_err("Serial port %d: MCR_OUT2 write\n", index);
+#endif
+            mtty_dev_mdev_trigger_interrupt(mdev_state->mdev->uuid);
+        }
+
+        if ((port->uart_reg[UART_IER] & UART_IER_MSI) &&
+            (data & (UART_MCR_RTS | UART_MCR_DTR))) {
+#if defined(DEBUG_INTR)
+            pr_err("Serial port %d: MCR RTS/DTR write\n", index);
+#endif
+            mtty_dev_mdev_trigger_interrupt(mdev_state->mdev->uuid);
+        }
+        break;
+
+    case UART_LSR:
+    case UART_MSR:
+        /* do nothing */
+        break;
+
+    case UART_SCR:
+        port->uart_reg[offset] = data;
+        break;
+
+    default:
+        break;
+    }
+}
+
+/*
+ * Emulate a guest read of one byte from a UART register.
+ *
+ * @index:      serial port number, used to index mdev_state->s[]
+ * @mdev_state: device state
+ * @offset:     register offset inside the BAR (UART_RX ... UART_SCR)
+ * @buf:        receives the register value in its first byte
+ * @count:      size of the guest access (not used)
+ *
+ * FIFO occupancy is always judged by rxtx.count: head == tail is true both
+ * for an empty and for a completely full FIFO, so testing the indices would
+ * make a full FIFO look empty and unreadable.
+ */
+static void handle_bar_read(unsigned int index, struct mdev_state *mdev_state,
+                             u16 offset, char *buf, u32 count)
+{
+    struct serial_port *port = &mdev_state->s[index];
+
+    /*
+     * Default result, so that a read from an empty FIFO or from an unknown
+     * offset never returns whatever the buffer happened to contain.
+     */
+    *buf = 0;
+
+    /* Handle read requests by guest */
+    switch (offset) {
+    case UART_RX:
+        /* if DLAB set, data is LSB of divisor */
+        if (port->dlab) {
+            *buf  = (u8)port->divisor;
+            break;
+        }
+
+        mutex_lock(&mdev_state->rxtx_lock);
+        /* return data in tx buffer */
+        if (port->rxtx.count) {
+            *buf = port->rxtx.fifo[port->rxtx.tail];
+            port->rxtx.count--;
+            CIRCULAR_BUF_INC_IDX(port->rxtx.tail);
+        }
+
+        if (!port->rxtx.count) {
+            /*
+             * trigger interrupt if tx buffer empty interrupt is enabled and
+             * fifo is empty
+             */
+#if defined(DEBUG_INTR)
+            pr_err("Serial port %d: Buffer Empty\n", index);
+#endif
+            if (port->uart_reg[UART_IER] & UART_IER_THRI)
+                mtty_dev_mdev_trigger_interrupt(mdev_state->mdev->uuid);
+
+        }
+        mutex_unlock(&mdev_state->rxtx_lock);
+
+        break;
+
+    case UART_IER:
+        if (port->dlab) {
+            *buf = (u8)(port->divisor >> 8);
+            break;
+        }
+        *buf = port->uart_reg[offset] & 0x0f;
+        break;
+
+    case UART_IIR:
+    {
+        u8 ier = port->uart_reg[UART_IER];
+        u8 iir = UART_IIR_NO_INT;
+
+        mutex_lock(&mdev_state->rxtx_lock);
+        /*
+         * The IIR carries the ID of the single highest priority pending
+         * interrupt, so the sources are tested in priority order and the
+         * first hit wins. The IDs must not be OR-ed together: RDI | THRI
+         * equals the RLSI code, and the MSI code is 0, which is only
+         * distinguishable from "nothing pending" through bit 0.
+         */
+        if ((ier & UART_IER_RLSI) && port->overrun) {
+            /* Interrupt priority 1: Parity, overrun, or framing error or
+             * break interrupt */
+            iir = UART_IIR_RLSI;
+        } else if ((ier & UART_IER_RDI) && port->rxtx.count &&
+                   (port->rxtx.count >= port->intr_trigger_level)) {
+            /* Interrupt priority 2: Fifo trigger level reached */
+            iir = UART_IIR_RDI;
+        } else if ((ier & UART_IER_THRI) && !port->rxtx.count) {
+            /* Interrupt priority 3: transmitter holding register empty */
+            iir = UART_IIR_THRI;
+        } else if ((ier & UART_IER_MSI) &&
+                   (port->uart_reg[UART_MCR] & (UART_MCR_RTS | UART_MCR_DTR))) {
+            /* Interrupt priority 4: Modem status: CTS, DSR, RI or DCD  */
+            iir = UART_IIR_MSI;
+        }
+        mutex_unlock(&mdev_state->rxtx_lock);
+
+        /*
+         * bit0: 0 indicate interrupt pending, 1 indicate no interrupt is
+         * pending; bit 6 & 7 are set to be 16550 compatible
+         */
+        *buf = iir | 0xC0;
+        break;
+    }
+
+    case UART_LCR:
+    case UART_MCR:
+        *buf = port->uart_reg[offset];
+        break;
+
+    case UART_LSR:
+    {
+        u8 lsr = 0;
+
+        mutex_lock(&mdev_state->rxtx_lock);
+        /* at least one char in FIFO */
+        if (port->rxtx.count)
+            lsr |= UART_LSR_DR;
+
+        /* if FIFO overrun */
+        if (port->overrun)
+            lsr |= UART_LSR_OE;
+
+        /* transmit FIFO empty and transmitter empty */
+        if (!port->rxtx.count)
+            lsr |= UART_LSR_TEMT | UART_LSR_THRE;
+
+        mutex_unlock(&mdev_state->rxtx_lock);
+        *buf = lsr;
+        break;
+    }
+    case UART_MSR:
+        *buf = UART_MSR_DSR | UART_MSR_DDSR | UART_MSR_DCD;
+
+        mutex_lock(&mdev_state->rxtx_lock);
+        /* if AFE is 1 and FIFO have space, set CTS bit */
+        if (port->uart_reg[UART_MCR] & UART_MCR_AFE) {
+            if (port->rxtx.count < port->max_fifo_size)
+                *buf |= UART_MSR_CTS | UART_MSR_DCTS;
+        } else
+            *buf |= UART_MSR_CTS | UART_MSR_DCTS;
+        mutex_unlock(&mdev_state->rxtx_lock);
+
+        break;
+
+    case UART_SCR:
+        *buf = port->uart_reg[offset];
+        break;
+
+    default:
+        break;
+    }
+}
+
+/*
+ * Decode the base address of every BAR that has a non-zero region size from
+ * the virtual config space into region_info[].start.
+ *
+ * The config space offset is derived from the BAR index on every iteration,
+ * so skipping an unused BAR cannot shift the registers read for the BARs
+ * after it. I/O BARs are masked with the I/O mask: the memory mask would
+ * also clear address bits 2 and 3, which are valid for an I/O BAR.
+ */
+static void mdev_read_base(struct mdev_state *mdev_state)
+{
+    int index, pos;
+    u32 bar, start_lo, start_hi;
+
+    for (index = 0; index <= VFIO_PCI_BAR5_REGION_INDEX; index++) {
+
+        if (!mdev_state->region_info[index].size)
+            continue;
+
+        pos = PCI_BASE_ADDRESS_0 + index * 4;
+        bar = *(u32 *)(mdev_state->vconfig + pos);
+
+        if (bar & PCI_BASE_ADDRESS_SPACE_IO) {
+            mdev_state->region_info[index].start =
+                    bar & PCI_BASE_ADDRESS_IO_MASK;
+            continue;
+        }
+
+        start_lo = bar & PCI_BASE_ADDRESS_MEM_MASK;
+        start_hi = 0;
+
+        switch (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) {
+        case PCI_BASE_ADDRESS_MEM_TYPE_64:
+            /* upper half lives in the next BAR register; BAR5 has none */
+            if (index < VFIO_PCI_BAR5_REGION_INDEX)
+                start_hi = *(u32 *)(mdev_state->vconfig + pos + 4);
+            break;
+        case PCI_BASE_ADDRESS_MEM_TYPE_32:
+        case PCI_BASE_ADDRESS_MEM_TYPE_1M:
+            /* 1M mem BAR treated as 32-bit BAR */
+        default:
+            /* mem unknown type treated as 32-bit BAR */
+            break;
+        }
+
+        mdev_state->region_info[index].start = ((u64)start_hi << 32) | start_lo;
+    }
+}
+
+/*
+ * Common handler for reads and writes on the mdev device.
+ *
+ * @mdev:     mediated device being accessed
+ * @buf:      data to write, or buffer receiving the data read
+ * @count:    number of bytes in @buf
+ * @pos:      file offset; encodes the VFIO region index and the offset
+ *            inside that region
+ * @is_write: true for a write, false for a read
+ *
+ * Returns @count on success or -EINVAL when the arguments are invalid, the
+ * region is not emulated or the access lies outside of the region.
+ */
+static ssize_t mdev_access(struct mdev_device *mdev, char *buf,
+                           size_t count, loff_t pos, bool is_write)
+{
+    struct mdev_state *mdev_state;
+    unsigned int index;
+    loff_t offset;
+    ssize_t ret = 0;
+
+    if (!mdev || !buf)
+        return -EINVAL;
+
+    mdev_state = mdev_get_drvdata(mdev);
+    if (!mdev_state) {
+        pr_err("%s mdev_state not found\n", __func__);
+        return -EINVAL;
+    }
+
+    mutex_lock(&mdev_state->ops_lock);
+
+    index = MTTY_VFIO_PCI_OFFSET_TO_INDEX(pos);
+    offset = pos & MTTY_VFIO_PCI_OFFSET_MASK;
+    switch (index) {
+    case VFIO_PCI_CONFIG_REGION_INDEX:
+
+#if defined(DEBUG)
+        pr_info("%s: PCI config space %s at offset 0x%llx\n", __func__,
+                is_write? "write": "read", offset);
+#endif
+
+        /* vconfig is only MTTY_CONFIG_SPACE_SIZE bytes long */
+        if (offset >= MTTY_CONFIG_SPACE_SIZE ||
+            count > (size_t)(MTTY_CONFIG_SPACE_SIZE - offset)) {
+            ret = -EINVAL;
+            goto accessfailed;
+        }
+
+        if (is_write) {
+            dump_buffer(buf, count);
+            handle_pci_cfg_write(mdev_state, offset, buf, count);
+        }
+        else {
+            memcpy(buf, (mdev_state->vconfig + offset), count);
+            dump_buffer(buf, count);
+        }
+
+        break;
+
+    case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
+        /*
+         * The BAR number selects the serial port, and there is only one
+         * port per entry of s[]; the remaining BARs are not emulated.
+         * The register offset must lie inside the I/O BAR.
+         */
+        if (index >= ARRAY_SIZE(mdev_state->s) ||
+            offset >= MTTY_IO_BAR_SIZE) {
+            ret = -EINVAL;
+            goto accessfailed;
+        }
+
+        /* zero length access: buf holds nothing to read or fill */
+        if (!count)
+            break;
+
+        if (!mdev_state->region_info[index].start)
+            mdev_read_base(mdev_state);
+
+        if (is_write) {
+            dump_buffer(buf, count);
+
+#if defined(DEBUG_REGS)
+            pr_info("%s: BAR%d  write at offset 0x%llx %s val: 0x%02x dlab: %d \n",
+                 __func__, index, offset, wr_reg[offset], (u8)*buf,
+                mdev_state->s[index].dlab);
+#endif
+            handle_bar_write(index, mdev_state, offset, buf, count);
+        }
+        else {
+            /*
+             * The register emulation fills in a single byte; clear the
+             * buffer first so that the other bytes of a wider read are
+             * defined.
+             */
+            memset(buf, 0, count);
+            handle_bar_read(index, mdev_state, offset, buf, count);
+            dump_buffer(buf, count);
+
+#if defined(DEBUG_REGS)
+            pr_info("%s: BAR%d  read at offset 0x%llx %s val: 0x%02x dlab: %d \n",
+                 __func__, index, offset, rd_reg[offset], (u8)*buf,
+                mdev_state->s[index].dlab);
+#endif
+        }
+
+        break;
+
+    default:
+        ret = -EINVAL;
+        goto accessfailed;
+    }
+
+    ret = count;
+
+
+accessfailed:
+    mutex_unlock(&mdev_state->ops_lock);
+
+    return ret;
+}
+
+/* mdev fops */
+
+/*
+ * supported_config callback of the parent device.
+ * Only logs its own name; neither @dev nor @config is used and nothing is
+ * written to @config. Always returns 0.
+ */
+int mtty_supported_config(struct device *dev, char *config)
+{
+    pr_info("%s \n", __func__);
+    return 0;
+}
+
+/*
+ * create callback: allocate and initialize the state of a new mdev device,
+ * attach it to @mdev as driver data and add it to mdev_devices_list.
+ *
+ * @mdev_params is not used.
+ *
+ * Returns 0 on success, -EINVAL if @mdev is NULL and -ENOMEM if an
+ * allocation fails.
+ */
+int mtty_create(struct mdev_device *mdev, char *mdev_params)
+{
+    struct mdev_state *state;
+    u8 *cfg_space;
+    unsigned int port;
+
+    if (!mdev)
+        return -EINVAL;
+
+    state = kzalloc(sizeof(*state), GFP_KERNEL);
+    if (!state)
+        return -ENOMEM;
+
+    cfg_space = kzalloc(MTTY_CONFIG_SPACE_SIZE, GFP_KERNEL);
+    if (!cfg_space) {
+        pr_err("%s failed to allocate config space\n", __func__);
+        kfree(state);
+        return -ENOMEM;
+    }
+
+    state->vconfig = cfg_space;
+    state->mdev = mdev;
+    /* no interrupt type selected yet */
+    state->irq_index = -1;
+    for (port = 0; port < ARRAY_SIZE(state->s); port++)
+        state->s[port].max_fifo_size = MAX_FIFO_SIZE;
+
+    mutex_init(&state->rxtx_lock);
+    mutex_init(&state->ops_lock);
+
+    mdev_set_drvdata(mdev, state);
+    mtty_create_config_space(state);
+
+    mutex_lock(&mdev_list_lock);
+    list_add(&state->next, &mdev_devices_list);
+    mutex_unlock(&mdev_list_lock);
+
+    return 0;
+}
+
+/*
+ * destroy callback: unlink the state of @mdev from mdev_devices_list and
+ * free it, dropping the references mtty_set_irqs() holds on the interrupt
+ * eventfd files.
+ *
+ * Returns 0 on success, -EINVAL if @mdev is NULL or its state is not on the
+ * list.
+ */
+int mtty_destroy(struct mdev_device *mdev)
+{
+    struct mdev_state *mds, *tmp_mds;
+    struct mdev_state *mdev_state;
+    int ret = -EINVAL;
+
+    if (!mdev)
+        return -EINVAL;
+
+    mdev_state = mdev_get_drvdata(mdev);
+
+    mutex_lock(&mdev_list_lock);
+    list_for_each_entry_safe(mds, tmp_mds, &mdev_devices_list, next) {
+        if (mdev_state == mds) {
+            list_del(&mdev_state->next);
+            mdev_set_drvdata(mdev, NULL);
+            if (mdev_state->intx_file)
+                fput(mdev_state->intx_file);
+            if (mdev_state->msi_file)
+                fput(mdev_state->msi_file);
+            kfree(mdev_state->vconfig);
+            kfree(mdev_state);
+            ret = 0;
+            break;
+        }
+    }
+    mutex_unlock(&mdev_list_lock);
+
+    return ret;
+}
+
+/*
+ * reset callback: only validates its argument and logs the call; no device
+ * state is changed. Returns 0, or -EINVAL if @mdev or its state is missing.
+ */
+int mtty_reset(struct mdev_device *mdev)
+{
+    struct mdev_state *mdev_state;
+
+    if (!mdev)
+        return -EINVAL;
+
+    mdev_state = mdev_get_drvdata(mdev);
+    if (!mdev_state)
+        return -EINVAL;
+
+    pr_info("%s: called\n", __func__);
+
+    return 0;
+}
+
+/* set_online_status callback: logs the requested status and returns 0 */
+int mtty_set_online_status(struct mdev_device *mdev, bool online)
+{
+    pr_info("%s: called, online: %d\n", __func__, online);
+    return 0;
+}
+
+/*
+ * get_online_status callback: always reports the device as online.
+ * Returns 0, or -EINVAL if @online is NULL.
+ */
+int mtty_get_online_status(struct mdev_device *mdev, bool *online)
+{
+    if (!online)
+        return -EINVAL;
+
+    *online = true;
+    return 0;
+}
+
+/* read callback: see mdev_access() */
+ssize_t mtty_read(struct mdev_device *mdev, char *buf,
+                                 size_t count, loff_t pos)
+{
+    return mdev_access(mdev, buf, count, pos, false);
+}
+
+/* write callback: see mdev_access() */
+ssize_t mtty_write(struct mdev_device *mdev, char *buf,
+                                 size_t count, loff_t pos)
+{
+    return mdev_access(mdev, buf, count, pos, true);
+}
+
+/*
+ * Replace the eventfd file stored in @slot by the file behind @fd.
+ *
+ * A reference on the new file is taken with fget() and kept for as long as
+ * the pointer is stored, so the file stays valid even if user space closes
+ * the descriptor. The reference on the previously stored file is dropped.
+ * A negative @fd just releases the stored file.
+ *
+ * Returns 0 on success or -EBADF if @fd is not an open file; @slot is left
+ * untouched in that case.
+ */
+static int mtty_swap_irq_file(struct file **slot, int fd)
+{
+    struct file *new_file = NULL;
+
+    if (fd >= 0) {
+        new_file = fget(fd);
+        if (!new_file)
+            return -EBADF;
+    }
+
+    if (*slot)
+        fput(*slot);
+    *slot = new_file;
+
+    return 0;
+}
+
+/*
+ * set_irqs callback: configure interrupt signalling for @mdev.
+ *
+ * @flags: VFIO_IRQ_SET_* action and data type flags
+ * @index: VFIO_PCI_*_IRQ_INDEX being configured
+ * @start: not used
+ * @count: not used
+ * @data:  for VFIO_IRQ_SET_DATA_EVENTFD, points to the eventfd descriptor
+ *
+ * Only the TRIGGER action on INTx and MSI does anything: DATA_NONE or a
+ * negative descriptor releases the eventfd, a valid descriptor installs it
+ * and makes @index the active interrupt type. Disabling MSI falls back to
+ * INTx. MASK/UNMASK are accepted and ignored; the other indexes are only
+ * logged.
+ *
+ * Returns 0 on success, -EINVAL on missing arguments and -EBADF if the
+ * descriptor is not an open file.
+ */
+static int mtty_set_irqs(struct mdev_device *mdev, uint32_t flags,
+                         unsigned index, unsigned start, unsigned count,
+                         void *data)
+{
+    int ret = 0;
+    struct mdev_state *mdev_state;
+    struct file **slot;
+    bool is_msi;
+    int fd;
+
+    if (!mdev)
+        return -EINVAL;
+
+    mdev_state = mdev_get_drvdata(mdev);
+    if (!mdev_state)
+        return -EINVAL;
+
+    mutex_lock(&mdev_state->ops_lock);
+    switch (index) {
+    case VFIO_PCI_INTX_IRQ_INDEX:
+    case VFIO_PCI_MSI_IRQ_INDEX:
+        is_msi = (index == VFIO_PCI_MSI_IRQ_INDEX);
+        slot = is_msi ? &mdev_state->msi_file : &mdev_state->intx_file;
+
+        /* VFIO_IRQ_SET_ACTION_MASK / _UNMASK: nothing to do */
+        if ((flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) !=
+            VFIO_IRQ_SET_ACTION_TRIGGER)
+            break;
+
+        if (flags & VFIO_IRQ_SET_DATA_NONE) {
+            pr_info("%s: disable %s\n", __func__, is_msi ? "MSI" : "INTx");
+            mtty_swap_irq_file(slot, -1);
+            if (is_msi)
+                mdev_state->irq_index = VFIO_PCI_INTX_IRQ_INDEX;
+            break;
+        }
+
+        if (!(flags & VFIO_IRQ_SET_DATA_EVENTFD))
+            break;
+
+        if (!data) {
+            ret = -EINVAL;
+            break;
+        }
+
+        /* 0 is a valid descriptor; only negative values mean "none" */
+        fd = *(int *)data;
+        ret = mtty_swap_irq_file(slot, fd);
+        if (ret)
+            break;
+
+        if (fd >= 0) {
+            mdev_state->irq_fd = fd;
+            mdev_state->irq_index = index;
+        } else if (is_msi) {
+            mdev_state->irq_index = VFIO_PCI_INTX_IRQ_INDEX;
+        }
+        break;
+    case VFIO_PCI_MSIX_IRQ_INDEX:
+        pr_info("%s: MSIX_IRQ \n", __func__);
+        break;
+    case VFIO_PCI_ERR_IRQ_INDEX:
+        pr_info("%s: ERR_IRQ \n", __func__);
+        break;
+    case VFIO_PCI_REQ_IRQ_INDEX:
+        pr_info("%s: REQ_IRQ \n", __func__);
+        break;
+    }
+
+    mutex_unlock(&mdev_state->ops_lock);
+    return ret;
+}
+
+/*
+ * Signal an interrupt to the user of the mdev device with the given UUID by
+ * writing the value 1 to the eventfd file registered for the active
+ * interrupt type (the MSI file when MSI is active, the INTx file otherwise).
+ *
+ * Returns the result of the write method of the file, -EINVAL if the device
+ * or the file of the active interrupt type is missing, and -1 if the file
+ * cannot be written to.
+ */
+static int mtty_dev_mdev_trigger_interrupt(uuid_le uuid)
+{
+    struct mdev_state *state = find_mdev_state_by_uuid(uuid);
+    struct file *evt_file;
+    mm_segment_t saved_fs;
+    loff_t file_pos = 0;
+    u64 event = 1;
+    bool use_msi;
+    int ret = -1;
+
+    if (!state) {
+        pr_info("%s: mdev not found\n", __func__);
+        return -EINVAL;
+    }
+
+    use_msi = (state->irq_index == VFIO_PCI_MSI_IRQ_INDEX);
+
+    if (use_msi && !state->msi_file)
+        return -EINVAL;
+
+    if ((state->irq_index == VFIO_PCI_INTX_IRQ_INDEX) && !state->intx_file) {
+        pr_info("%s: Intr file not found\n", __func__);
+        return -EINVAL;
+    }
+
+    evt_file = use_msi ? state->msi_file : state->intx_file;
+
+    /* the write method expects a user space pointer; pass a kernel one */
+    saved_fs = get_fs();
+    set_fs(KERNEL_DS);
+
+    if (evt_file && evt_file->f_op && evt_file->f_op->write) {
+        ret = evt_file->f_op->write(evt_file, (char *)&event, sizeof(event),
+                                    &file_pos);
+#if defined(DEBUG_INTR)
+        pr_info("Intx triggered \n");
+#endif
+    } else {
+        pr_err("%s: pfile not valid, intr_type = %d\n", __func__,
+                 state->irq_index);
+    }
+
+    set_fs(saved_fs);
+
+    if (ret < 0)
+        pr_err("%s: eventfd write failed (%d)\n", __func__, ret);
+
+    return ret;
+}
+
+/*
+ * get_region_info callback: report size, file offset and access flags of the
+ * region selected by region_info->index and record size and offset in the
+ * device state.
+ *
+ * The config space and the two I/O BARs have a size; every other region is
+ * reported with size 0. @cap_type_id and @cap_type are not used.
+ *
+ * Returns 0 on success, -EINVAL if an argument or the device state is
+ * missing or the region index is out of range.
+ */
+int mtty_get_region_info(struct mdev_device *mdev,
+                         struct vfio_region_info *region_info,
+                         u16 *cap_type_id, void **cap_type)
+{
+    unsigned int size = 0;
+    struct mdev_state *mdev_state;
+    unsigned int bar_index;
+
+    if (!mdev || !region_info)
+        return -EINVAL;
+
+    mdev_state = mdev_get_drvdata(mdev);
+    if (!mdev_state)
+        return -EINVAL;
+
+    /* the index comes from the caller and is used to index region_info[] */
+    bar_index = region_info->index;
+    if (bar_index >= VFIO_PCI_NUM_REGIONS)
+        return -EINVAL;
+
+    mutex_lock(&mdev_state->ops_lock);
+
+    switch (bar_index) {
+    case VFIO_PCI_CONFIG_REGION_INDEX:
+        size = MTTY_CONFIG_SPACE_SIZE;
+        break;
+    case VFIO_PCI_BAR0_REGION_INDEX:
+        size = MTTY_IO_BAR_SIZE;
+        break;
+    case VFIO_PCI_BAR1_REGION_INDEX:
+        size = MTTY_IO_BAR_SIZE;
+        break;
+    default:
+        size = 0;
+        break;
+    }
+
+    mdev_state->region_info[bar_index].size = size;
+    mdev_state->region_info[bar_index].vfio_offset = MTTY_VFIO_PCI_INDEX_TO_OFFSET(bar_index);
+
+    region_info->size = size;
+    region_info->offset = MTTY_VFIO_PCI_INDEX_TO_OFFSET(bar_index);
+    region_info->flags = VFIO_REGION_INFO_FLAG_READ |
+                         VFIO_REGION_INFO_FLAG_WRITE;
+    mutex_unlock(&mdev_state->ops_lock);
+    return 0;
+}
+
+/*
+ * validate_map_request callback.
+ * Sets *size to PAGE_SIZE and makes *prot non-cached; @mdev, @pos, @virtaddr
+ * and @pfn are neither read nor written. Always returns 0.
+ */
+int mtty_validate_map_request(struct mdev_device *mdev, loff_t pos,
+                              u64 *virtaddr, unsigned long *pfn,
+                              unsigned long *size, pgprot_t *prot)
+{
+    *size = PAGE_SIZE;
+    *prot = pgprot_noncached(*prot);
+    return 0;
+}
+
+/*
+ * get_irq_info callback: describe the interrupt selected by irq_info->index.
+ *
+ * INTx, MSI and REQ are supported, each with a single eventfd-signalled
+ * vector. INTx is additionally maskable and automasked, the others are
+ * flagged NORESIZE. Returns 0, or -EINVAL for any other index.
+ */
+int mtty_get_irq_info(struct mdev_device *mdev,
+                      struct vfio_irq_info *irq_info)
+{
+    const u32 irq = irq_info->index;
+    u32 irq_flags = VFIO_IRQ_INFO_EVENTFD;
+
+    if (irq != VFIO_PCI_INTX_IRQ_INDEX &&
+        irq != VFIO_PCI_MSI_IRQ_INDEX &&
+        irq != VFIO_PCI_REQ_IRQ_INDEX)
+        return -EINVAL;
+
+    if (irq == VFIO_PCI_INTX_IRQ_INDEX)
+        irq_flags |= VFIO_IRQ_INFO_MASKABLE | VFIO_IRQ_INFO_AUTOMASKED;
+    else
+        irq_flags |= VFIO_IRQ_INFO_NORESIZE;
+
+    irq_info->flags = irq_flags;
+    irq_info->count = 1;
+
+    return 0;
+}
+
+/*
+ * Report the device-level VFIO information for an mtty device: it is
+ * presented as a PCI device with the standard VFIO PCI region and IRQ
+ * counts.  @mdev is not consulted.
+ *
+ * Always returns 0.
+ */
+int mtty_get_device_info(struct mdev_device *mdev,
+                         struct vfio_device_info *dev_info)
+{
+    dev_info->num_irqs = VFIO_PCI_NUM_IRQS;
+    dev_info->num_regions = VFIO_PCI_NUM_REGIONS;
+    dev_info->flags = VFIO_DEVICE_FLAGS_PCI;
+
+    return 0;
+}
+
+/*
+ * sysfs show handler for the read-only "sample_mtty_dev" attribute.
+ * Writes a fixed marker string into @buf and returns its length.
+ */
+static ssize_t
+sample_mtty_dev_show(struct device *dev, struct device_attribute *attr,
+                     char *buf)
+{
+    ssize_t len = sprintf(buf, "This is phy device \n");
+
+    return len;
+}
+
+static DEVICE_ATTR_RO(sample_mtty_dev);
+
+/* Attributes placed in the "mtty_dev" sysfs group (NULL-terminated). */
+static struct attribute *mtty_dev_attrs[] = {
+        &dev_attr_sample_mtty_dev.attr,
+        NULL,
+};
+
+/* Named group, so the attributes appear under a "mtty_dev" subdirectory. */
+static const struct attribute_group mtty_dev_group = {
+        .name  = "mtty_dev",
+        .attrs = mtty_dev_attrs,
+};
+
+/* NULL-terminated group list; referenced by mdev_fops.dev_attr_groups. */
+const struct attribute_group *mtty_dev_groups[] = {
+        &mtty_dev_group,
+        NULL,
+};
+
+/*
+ * sysfs show handler for the read-only "sample_mdev_dev" attribute.
+ * Prints the name of the mdev device that @dev belongs to, or a blank
+ * line when no mdev device could be resolved.  Returns the number of
+ * bytes written to @buf.
+ */
+static ssize_t
+sample_mdev_dev_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+        struct mdev_device *mdev = to_mdev_device(dev);
+
+        if (!mdev)
+                return sprintf(buf, " \n");
+
+        return sprintf(buf, "This is MDEV %s\n", dev_name(&mdev->dev));
+}
+
+static DEVICE_ATTR_RO(sample_mdev_dev);
+
+/* Attributes placed in the "vendor" sysfs group (NULL-terminated). */
+static struct attribute *mdev_dev_attrs[] = {
+        &dev_attr_sample_mdev_dev.attr,
+        NULL,
+};
+
+/* Named group, so the attributes appear under a "vendor" subdirectory. */
+static const struct attribute_group mdev_dev_group = {
+        .name  = "vendor",
+        .attrs = mdev_dev_attrs,
+};
+
+/* NULL-terminated group list; referenced by mdev_fops.mdev_attr_groups. */
+const struct attribute_group *mdev_dev_groups[] = {
+        &mdev_dev_group,
+        NULL,
+};
+
+/*
+ * Callback table handed to mdev_register_device() in mtty_dev_init().
+ * It wires the sysfs attribute groups and the mtty_* handlers defined in
+ * this file into the mediated device framework.
+ *
+ * NOTE(review): mtty_validate_map_request() is defined in this file but is
+ * not referenced from this table -- confirm whether that is intentional.
+ */
+struct parent_ops mdev_fops = {
+    .owner                  = THIS_MODULE,
+    .dev_attr_groups        = mtty_dev_groups,
+    .mdev_attr_groups       = mdev_dev_groups,
+    .supported_config       = mtty_supported_config,
+    .create                 = mtty_create,
+    .destroy                = mtty_destroy,
+    .reset                  = mtty_reset,
+    .set_online_status      = mtty_set_online_status,
+    .get_online_status      = mtty_get_online_status,
+    .read                   = mtty_read,
+    .write                  = mtty_write,
+    .get_irq_info           = mtty_get_irq_info,
+    .set_irqs               = mtty_set_irqs,
+    .get_region_info        = mtty_get_region_info,
+    .get_device_info        = mtty_get_device_info,
+};
+
+/*
+ * Release callback installed on mtty_dev.dev by mtty_dev_init().
+ * It only emits a debug message; nothing is freed here.
+ */
+static void mtty_device_release(struct device *dev)
+{
+	dev_dbg(dev, "mtty: released\n");
+}
+
+/*
+ * Module entry point.
+ *
+ * Sets up, in order: the idr and the global mdev list/lock, a char device
+ * region plus cdev, the device class, the dummy parent device, and finally
+ * registers that parent with the mediated device framework.
+ *
+ * On any failure everything set up so far is torn down in reverse order and
+ * the negative errno of the failing step is returned, so the module never
+ * reports success while only partially initialised.
+ *
+ * Returns 0 on success, a negative errno on failure.
+ */
+static int __init mtty_dev_init(void)
+{
+    int ret;
+
+    pr_info("mtty_dev: %s\n", __FUNCTION__);
+
+    memset(&mtty_dev, 0, sizeof(mtty_dev));
+
+    idr_init(&mtty_dev.vd_idr);
+
+    /*
+     * Initialise the list and its lock before the parent device is
+     * registered, so they are valid by the time any parent_ops callback
+     * can be invoked.
+     */
+    mutex_init(&mdev_list_lock);
+    INIT_LIST_HEAD(&mdev_devices_list);
+
+    ret = alloc_chrdev_region(&mtty_dev.vd_devt, 0, MINORMASK, MTTY_NAME);
+    if (ret < 0) {
+        pr_err("Error: failed to register mtty_dev, err:%d\n", ret);
+        goto err_idr;
+    }
+
+    cdev_init(&mtty_dev.vd_cdev, &vd_fops);
+    ret = cdev_add(&mtty_dev.vd_cdev, mtty_dev.vd_devt, MINORMASK);
+    if (ret < 0) {
+        pr_err("Error: failed to add mtty_dev cdev, err:%d\n", ret);
+        goto err_chrdev_region;
+    }
+
+    pr_info("major_number:%d \n", MAJOR(mtty_dev.vd_devt));
+
+    mtty_dev.vd_class = class_create(THIS_MODULE, MTTY_CLASS_NAME);
+    if (IS_ERR(mtty_dev.vd_class)) {
+        printk(KERN_ERR "Error: failed to register mtty_dev class\n");
+        /* Propagate the real error instead of the stale success value. */
+        ret = PTR_ERR(mtty_dev.vd_class);
+        mtty_dev.vd_class = NULL;
+        goto err_cdev;
+    }
+
+    mtty_dev.dev.release = mtty_device_release;
+    dev_set_name(&mtty_dev.dev, "%s", MTTY_NAME);
+
+    ret = device_register(&mtty_dev.dev);
+    if (ret) {
+        /*
+         * A failed device_register() still holds a reference that must
+         * be dropped with put_device(), not device_unregister().
+         */
+        put_device(&mtty_dev.dev);
+        goto err_class;
+    }
+
+    ret = mdev_register_device(&mtty_dev.dev, &mdev_fops);
+    if (ret)
+        goto err_device;
+
+    return 0;
+
+err_device:
+    device_unregister(&mtty_dev.dev);
+err_class:
+    class_destroy(mtty_dev.vd_class);
+    mtty_dev.vd_class = NULL;
+err_cdev:
+    cdev_del(&mtty_dev.vd_cdev);
+err_chrdev_region:
+    unregister_chrdev_region(mtty_dev.vd_devt, MINORMASK);
+err_idr:
+    idr_destroy(&mtty_dev.vd_idr);
+    return ret;
+}
+
+/*
+ * Module exit point: undoes what mtty_dev_init() set up.
+ *
+ * The parent is first removed from the mediated device framework and only
+ * then unregistered as a device, mirroring the registration order in init.
+ * After that the idr, cdev, char device region and class are released.
+ */
+static void __exit mtty_dev_exit(void)
+{
+    /* NOTE(review): reason for clearing the bus pointer is not evident here -- confirm. */
+    mtty_dev.dev.bus = NULL;
+    mdev_unregister_device(&mtty_dev.dev);
+
+    device_unregister(&mtty_dev.dev);
+    idr_destroy(&mtty_dev.vd_idr);
+    cdev_del(&mtty_dev.vd_cdev);
+    unregister_chrdev_region(mtty_dev.vd_devt, MINORMASK);
+    class_destroy(mtty_dev.vd_class);
+    /* Clear the stale pointer now that the class is gone. */
+    mtty_dev.vd_class = NULL;
+    printk(KERN_INFO "mtty_dev: Unloaded!\n");
+}
+
+/* Module entry/exit hooks and metadata. */
+module_init(mtty_dev_init)
+module_exit(mtty_dev_exit)
+
+MODULE_LICENSE("GPL");
+MODULE_INFO(supported, "Test driver that simulate serial port over PCI");
+MODULE_VERSION(VERSION_STRING);
+MODULE_AUTHOR(DRIVER_AUTHOR);
diff --git a/Documentation/vfio-mediated-device.txt b/Documentation/mdev/vfio-mediated-device.txt
similarity index 78%
rename from Documentation/vfio-mediated-device.txt
rename to Documentation/mdev/vfio-mediated-device.txt
index 237d8eb630b7..c67576f1da29 100644
--- a/Documentation/vfio-mediated-device.txt
+++ b/Documentation/mdev/vfio-mediated-device.txt
@@ -193,6 +193,67 @@ supported in TYPE1 IOMMU module. To enable the same for other IOMMU backend
 modules, such as PPC64 sPAPR module, they need to provide these two callback
 functions.
 
+Sample code
+------------------------------------------------------------------------------
+mtty.c in this folder is sample code that demonstrates how to use the mediated
+device framework.
+
+The sample driver creates an mdev device that simulates a serial port over a
+PCI card.
+
+Build and load the mtty.ko module. This creates a dummy device,
+/sys/devices/mtty. The files in this device's sysfs directory look like:
+
+# ls /sys/devices/mtty/ -l
+total 0
+--w------- 1 root root 4096 Sep 29 12:18 mdev_create
+--w------- 1 root root 4096 Sep 29 12:18 mdev_destroy
+-r--r--r-- 1 root root 4096 Sep 29 12:34 mdev_supported_types
+drwxr-xr-x 2 root root    0 Sep 29 12:34 mtty_dev
+drwxr-xr-x 2 root root    0 Sep 29 12:34 power
+-rw-r--r-- 1 root root 4096 Sep 29 12:34 uevent
+
+Create mediated device using this device:
+# echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1001" > /sys/devices/mtty/mdev_create
+
+Add parameters to qemu-kvm:
+-device vfio-pci,\
+ sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1001
+
+Boot the VM. In a Linux guest (with no such hardware in the host), the device
+is seen as below:
+
+# lspci -s 00:05.0 -xxvv
+00:05.0 Serial controller: Device 4348:3253 (rev 10) (prog-if 02 [16550])
+        Subsystem: Device 4348:3253
+        Physical Slot: 5
+        Control: I/O+ Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr-
+Stepping- SERR- FastB2B- DisINTx-
+        Status: Cap- 66MHz- UDF- FastB2B- ParErr- DEVSEL=medium >TAbort-
+<TAbort- <MAbort- >SERR- <PERR- INTx-
+        Interrupt: pin A routed to IRQ 10
+        Region 0: I/O ports at c150 [size=8]
+        Region 1: I/O ports at c158 [size=8]
+        Kernel driver in use: serial
+00: 48 43 53 32 01 00 00 02 10 02 00 07 00 00 00 00
+10: 51 c1 00 00 59 c1 00 00 00 00 00 00 00 00 00 00
+20: 00 00 00 00 00 00 00 00 00 00 00 00 48 43 53 32
+30: 00 00 00 00 00 00 00 00 00 00 00 00 0a 01 00 00
+
+In guest dmesg:
+serial 0000:00:05.0: PCI INT A -> Link[LNKA] -> GSI 10 (level, high) -> IRQ 10
+0000:00:05.0: ttyS1 at I/O 0xc150 (irq = 10) is a 16550A
+0000:00:05.0: ttyS2 at I/O 0xc158 (irq = 10) is a 16550A
+
+Check the serial ports in guest:
+# setserial -g /dev/ttyS*
+/dev/ttyS0, UART: 16550A, Port: 0x03f8, IRQ: 4
+/dev/ttyS1, UART: 16550A, Port: 0xc150, IRQ: 10
+/dev/ttyS2, UART: 16550A, Port: 0xc158, IRQ: 10
+
+Using minicom or any terminal emulation program, open port /dev/ttyS1 or
+/dev/ttyS2 with hardware flow control disabled. Type data on the minicom
+terminal, or send data to the terminal emulation program, and read the data
+back. Data is looped back by the host's mtty driver.
+
 References
 -------------------------------------------------------------------------------
 
-- 
2.7.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux