Re: [PATCH v2] Shared memory device with interrupt support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 15-May-09, at 8:54 PM, Kumar, Venkat wrote:

Cam,

A questions on interrupts as well.
What is "unix:path" that needs to be passed in the argument list?
Can it be any string?

It has to be a valid path on the host. It will create a unix domain socket on that path.


If my understanding is correct both the VM's who wants to communicate would gives this path in the command line with one of them specifying as "server".

Exactly, the one with the "server" in the parameter list will wait for a connection before booting.

Cam


Thx,
Venkat






Support an inter-vm shared memory device that maps a shared- memory object as a PCI device in the guest. This patch also supports interrupts between guest by communicating over a unix domain socket. This patch applies to the
qemu-kvm repository.

This device now creates a qemu character device and sends 1-bytes messages to trigger interrupts. Writes are trigger by writing to the "Doorbell" register on the shared memory PCI device. The lower 8-bits of the value written to this register are sent as the 1-byte message so different meanings of interrupts can
be supported.

Interrupts are only supported between 2 VMs currently. One VM must act as the server by adding "server" to the command-line argument. Shared memory devices
are created with the following command-line:

-ivhshmem <shm object>,<size in MB>,[unix:<path>][,server]

Interrupts can also be used between host and guest as well by implementing a
listener on the host.

Cam

---
Makefile.target |    3 +
hw/ivshmem.c | 421 ++++++++++++++++++++++++++++++++++++++++++++++ +++++++++
hw/pc.c         |    6 +
hw/pc.h         |    3 +
qemu-options.hx |   14 ++
sysemu.h        |    8 +
vl.c            |   14 ++
7 files changed, 469 insertions(+), 0 deletions(-)
create mode 100644 hw/ivshmem.c

diff --git a/Makefile.target b/Makefile.target
index b68a689..3190bba 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -643,6 +643,9 @@ OBJS += pcnet.o
OBJS += rtl8139.o
OBJS += e1000.o

+# Inter-VM PCI shared memory
+OBJS += ivshmem.o
+
# Generic watchdog support and some watchdog devices
OBJS += watchdog.o
OBJS += wdt_ib700.o wdt_i6300esb.o
diff --git a/hw/ivshmem.c b/hw/ivshmem.c
new file mode 100644
index 0000000..95e2268
--- /dev/null
+++ b/hw/ivshmem.c
@@ -0,0 +1,421 @@
+/*
+ * Inter-VM Shared Memory PCI device.
+ *
+ * Author:
+ *      Cam Macdonell <c...@xxxxxxxxxxxxxx>
+ *
+ * Based On: cirrus_vga.c and rtl8139.c
+ *
+ * This code is licensed under the GNU GPL v2.
+ */
+
+#include "hw.h"
+#include "console.h"
+#include "pc.h"
+#include "pci.h"
+#include "sysemu.h"
+
+#include "qemu-common.h"
+#include <sys/mman.h>
+
+#define PCI_COMMAND_IOACCESS                0x0001
+#define PCI_COMMAND_MEMACCESS               0x0002
+#define PCI_COMMAND_BUSMASTER               0x0004
+
+//#define DEBUG_IVSHMEM
+
+#ifdef DEBUG_IVSHMEM
+#define IVSHMEM_DPRINTF(fmt, args...)        \
+    do {printf("IVSHMEM: " fmt, ##args); } while (0)
+#else
+#define IVSHMEM_DPRINTF(fmt, args...)
+#endif
+
+typedef struct IVShmemState {
+    uint16_t intrmask;
+    uint16_t intrstatus;
+    uint16_t doorbell;
+    uint8_t *ivshmem_ptr;
+    unsigned long ivshmem_offset;
+    unsigned int ivshmem_size;
+    unsigned long bios_offset;
+    unsigned int bios_size;
+    target_phys_addr_t base_ctrl;
+    int it_shift;
+    PCIDevice *pci_dev;
+    CharDriverState * chr;
+    unsigned long map_addr;
+    unsigned long map_end;
+    int ivshmem_mmio_io_addr;
+} IVShmemState;
+
+typedef struct PCI_IVShmemState {
+    PCIDevice dev;
+    IVShmemState ivshmem_state;
+} PCI_IVShmemState;
+
+typedef struct IVShmemDesc {
+    char name[1024];
+    char * chrdev;
+    int size;
+} IVShmemDesc;
+
+
+/* registers for the Inter-VM shared memory device */
+enum ivshmem_registers {
+    IntrMask = 0,
+    IntrStatus = 16,
+    Doorbell = 32
+};
+
+static int num_ivshmem_devices = 0;
+static IVShmemDesc ivshmem_desc;
+
+static void ivshmem_map(PCIDevice *pci_dev, int region_num,
+                    uint32_t addr, uint32_t size, int type)
+{
+    PCI_IVShmemState *d = (PCI_IVShmemState *)pci_dev;
+    IVShmemState *s = &d->ivshmem_state;
+
+    IVSHMEM_DPRINTF("addr = %u size = %u\n", addr, size);
+ cpu_register_physical_memory(addr, s->ivshmem_size, s- >ivshmem_offset);
+
+}
+
+void ivshmem_init(const char * optarg) {
+
+    char * temp;
+    char * ivshmem_sz;
+    int size;
+
+    num_ivshmem_devices++;
+
+    /* currently we only support 1 device */
+    if (num_ivshmem_devices > MAX_IVSHMEM_DEVICES) {
+        return;
+    }
+
+    temp = strdup(optarg);
+    snprintf(ivshmem_desc.name, 1024, "/%s", strsep(&temp,","));
+    ivshmem_sz=strsep(&temp,",");
+    if (ivshmem_sz != NULL){
+        size = atol(ivshmem_sz);
+    } else {
+        size = -1;
+    }
+
+    ivshmem_desc.chrdev = strsep(&temp,"\0");
+
+    if ( size == -1) {
+        ivshmem_desc.size = TARGET_PAGE_SIZE;
+    } else {
+        ivshmem_desc.size = size*1024*1024;
+    }
+ IVSHMEM_DPRINTF("optarg is %s, name is %s, size is %d, chrdev is %s\n",
+                                        optarg, ivshmem_desc.name,
+                                        ivshmem_desc.size,
ivshmem_desc.chrdev);
+}
+
+int ivshmem_get_size(void) {
+    return ivshmem_desc.size;
+}
+
+/* accessing registers - based on rtl8139 */
+static void ivshmem_update_irq(IVShmemState *s)
+{
+    int isr;
+    isr = (s->intrstatus & s->intrmask) & 0xffff;
+
+    /* don't print ISR resets */
+    if (isr) {
+        IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n",
+           isr ? 1 : 0, s->intrstatus, s->intrmask);
+    }
+
+    qemu_set_irq(s->pci_dev->irq[0], (isr != 0));
+}
+
+static void ivshmem_mmio_map(PCIDevice *pci_dev, int region_num,
+                       uint32_t addr, uint32_t size, int type)
+{
+    PCI_IVShmemState *d = (PCI_IVShmemState *)pci_dev;
+    IVShmemState *s = &d->ivshmem_state;
+
+ cpu_register_physical_memory(addr + 0, 0x100, s- >ivshmem_mmio_io_addr);
+}
+
+static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
+{
+    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);
+
+    s->intrmask = val;
+
+    ivshmem_update_irq(s);
+}
+
+static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
+{
+    uint32_t ret = s->intrmask;
+
+    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
+
+    return ret;
+}
+
+static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
+{
+    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);
+
+    s->intrstatus = val;
+
+    ivshmem_update_irq(s);
+    return;
+}
+
+static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
+{
+    uint32_t ret = s->intrstatus;
+
+    /* reading ISR clears all interrupts */
+    s->intrstatus = 0;
+
+    ivshmem_update_irq(s);
+
+    return ret;
+}
+
+static void ivshmem_io_writew(void *opaque, uint8_t addr, uint32_t val)
+{
+    IVShmemState *s = opaque;
+
+ IVSHMEM_DPRINTF("writing 0x%x to 0x%lx\n", addr, (unsigned long) opaque);
+
+    addr &= 0xfe;
+
+    switch (addr)
+    {
+        case IntrMask:
+            ivshmem_IntrMask_write(s, val);
+            break;
+
+        case IntrStatus:
+            ivshmem_IntrStatus_write(s, val);
+            break;
+
+        default:
+            IVSHMEM_DPRINTF("why are we writing 0x%x\n", addr);
+    }
+}
+
+static void ivshmem_io_writel(void *opaque, uint8_t addr, uint32_t val)
+{
+    IVSHMEM_DPRINTF("We shouldn't be writing longs\n");
+}
+
+static void ivshmem_io_writeb(void *opaque, uint8_t addr, uint32_t val)
+{
+    IVShmemState *s = opaque;
+    uint8_t writebyte = val & 0xff; //write the lower 8-bits of 'val'
+
+    switch (addr)
+ { // in future, we will probably want to support more types of doorbells
+        case Doorbell:
+            // wake up the other side
+            qemu_chr_write(s->chr, &writebyte, 1);
+ IVSHMEM_DPRINTF("Writing to the other side 0x%x\n", writebyte);
+            break;
+        default:
+            IVSHMEM_DPRINTF("Unhandled write (0x%x)\n", addr);
+    }
+}
+
+static uint32_t ivshmem_io_readw(void *opaque, uint8_t addr)
+{
+
+    IVShmemState *s = opaque;
+    uint32_t ret;
+
+    switch (addr)
+    {
+        case IntrMask:
+            ret = ivshmem_IntrMask_read(s);
+            break;
+        case IntrStatus:
+            ret = ivshmem_IntrStatus_read(s);
+            break;
+        default:
+            IVSHMEM_DPRINTF("why are we reading 0x%x\n", addr);
+            ret = 0;
+    }
+
+    return ret;
+}
+
+static uint32_t ivshmem_io_readl(void *opaque, uint8_t addr)
+{
+    IVSHMEM_DPRINTF("We shouldn't be reading longs\n");
+    return 0;
+}
+
+static uint32_t ivshmem_io_readb(void *opaque, uint8_t addr)
+{
+    IVSHMEM_DPRINTF("We shouldn't be reading bytes\n");
+
+    return 0;
+}
+
+static void ivshmem_mmio_writeb(void *opaque,
+ target_phys_addr_t addr, uint32_t val)
+{
+    ivshmem_io_writeb(opaque, addr & 0xFF, val);
+}
+
+static void ivshmem_mmio_writew(void *opaque,
+ target_phys_addr_t addr, uint32_t val)
+{
+    ivshmem_io_writew(opaque, addr & 0xFF, val);
+}
+
+static void ivshmem_mmio_writel(void *opaque,
+ target_phys_addr_t addr, uint32_t val)
+{
+    ivshmem_io_writel(opaque, addr & 0xFF, val);
+}
+
+static uint32_t ivshmem_mmio_readb(void *opaque, target_phys_addr_t addr)
+{
+    return ivshmem_io_readb(opaque, addr & 0xFF);
+}
+
+static uint32_t ivshmem_mmio_readw(void *opaque, target_phys_addr_t addr)
+{
+    uint32_t val = ivshmem_io_readw(opaque, addr & 0xFF);
+    return val;
+}
+
+static uint32_t ivshmem_mmio_readl(void *opaque, target_phys_addr_t addr)
+{
+    uint32_t val = ivshmem_io_readl(opaque, addr & 0xFF);
+    return val;
+}
+
+static CPUReadMemoryFunc *ivshmem_mmio_read[3] = {
+    ivshmem_mmio_readb,
+    ivshmem_mmio_readw,
+    ivshmem_mmio_readl,
+};
+
+static CPUWriteMemoryFunc *ivshmem_mmio_write[3] = {
+    ivshmem_mmio_writeb,
+    ivshmem_mmio_writew,
+    ivshmem_mmio_writel,
+};
+
+static int ivshmem_can_receive(void * opaque)
+{
+    return 1;
+}
+
+static void ivshmem_receive(void *opaque, const uint8_t *buf, int size)
+{
+    IVShmemState *s = opaque;
+
+    ivshmem_IntrStatus_write(s, *buf);
+
+    IVSHMEM_DPRINTF("ivshmem_receive 0x%02x\n", *buf);
+}
+
+static void ivshmem_event(void *opaque, int event)
+{
+    IVShmemState *s = opaque;
+    IVSHMEM_DPRINTF("ivshmem_event %d\n", event);
+}
+
+int pci_ivshmem_init(PCIBus *bus)
+{
+    PCI_IVShmemState *d;
+    IVShmemState *s;
+    uint8_t *pci_conf;
+    int ivshmem_fd;
+
+    IVSHMEM_DPRINTF("shared file is %s\n", ivshmem_desc.name);
+    d = (PCI_IVShmemState *)pci_register_device(bus, "kvm_ivshmem",
+                                           sizeof(PCI_IVShmemState),
+                                           -1, NULL, NULL);
+    if (!d) {
+        return -1;
+    }
+
+    s = &d->ivshmem_state;
+
+    /* allocate shared memory RAM */
+    s->ivshmem_offset = qemu_ram_alloc(ivshmem_desc.size);
+    IVSHMEM_DPRINTF("size is = %d\n", ivshmem_desc.size);
+    IVSHMEM_DPRINTF("ivshmem ram offset = %ld\n", s->ivshmem_offset);
+
+    s->ivshmem_ptr = qemu_get_ram_ptr(s->ivshmem_offset);
+
+    s->pci_dev = &d->dev;
+    s->ivshmem_size = ivshmem_desc.size;
+
+    pci_conf = d->dev.config;
+    pci_conf[0x00] = 0xf4; // Qumranet vendor ID 0x5002
+    pci_conf[0x01] = 0x1a;
+    pci_conf[0x02] = 0x10;
+    pci_conf[0x03] = 0x11;
+    pci_conf[0x04] = PCI_COMMAND_IOACCESS | PCI_COMMAND_MEMACCESS;
+    pci_conf[0x0a] = 0x00; // RAM controller
+    pci_conf[0x0b] = 0x05;
+    pci_conf[0x0e] = 0x00; // header_type
+
+ pci_conf[PCI_INTERRUPT_PIN] = 1; // we are going to support interrupts
+
+    /* XXX: ivshmem_desc.size must be a power of two */
+
+ s->ivshmem_mmio_io_addr = cpu_register_io_memory(0, ivshmem_mmio_read,
+                                    ivshmem_mmio_write, s);
+
+    /* region for registers*/
+    pci_register_io_region(&d->dev, 0, 0x100,
+                           PCI_ADDRESS_SPACE_MEM, ivshmem_mmio_map);
+
+    /* region for shared memory */
+    pci_register_io_region(&d->dev, 1, ivshmem_desc.size,
+                           PCI_ADDRESS_SPACE_MEM, ivshmem_map);
+
+    /* open shared memory file  */
+ if ((ivshmem_fd = shm_open(ivshmem_desc.name, O_CREAT|O_RDWR, S_IRWXU)) <
0)
+    {
+        fprintf(stderr, "kvm_ivshmem: could not open shared file\n");
+        exit(-1);
+    }
+
+    ftruncate(ivshmem_fd, ivshmem_desc.size);
+
+    /* mmap onto PCI device's memory */
+    if (mmap(s->ivshmem_ptr, ivshmem_desc.size, PROT_READ|PROT_WRITE,
+ MAP_SHARED|MAP_FIXED, ivshmem_fd, 0) == MAP_FAILED)
+    {
+        fprintf(stderr, "kvm_ivshmem: could not mmap shared file\n");
+        exit(-1);
+    }
+
+ IVSHMEM_DPRINTF("shared object mapped to 0x%p\n", s- >ivshmem_ptr);
+
+    /* setup character device channel */
+
+    if (ivshmem_desc.chrdev != NULL) {
+        char label[32];
+        snprintf(label, 32, "ivshmem_chardev");
+        s->chr = qemu_chr_open(label, ivshmem_desc.chrdev, NULL);
+        if (s->chr == NULL) {
+            fprintf(stderr, "No server listening on %s\n",
ivshmem_desc.chrdev);
+            exit(-1);
+        }
+ qemu_chr_add_handlers(s->chr, ivshmem_can_receive, ivshmem_receive,
+                          ivshmem_event, s);
+    }
+
+    return 0;
+}
+
diff --git a/hw/pc.c b/hw/pc.c
index 34a4d25..7d0cff2 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -67,6 +67,8 @@ static PITState *pit;
static IOAPICState *ioapic;
static PCIDevice *i440fx_state;

+extern int ivshmem_enabled;
+
static void ioport80_write(void *opaque, uint32_t addr, uint32_t data)
{
}
@@ -1040,6 +1042,10 @@ static void pc_init1(ram_addr_t ram_size, int
vga_ram_size,
        }
    }

+    if (pci_enabled && ivshmem_enabled) {
+        pci_ivshmem_init(pci_bus);
+    }
+
    rtc_state = rtc_init(0x70, i8259[8], 2000);

    qemu_register_boot_set(pc_boot_set, rtc_state);
diff --git a/hw/pc.h b/hw/pc.h
index 885c918..0ae0493 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -185,4 +185,7 @@ void isa_ne2000_init(int base, qemu_irq irq, NICInfo *nd);

void extboot_init(BlockDriverState *bs, int cmd);

+/* ivshmem.c */
+int pci_ivshmem_init(PCIBus *bus);
+
#endif
diff --git a/qemu-options.hx b/qemu-options.hx
index 173f458..9ab3e2d 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1243,6 +1243,20 @@ The default device is @code{vc} in graphical mode and
@code{stdio} in
non graphical mode.
ETEXI

+DEF("ivshmem", HAS_ARG, QEMU_OPTION_ivshmem, \
+ "-ivshmem name,size[,unix:path][,server] creates or opens a shared file
'name' of size \
+    'size' (in MB) and exposes it as a PCI device in the guest\n")
+STEXI
+...@item -ivshmem @var{file},@var{size}
+Creates a POSIX shared file named @var{file} of size @var{size} and creates a +PCI device of the same size that maps the shared file into the device for
guests
+to access.  The created file on the host is located in /dev/shm/
+
+...@item unix:@var{path}[,server]
+A unix domain socket is used to send and receive interrupts between VMs. The
unix domain socket
+...@var{path} is used for connections.
+ETEXI
+
DEF("pidfile", HAS_ARG, QEMU_OPTION_pidfile, \
    "-pidfile file   write PID to 'file'\n")
STEXI
diff --git a/sysemu.h b/sysemu.h
index 1f45fd6..862b79e 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -217,6 +217,14 @@ extern CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];

extern CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];

+/* inter-VM shared memory devices */
+
+#define MAX_IVSHMEM_DEVICES 1
+
+extern CharDriverState * ivshmem_chardev;
+void ivshmem_init(const char * optarg);
+int ivshmem_get_size(void);
+
#define TFR(expr) do { if ((expr) != -1) break; } while (errno == EINTR)

#ifdef NEED_CPU_H
diff --git a/vl.c b/vl.c
index 0420634..7260fa1 100644
--- a/vl.c
+++ b/vl.c
@@ -221,6 +221,7 @@ static int rtc_date_offset = -1; /* -1 means no change */
int cirrus_vga_enabled = 1;
int std_vga_enabled = 0;
int vmsvga_enabled = 0;
+int ivshmem_enabled = 0;
int xenfb_enabled = 0;
#ifdef TARGET_SPARC
int graphic_width = 1024;
@@ -239,6 +240,8 @@ int no_quit = 0;
CharDriverState *serial_hds[MAX_SERIAL_PORTS];
CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];
+CharDriverState *ivshmem_chardev;
+const char * ivshmem_device;
#ifdef TARGET_I386
int win2k_install_hack = 0;
int rtc_td_hack = 0;
@@ -5063,6 +5066,8 @@ int main(int argc, char **argv, char **envp)
    cyls = heads = secs = 0;
    translation = BIOS_ATA_TRANSLATION_AUTO;
    monitor_device = "vc:80Cx24C";
+    ivshmem_device = NULL;
+    ivshmem_chardev = NULL;

    serial_devices[0] = "vc:80Cx24C";
    for(i = 1; i < MAX_SERIAL_PORTS; i++)
@@ -5518,6 +5523,10 @@ int main(int argc, char **argv, char **envp)
                parallel_devices[parallel_device_index] = optarg;
                parallel_device_index++;
                break;
+            case QEMU_OPTION_ivshmem:
+                ivshmem_device = optarg;
+                ivshmem_enabled = 1;
+                break;
           case QEMU_OPTION_loadvm:
               loadvm = optarg;
               break;
@@ -5984,6 +5993,11 @@ int main(int argc, char **argv, char **envp)
           }
    }

+    if (ivshmem_enabled) {
+        ivshmem_init(ivshmem_device);
+        ram_size += ivshmem_get_size();
+    }
+
#ifdef CONFIG_KQEMU
/* FIXME: This is a nasty hack because kqemu can't cope with dynamic
       guest ram allocation.  It needs to go away.  */
Thx,

Venkat



-----------------------------------------------
A. Cameron Macdonell
Ph.D. Student
Department of Computing Science
University of Alberta
cam@xxxxxxxxxxxxxx



--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux