Thx,
Venkat
Support an inter-VM shared memory device that maps a shared-memory
object as a PCI device in the guest. This patch also supports
interrupts between guests by communicating over a unix domain socket.
This patch applies to the qemu-kvm repository.
This device now creates a qemu character device and sends 1-byte
messages to trigger interrupts. Interrupts are triggered by writing to
the "Doorbell" register on the shared memory PCI device. The lower
8 bits of the value written to this register are sent as the 1-byte
message, so different meanings of interrupts can be supported.
Interrupts are only supported between 2 VMs currently. One VM must act
as the server by adding "server" to the command-line argument. Shared
memory devices are created with the following command-line:
-ivshmem <shm object>,<size in MB>[,unix:<path>][,server]
Interrupts can also be used between host and guest by implementing a
listener on the host.
Cam
---
Makefile.target | 3 +
hw/ivshmem.c | 421 ++++++++++++++++++++++++++++++++++++++++++++++
+++++++++
hw/pc.c | 6 +
hw/pc.h | 3 +
qemu-options.hx | 14 ++
sysemu.h | 8 +
vl.c | 14 ++
7 files changed, 469 insertions(+), 0 deletions(-)
create mode 100644 hw/ivshmem.c
diff --git a/Makefile.target b/Makefile.target
index b68a689..3190bba 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -643,6 +643,9 @@ OBJS += pcnet.o
OBJS += rtl8139.o
OBJS += e1000.o
+# Inter-VM PCI shared memory
+OBJS += ivshmem.o
+
# Generic watchdog support and some watchdog devices
OBJS += watchdog.o
OBJS += wdt_ib700.o wdt_i6300esb.o
diff --git a/hw/ivshmem.c b/hw/ivshmem.c
new file mode 100644
index 0000000..95e2268
--- /dev/null
+++ b/hw/ivshmem.c
@@ -0,0 +1,421 @@
+/*
+ * Inter-VM Shared Memory PCI device.
+ *
+ * Author:
+ * Cam Macdonell <c...@xxxxxxxxxxxxxx>
+ *
+ * Based On: cirrus_vga.c and rtl8139.c
+ *
+ * This code is licensed under the GNU GPL v2.
+ */
+
+#include "hw.h"
+#include "console.h"
+#include "pc.h"
+#include "pci.h"
+#include "sysemu.h"
+
+#include "qemu-common.h"
+#include <limits.h>
+#include <sys/mman.h>
+
+#define PCI_COMMAND_IOACCESS 0x0001
+#define PCI_COMMAND_MEMACCESS 0x0002
+#define PCI_COMMAND_BUSMASTER 0x0004
+
+//#define DEBUG_IVSHMEM
+
+/*
+ * Debug trace helper.  When DEBUG_IVSHMEM is defined it printf()s the
+ * message with an "IVSHMEM: " prefix; otherwise it expands to nothing, so
+ * its arguments are not evaluated at all.
+ */
+#ifdef DEBUG_IVSHMEM
+#define IVSHMEM_DPRINTF(fmt, args...) \
+ do {printf("IVSHMEM: " fmt, ##args); } while (0)
+#else
+#define IVSHMEM_DPRINTF(fmt, args...)
+#endif
+
+/* Per-device state of the inter-VM shared memory device. */
+typedef struct IVShmemState {
+ uint16_t intrmask; /* IntrMask register; ANDed with intrstatus to derive the IRQ level */
+ uint16_t intrstatus; /* IntrStatus register; set by writes/chardev input, cleared on read */
+ uint16_t doorbell; /* not referenced elsewhere in this patch */
+ uint8_t *ivshmem_ptr; /* host pointer from qemu_get_ram_ptr(); the shm object is mmap'ed here */
+ unsigned long ivshmem_offset; /* value returned by qemu_ram_alloc() */
+ unsigned int ivshmem_size; /* copy of ivshmem_desc.size */
+ unsigned long bios_offset; /* not referenced elsewhere in this patch */
+ unsigned int bios_size; /* not referenced elsewhere in this patch */
+ target_phys_addr_t base_ctrl; /* not referenced elsewhere in this patch */
+ int it_shift; /* not referenced elsewhere in this patch */
+ PCIDevice *pci_dev; /* points at the 'dev' member of the enclosing PCI_IVShmemState */
+ CharDriverState * chr; /* chardev used for doorbell bytes; only opened when a chardev spec was given */
+ unsigned long map_addr; /* not referenced elsewhere in this patch */
+ unsigned long map_end; /* not referenced elsewhere in this patch */
+ int ivshmem_mmio_io_addr; /* handle returned by cpu_register_io_memory() for the register window */
+} IVShmemState;
+
+/*
+ * PCI wrapper around IVShmemState.  'dev' must remain the first member:
+ * the BAR map callbacks cast the PCIDevice pointer they receive straight to
+ * PCI_IVShmemState *.
+ */
+typedef struct PCI_IVShmemState {
+ PCIDevice dev;
+ IVShmemState ivshmem_state;
+} PCI_IVShmemState;
+
+/* Parsed form of the -ivshmem command-line option (filled by ivshmem_init). */
+typedef struct IVShmemDesc {
+ char name[1024]; /* shared object name with a leading '/', as passed to shm_open() */
+ char * chrdev; /* remainder of the option string after the size, or NULL if absent */
+ int size; /* region size in bytes */
+} IVShmemDesc;
+
+
+/* registers for the Inter-VM shared memory device */
+/* Values are the offsets compared against 'addr' in the ivshmem_io_* handlers. */
+enum ivshmem_registers {
+ IntrMask = 0, /* handled by the 16-bit read/write handlers */
+ IntrStatus = 16, /* handled by the 16-bit read/write handlers; reading clears it */
+ Doorbell = 32 /* handled by the 8-bit write handler only */
+};
+
+/* Number of times ivshmem_init() has been called; compared against MAX_IVSHMEM_DEVICES. */
+static int num_ivshmem_devices = 0;
+/* Single global device description shared by ivshmem_init() and pci_ivshmem_init(). */
+static IVShmemDesc ivshmem_desc;
+
+/*
+ * BAR map callback for the shared-memory region (registered as region 1 by
+ * pci_ivshmem_init).  Backs the guest-physical range starting at 'addr' with
+ * the RAM block recorded in s->ivshmem_offset.
+ *
+ * Note that the length registered is s->ivshmem_size, not the 'size'
+ * argument; 'region_num' and 'type' are unused.
+ */
+static void ivshmem_map(PCIDevice *pci_dev, int region_num,
+                        uint32_t addr, uint32_t size, int type)
+{
+    PCI_IVShmemState *d = (PCI_IVShmemState *)pci_dev;
+    IVShmemState *s = &d->ivshmem_state;
+
+    IVSHMEM_DPRINTF("addr = %u size = %u\n", addr, size);
+    cpu_register_physical_memory(addr, s->ivshmem_size, s->ivshmem_offset);
+}
+
+/*
+ * Parse the -ivshmem option string
+ *     <name>[,<size in MB>[,<chardev spec>]]
+ * into the file-scope ivshmem_desc.
+ *
+ *  - name:    stored with a leading '/' (the form shm_open() expects).
+ *  - size:    decimal megabytes; when omitted the region is TARGET_PAGE_SIZE
+ *             bytes.  A non-numeric, non-positive or too-large value is a
+ *             fatal error (previously it silently produced a zero, negative
+ *             or overflowed byte count).
+ *  - chardev: everything after the second comma, kept verbatim; NULL when
+ *             absent.
+ *
+ * Only the first MAX_IVSHMEM_DEVICES calls have any effect.
+ *
+ * The strdup()ed copy of optarg is intentionally never freed:
+ * ivshmem_desc.chrdev points into it for the lifetime of the process.
+ */
+void ivshmem_init(const char * optarg) {
+
+    char *rest;
+    char *name;
+    char *ivshmem_sz;
+
+    num_ivshmem_devices++;
+
+    /* currently we only support 1 device */
+    if (num_ivshmem_devices > MAX_IVSHMEM_DEVICES) {
+        return;
+    }
+
+    rest = strdup(optarg);
+    if (rest == NULL) {
+        fprintf(stderr, "kvm_ivshmem: out of memory parsing options\n");
+        exit(-1);
+    }
+
+    name = strsep(&rest, ",");
+    snprintf(ivshmem_desc.name, sizeof(ivshmem_desc.name), "/%s", name);
+
+    ivshmem_sz = strsep(&rest, ",");
+    if (ivshmem_sz == NULL) {
+        ivshmem_desc.size = TARGET_PAGE_SIZE;
+    } else {
+        char *end;
+        long mb = strtol(ivshmem_sz, &end, 10);
+
+        /* the byte count is kept in an int, so bound the MB value first */
+        if (end == ivshmem_sz || *end != '\0' ||
+            mb <= 0 || mb > INT_MAX / (1024 * 1024)) {
+            fprintf(stderr, "kvm_ivshmem: invalid size '%s' (in MB)\n",
+                    ivshmem_sz);
+            exit(-1);
+        }
+        ivshmem_desc.size = (int)mb * 1024 * 1024;
+    }
+
+    /* whatever follows the size (possibly nothing -> NULL) is the chardev */
+    ivshmem_desc.chrdev = rest;
+
+    IVSHMEM_DPRINTF("optarg is %s, name is %s, size is %d, chrdev is %s\n",
+                    optarg, ivshmem_desc.name,
+                    ivshmem_desc.size,
+                    ivshmem_desc.chrdev ? ivshmem_desc.chrdev : "(none)");
+}
+
+/* Return the shared-memory region size, in bytes, recorded by ivshmem_init(). */
+int ivshmem_get_size(void)
+{
+    const int bytes = ivshmem_desc.size;
+
+    return bytes;
+}
+
+/* accessing registers - based on rtl8139 */
+/*
+ * Recompute the level of the device's interrupt line: asserted when any bit
+ * is set in both IntrStatus and IntrMask, deasserted otherwise.
+ */
+static void ivshmem_update_irq(IVShmemState *s)
+{
+    const int pending = (s->intrmask & s->intrstatus) & 0xffff;
+    const int level = (pending != 0);
+
+    /* don't print ISR resets */
+    if (level) {
+        IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n",
+                        level, s->intrstatus, s->intrmask);
+    }
+
+    qemu_set_irq(s->pci_dev->irq[0], level);
+}
+
+/*
+ * BAR map callback for the register window (registered as region 0 by
+ * pci_ivshmem_init).  Routes the 0x100 bytes starting at 'addr' to the MMIO
+ * handlers identified by s->ivshmem_mmio_io_addr.
+ *
+ * 'region_num', 'size' and 'type' are unused.
+ */
+static void ivshmem_mmio_map(PCIDevice *pci_dev, int region_num,
+                             uint32_t addr, uint32_t size, int type)
+{
+    PCI_IVShmemState *d = (PCI_IVShmemState *)pci_dev;
+    IVShmemState *s = &d->ivshmem_state;
+
+    cpu_register_physical_memory(addr + 0, 0x100, s->ivshmem_mmio_io_addr);
+}
+
+/*
+ * Store a new interrupt mask (truncated to the 16-bit register) and
+ * re-evaluate the interrupt line.
+ */
+static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
+{
+    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);
+
+    s->intrmask = (uint16_t)val;
+    ivshmem_update_irq(s);
+}
+
+/* Return the current interrupt mask; has no side effects on device state. */
+static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
+{
+    const uint32_t mask = s->intrmask;
+
+    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", mask);
+    return mask;
+}
+
+/*
+ * Overwrite the interrupt status register (truncated to 16 bits) and
+ * re-evaluate the interrupt line.  Also used by ivshmem_receive() to inject
+ * a byte arriving from the chardev.
+ */
+static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
+{
+    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);
+
+    s->intrstatus = (uint16_t)val;
+    ivshmem_update_irq(s);
+}
+
+/*
+ * Read-to-clear access to the interrupt status register: returns the value
+ * held before the read, zeroes the register and re-evaluates the interrupt
+ * line.
+ */
+static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
+{
+    const uint32_t pending = s->intrstatus;
+
+    /* reading ISR clears all interrupts */
+    s->intrstatus = 0;
+    ivshmem_update_irq(s);
+
+    return pending;
+}
+
+/*
+ * 16-bit register write.  Bit 0 of the offset is masked off before decoding,
+ * so an odd offset addresses the register below it.  Only IntrMask and
+ * IntrStatus are writable this way; anything else is ignored (and traced in
+ * debug builds).
+ */
+static void ivshmem_io_writew(void *opaque, uint8_t addr, uint32_t val)
+{
+    IVShmemState *state = opaque;
+    uint8_t reg;
+
+    IVSHMEM_DPRINTF("writing 0x%x to 0x%lx\n", addr, (unsigned long) opaque);
+
+    reg = addr & 0xfe;
+
+    if (reg == IntrMask) {
+        ivshmem_IntrMask_write(state, val);
+    } else if (reg == IntrStatus) {
+        ivshmem_IntrStatus_write(state, val);
+    } else {
+        IVSHMEM_DPRINTF("why are we writing 0x%x\n", reg);
+    }
+}
+
+/* 32-bit register writes are not supported: the value is discarded. */
+static void ivshmem_io_writel(void *opaque, uint8_t addr, uint32_t val)
+{
+    (void)opaque;
+    (void)addr;
+    (void)val;
+
+    IVSHMEM_DPRINTF("We shouldn't be writing longs\n");
+}
+
+/*
+ * 8-bit register write.  Only the Doorbell register is handled: the low
+ * byte of 'val' is sent over the chardev as a 1-byte message to the peer.
+ *
+ * s->chr is only assigned by pci_ivshmem_init() when a chardev spec was
+ * given on the command line, so a doorbell write without one is dropped
+ * instead of being passed to qemu_chr_write() with a NULL device.
+ * NOTE(review): this relies on the device state being zero-initialised by
+ * pci_register_device() -- confirm.
+ */
+static void ivshmem_io_writeb(void *opaque, uint8_t addr, uint32_t val)
+{
+    IVShmemState *s = opaque;
+    uint8_t writebyte = val & 0xff; /* write the lower 8-bits of 'val' */
+
+    switch (addr) {
+    /* in future, we will probably want to support more types of doorbells */
+    case Doorbell:
+        if (s->chr == NULL) {
+            IVSHMEM_DPRINTF("Doorbell write ignored: no chardev configured\n");
+            break;
+        }
+        /* wake up the other side */
+        qemu_chr_write(s->chr, &writebyte, 1);
+        IVSHMEM_DPRINTF("Writing to the other side 0x%x\n", writebyte);
+        break;
+    default:
+        IVSHMEM_DPRINTF("Unhandled write (0x%x)\n", addr);
+    }
+}
+
+/*
+ * 16-bit register read.  Returns IntrMask or IntrStatus (the latter is
+ * cleared by the read); any other offset reads as 0.  Unlike the 16-bit
+ * write path, the offset is compared as-is, without masking bit 0.
+ */
+static uint32_t ivshmem_io_readw(void *opaque, uint8_t addr)
+{
+    IVShmemState *state = opaque;
+
+    if (addr == IntrMask) {
+        return ivshmem_IntrMask_read(state);
+    }
+    if (addr == IntrStatus) {
+        return ivshmem_IntrStatus_read(state);
+    }
+
+    IVSHMEM_DPRINTF("why are we reading 0x%x\n", addr);
+    return 0;
+}
+
+/* 32-bit register reads are not supported: always yields 0. */
+static uint32_t ivshmem_io_readl(void *opaque, uint8_t addr)
+{
+    (void)opaque;
+    (void)addr;
+
+    IVSHMEM_DPRINTF("We shouldn't be reading longs\n");
+    return 0;
+}
+
+/* 8-bit register reads are not supported: always yields 0. */
+static uint32_t ivshmem_io_readb(void *opaque, uint8_t addr)
+{
+    (void)opaque;
+    (void)addr;
+
+    IVSHMEM_DPRINTF("We shouldn't be reading bytes\n");
+    return 0;
+}
+
+/* MMIO byte-write hook: passes the low 8 bits of the address to ivshmem_io_writeb(). */
+static void ivshmem_mmio_writeb(void *opaque, target_phys_addr_t addr,
+                                uint32_t val)
+{
+    const uint8_t reg = addr & 0xFF;
+
+    ivshmem_io_writeb(opaque, reg, val);
+}
+
+/* MMIO word-write hook: passes the low 8 bits of the address to ivshmem_io_writew(). */
+static void ivshmem_mmio_writew(void *opaque, target_phys_addr_t addr,
+                                uint32_t val)
+{
+    const uint8_t reg = addr & 0xFF;
+
+    ivshmem_io_writew(opaque, reg, val);
+}
+
+/* MMIO long-write hook: passes the low 8 bits of the address to ivshmem_io_writel(). */
+static void ivshmem_mmio_writel(void *opaque, target_phys_addr_t addr,
+                                uint32_t val)
+{
+    const uint8_t reg = addr & 0xFF;
+
+    ivshmem_io_writel(opaque, reg, val);
+}
+
+/* MMIO byte-read hook: passes the low 8 bits of the address to ivshmem_io_readb(). */
+static uint32_t ivshmem_mmio_readb(void *opaque, target_phys_addr_t addr)
+{
+    const uint8_t reg = addr & 0xFF;
+
+    return ivshmem_io_readb(opaque, reg);
+}
+
+/* MMIO word-read hook: passes the low 8 bits of the address to ivshmem_io_readw(). */
+static uint32_t ivshmem_mmio_readw(void *opaque, target_phys_addr_t addr)
+{
+    return ivshmem_io_readw(opaque, addr & 0xFF);
+}
+
+/* MMIO long-read hook: passes the low 8 bits of the address to ivshmem_io_readl(). */
+static uint32_t ivshmem_mmio_readl(void *opaque, target_phys_addr_t addr)
+{
+    return ivshmem_io_readl(opaque, addr & 0xFF);
+}
+
+/* Read handlers passed to cpu_register_io_memory(): byte, word, long, in that order. */
+static CPUReadMemoryFunc *ivshmem_mmio_read[3] = {
+ ivshmem_mmio_readb,
+ ivshmem_mmio_readw,
+ ivshmem_mmio_readl,
+};
+
+/* Write handlers passed to cpu_register_io_memory(): byte, word, long, in that order. */
+static CPUWriteMemoryFunc *ivshmem_mmio_write[3] = {
+ ivshmem_mmio_writeb,
+ ivshmem_mmio_writew,
+ ivshmem_mmio_writel,
+};
+
+/*
+ * Chardev "can receive" callback: unconditionally answers 1.
+ * NOTE(review): presumably the chardev layer treats this as the number of
+ * bytes the device will accept per delivery -- confirm against the chardev
+ * API.
+ */
+static int ivshmem_can_receive(void * opaque)
+{
+    (void)opaque;
+
+    return 1;
+}
+
+/*
+ * Chardev receive callback: the first byte of 'buf' becomes the new
+ * IntrStatus value, which raises the interrupt if a matching IntrMask bit is
+ * set.  'size' is not examined; any bytes after the first are ignored.
+ */
+static void ivshmem_receive(void *opaque, const uint8_t *buf, int size)
+{
+    IVShmemState *state = opaque;
+    const uint8_t msg = *buf;
+
+    ivshmem_IntrStatus_write(state, msg);
+
+    IVSHMEM_DPRINTF("ivshmem_receive 0x%02x\n", msg);
+}
+
+/*
+ * Chardev event callback.  Events are only traced (in debug builds); the
+ * device takes no action on them.
+ */
+static void ivshmem_event(void *opaque, int event)
+{
+    /* device state is not needed here; the unused local was dropped */
+    (void)opaque;
+
+    IVSHMEM_DPRINTF("ivshmem_event %d\n", event);
+}
+
+/*
+ * Create the ivshmem PCI device on 'bus' from the settings previously
+ * parsed into ivshmem_desc by ivshmem_init().
+ *
+ * Steps:
+ *  1. register the PCI device and fill in its config-space header;
+ *  2. allocate guest RAM of ivshmem_desc.size bytes and expose it as BAR 1,
+ *     with a 0x100-byte register window as BAR 0;
+ *  3. open (creating if needed) the POSIX shared memory object, size it, and
+ *     mmap it MAP_FIXED over the RAM block so guest accesses hit the shared
+ *     object;
+ *  4. if a chardev spec was given, open it and install the receive handlers
+ *     used to deliver interrupts from the peer.
+ *
+ * Returns 0 on success, -1 if the PCI device could not be registered.
+ * Failure to open, resize or map the shared object, or to open the chardev,
+ * prints a message and terminates the process.
+ */
+int pci_ivshmem_init(PCIBus *bus)
+{
+    PCI_IVShmemState *d;
+    IVShmemState *s;
+    uint8_t *pci_conf;
+    int ivshmem_fd;
+
+    IVSHMEM_DPRINTF("shared file is %s\n", ivshmem_desc.name);
+    d = (PCI_IVShmemState *)pci_register_device(bus, "kvm_ivshmem",
+                                                sizeof(PCI_IVShmemState),
+                                                -1, NULL, NULL);
+    if (!d) {
+        return -1;
+    }
+
+    s = &d->ivshmem_state;
+
+    /* allocate shared memory RAM */
+    s->ivshmem_offset = qemu_ram_alloc(ivshmem_desc.size);
+    IVSHMEM_DPRINTF("size is = %d\n", ivshmem_desc.size);
+    IVSHMEM_DPRINTF("ivshmem ram offset = %ld\n", s->ivshmem_offset);
+
+    s->ivshmem_ptr = qemu_get_ram_ptr(s->ivshmem_offset);
+
+    s->pci_dev = &d->dev;
+    s->ivshmem_size = ivshmem_desc.size;
+    s->chr = NULL; /* stays NULL unless a chardev is opened below */
+
+    pci_conf = d->dev.config;
+    pci_conf[0x00] = 0xf4; /* vendor ID 0x1af4 (Qumranet), low byte first */
+    pci_conf[0x01] = 0x1a;
+    pci_conf[0x02] = 0x10; /* device ID 0x1110, low byte first */
+    pci_conf[0x03] = 0x11;
+    pci_conf[0x04] = PCI_COMMAND_IOACCESS | PCI_COMMAND_MEMACCESS;
+    pci_conf[0x0a] = 0x00; /* RAM controller */
+    pci_conf[0x0b] = 0x05;
+    pci_conf[0x0e] = 0x00; /* header_type */
+
+    pci_conf[PCI_INTERRUPT_PIN] = 1; /* we are going to support interrupts */
+
+    /* XXX: ivshmem_desc.size must be a power of two */
+
+    s->ivshmem_mmio_io_addr = cpu_register_io_memory(0, ivshmem_mmio_read,
+                                                     ivshmem_mmio_write, s);
+
+    /* region for registers */
+    pci_register_io_region(&d->dev, 0, 0x100,
+                           PCI_ADDRESS_SPACE_MEM, ivshmem_mmio_map);
+
+    /* region for shared memory */
+    pci_register_io_region(&d->dev, 1, ivshmem_desc.size,
+                           PCI_ADDRESS_SPACE_MEM, ivshmem_map);
+
+    /* open shared memory file */
+    ivshmem_fd = shm_open(ivshmem_desc.name, O_CREAT|O_RDWR, S_IRWXU);
+    if (ivshmem_fd < 0) {
+        fprintf(stderr, "kvm_ivshmem: could not open shared file\n");
+        exit(-1);
+    }
+
+    /* a freshly created object has length 0; mapping it unsized would fault */
+    if (ftruncate(ivshmem_fd, ivshmem_desc.size) < 0) {
+        fprintf(stderr, "kvm_ivshmem: could not resize shared file\n");
+        exit(-1);
+    }
+
+    /* mmap onto PCI device's memory */
+    if (mmap(s->ivshmem_ptr, ivshmem_desc.size, PROT_READ|PROT_WRITE,
+             MAP_SHARED|MAP_FIXED, ivshmem_fd, 0) == MAP_FAILED) {
+        fprintf(stderr, "kvm_ivshmem: could not mmap shared file\n");
+        exit(-1);
+    }
+
+    /* the mapping remains valid after the descriptor is closed */
+    close(ivshmem_fd);
+
+    IVSHMEM_DPRINTF("shared object mapped to 0x%p\n", s->ivshmem_ptr);
+
+    /* setup character device channel */
+
+    if (ivshmem_desc.chrdev != NULL) {
+        char label[32];
+        snprintf(label, 32, "ivshmem_chardev");
+        s->chr = qemu_chr_open(label, ivshmem_desc.chrdev, NULL);
+        if (s->chr == NULL) {
+            fprintf(stderr, "No server listening on %s\n",
+                    ivshmem_desc.chrdev);
+            exit(-1);
+        }
+        qemu_chr_add_handlers(s->chr, ivshmem_can_receive, ivshmem_receive,
+                              ivshmem_event, s);
+    }
+
+    return 0;
+}
+
diff --git a/hw/pc.c b/hw/pc.c
index 34a4d25..7d0cff2 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -67,6 +67,8 @@ static PITState *pit;
static IOAPICState *ioapic;
static PCIDevice *i440fx_state;
+extern int ivshmem_enabled;
+
static void ioport80_write(void *opaque, uint32_t addr, uint32_t data)
{
}
@@ -1040,6 +1042,10 @@ static void pc_init1(ram_addr_t ram_size, int
vga_ram_size,
}
}
+ if (pci_enabled && ivshmem_enabled) {
+ pci_ivshmem_init(pci_bus);
+ }
+
rtc_state = rtc_init(0x70, i8259[8], 2000);
qemu_register_boot_set(pc_boot_set, rtc_state);
diff --git a/hw/pc.h b/hw/pc.h
index 885c918..0ae0493 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -185,4 +185,7 @@ void isa_ne2000_init(int base, qemu_irq irq,
NICInfo *nd);
void extboot_init(BlockDriverState *bs, int cmd);
+/* ivshmem.c */
+int pci_ivshmem_init(PCIBus *bus);
+
#endif
diff --git a/qemu-options.hx b/qemu-options.hx
index 173f458..9ab3e2d 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1243,6 +1243,20 @@ The default device is @code{vc} in graphical
mode and
@code{stdio} in
non graphical mode.
ETEXI
+DEF("ivshmem", HAS_ARG, QEMU_OPTION_ivshmem, \
+ "-ivshmem name,size[,unix:path][,server] creates or opens a
shared file
'name' of size \
+ 'size' (in MB) and exposes it as a PCI device in the guest\n")
+STEXI
+@item -ivshmem @var{file},@var{size}
+Creates a POSIX shared file named @var{file} of size @var{size} and creates a
+PCI device of the same size that maps the shared file into the device for guests
+to access. The created file on the host is located in /dev/shm/
+
+@item unix:@var{path}[,server]
+A unix domain socket is used to send and receive interrupts between VMs. The
+unix domain socket @var{path} is used for connections.
+ETEXI
+
DEF("pidfile", HAS_ARG, QEMU_OPTION_pidfile, \
"-pidfile file write PID to 'file'\n")
STEXI
diff --git a/sysemu.h b/sysemu.h
index 1f45fd6..862b79e 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -217,6 +217,14 @@ extern CharDriverState
*parallel_hds[MAX_PARALLEL_PORTS];
extern CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];
+/* inter-VM shared memory devices */
+
+#define MAX_IVSHMEM_DEVICES 1
+
+extern CharDriverState * ivshmem_chardev;
+void ivshmem_init(const char * optarg);
+int ivshmem_get_size(void);
+
#define TFR(expr) do { if ((expr) != -1) break; } while (errno ==
EINTR)
#ifdef NEED_CPU_H
diff --git a/vl.c b/vl.c
index 0420634..7260fa1 100644
--- a/vl.c
+++ b/vl.c
@@ -221,6 +221,7 @@ static int rtc_date_offset = -1; /* -1 means no
change */
int cirrus_vga_enabled = 1;
int std_vga_enabled = 0;
int vmsvga_enabled = 0;
+int ivshmem_enabled = 0;
int xenfb_enabled = 0;
#ifdef TARGET_SPARC
int graphic_width = 1024;
@@ -239,6 +240,8 @@ int no_quit = 0;
CharDriverState *serial_hds[MAX_SERIAL_PORTS];
CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];
+CharDriverState *ivshmem_chardev;
+const char * ivshmem_device;
#ifdef TARGET_I386
int win2k_install_hack = 0;
int rtc_td_hack = 0;
@@ -5063,6 +5066,8 @@ int main(int argc, char **argv, char **envp)
cyls = heads = secs = 0;
translation = BIOS_ATA_TRANSLATION_AUTO;
monitor_device = "vc:80Cx24C";
+ ivshmem_device = NULL;
+ ivshmem_chardev = NULL;
serial_devices[0] = "vc:80Cx24C";
for(i = 1; i < MAX_SERIAL_PORTS; i++)
@@ -5518,6 +5523,10 @@ int main(int argc, char **argv, char **envp)
parallel_devices[parallel_device_index] = optarg;
parallel_device_index++;
break;
+ case QEMU_OPTION_ivshmem:
+ ivshmem_device = optarg;
+ ivshmem_enabled = 1;
+ break;
case QEMU_OPTION_loadvm:
loadvm = optarg;
break;
@@ -5984,6 +5993,11 @@ int main(int argc, char **argv, char **envp)
}
}
+ if (ivshmem_enabled) {
+ ivshmem_init(ivshmem_device);
+ ram_size += ivshmem_get_size();
+ }
+
#ifdef CONFIG_KQEMU
/* FIXME: This is a nasty hack because kqemu can't cope with
dynamic
guest ram allocation. It needs to go away. */
Thx,
Venkat