Cam - I got your patch to work but without notifications. I could share memory using the patch but notifications aren't working. I bring up two VMs with option "-ivshmem shrmem,1024,/dev/shm/shrmem,server" and "-ivshmem shrmem,1024,/dev/shm/shrmem" respectively. When I make an "ioctl" from one of the VMs to inject an interrupt to the other VM, I get an error in "qemu_chr_write" and the return value is "-1". The "write" call in "send_all" is failing with return value "-1". Am I missing something here? Thx, Venkat -----Original Message----- From: Cam Macdonell [mailto:cam@xxxxxxxxxxxxxx] Sent: Saturday, May 16, 2009 9:01 AM To: Kumar, Venkat Cc: kvm@xxxxxxxxxxxxxxx list Subject: Re: [PATCH v2] Shared memory device with interrupt support On 15-May-09, at 8:54 PM, Kumar, Venkat wrote: > Cam, > > A question on interrupts as well. > What is "unix:path" that needs to be passed in the argument list? > Can it be any string? It has to be a valid path on the host. It will create a unix domain socket on that path. > > If my understanding is correct both the VMs that want to > communicate would give this path in the command line with one of > them specifying "server". Exactly, the one with the "server" in the parameter list will wait for a connection before booting. Cam > > Thx, > Venkat > > > > > > > Support an inter-vm shared memory device that maps a shared- > memory object > as a PCI device in the guest. This patch also supports interrupts > between > guests by communicating over a unix domain socket. This patch > applies to the > qemu-kvm repository. > > This device now creates a qemu character device and sends 1-byte > messages to > trigger interrupts. Writes are triggered by writing to the "Doorbell" > register > on the shared memory PCI device. The lower 8-bits of the value > written to this > register are sent as the 1-byte message so different meanings of > interrupts can > be supported. > > Interrupts are only supported between 2 VMs currently. 
One VM must > act as the > server by adding "server" to the command-line argument. Shared > memory devices > are created with the following command-line: > > -ivshmem <shm object>,<size in MB>,[unix:<path>][,server] > > Interrupts can also be used between host and guest by > implementing a > listener on the host. > > Cam > > --- > Makefile.target | 3 + > hw/ivshmem.c | 421 ++++++++++++++++++++++++++++++++++++++++++++++ > +++++++++ > hw/pc.c | 6 + > hw/pc.h | 3 + > qemu-options.hx | 14 ++ > sysemu.h | 8 + > vl.c | 14 ++ > 7 files changed, 469 insertions(+), 0 deletions(-) > create mode 100644 hw/ivshmem.c > > diff --git a/Makefile.target b/Makefile.target > index b68a689..3190bba 100644 > --- a/Makefile.target > +++ b/Makefile.target > @@ -643,6 +643,9 @@ OBJS += pcnet.o > OBJS += rtl8139.o > OBJS += e1000.o > > +# Inter-VM PCI shared memory > +OBJS += ivshmem.o > + > # Generic watchdog support and some watchdog devices > OBJS += watchdog.o > OBJS += wdt_ib700.o wdt_i6300esb.o > diff --git a/hw/ivshmem.c b/hw/ivshmem.c > new file mode 100644 > index 0000000..95e2268 > --- /dev/null > +++ b/hw/ivshmem.c > @@ -0,0 +1,421 @@ > +/* > + * Inter-VM Shared Memory PCI device. > + * > + * Author: > + * Cam Macdonell <c...@xxxxxxxxxxxxxx> > + * > + * Based On: cirrus_vga.c and rtl8139.c > + * > + * This code is licensed under the GNU GPL v2. > + */ > + > +#include "hw.h" > +#include "console.h" > +#include "pc.h" > +#include "pci.h" > +#include "sysemu.h" > + > +#include "qemu-common.h" > +#include <sys/mman.h> > + > +#define PCI_COMMAND_IOACCESS 0x0001 > +#define PCI_COMMAND_MEMACCESS 0x0002 > +#define PCI_COMMAND_BUSMASTER 0x0004 > + > +//#define DEBUG_IVSHMEM > + > +#ifdef DEBUG_IVSHMEM > +#define IVSHMEM_DPRINTF(fmt, args...) \ > + do {printf("IVSHMEM: " fmt, ##args); } while (0) > +#else > +#define IVSHMEM_DPRINTF(fmt, args...) 
> +#endif > + > +typedef struct IVShmemState { > + uint16_t intrmask; > + uint16_t intrstatus; > + uint16_t doorbell; > + uint8_t *ivshmem_ptr; > + unsigned long ivshmem_offset; > + unsigned int ivshmem_size; > + unsigned long bios_offset; > + unsigned int bios_size; > + target_phys_addr_t base_ctrl; > + int it_shift; > + PCIDevice *pci_dev; > + CharDriverState * chr; > + unsigned long map_addr; > + unsigned long map_end; > + int ivshmem_mmio_io_addr; > +} IVShmemState; > + > +typedef struct PCI_IVShmemState { > + PCIDevice dev; > + IVShmemState ivshmem_state; > +} PCI_IVShmemState; > + > +typedef struct IVShmemDesc { > + char name[1024]; > + char * chrdev; > + int size; > +} IVShmemDesc; > + > + > +/* registers for the Inter-VM shared memory device */ > +enum ivshmem_registers { > + IntrMask = 0, > + IntrStatus = 16, > + Doorbell = 32 > +}; > + > +static int num_ivshmem_devices = 0; > +static IVShmemDesc ivshmem_desc; > + > +static void ivshmem_map(PCIDevice *pci_dev, int region_num, > + uint32_t addr, uint32_t size, int type) > +{ > + PCI_IVShmemState *d = (PCI_IVShmemState *)pci_dev; > + IVShmemState *s = &d->ivshmem_state; > + > + IVSHMEM_DPRINTF("addr = %u size = %u\n", addr, size); > + cpu_register_physical_memory(addr, s->ivshmem_size, s- > >ivshmem_offset); > + > +} > + > +void ivshmem_init(const char * optarg) { > + > + char * temp; > + char * ivshmem_sz; > + int size; > + > + num_ivshmem_devices++; > + > + /* currently we only support 1 device */ > + if (num_ivshmem_devices > MAX_IVSHMEM_DEVICES) { > + return; > + } > + > + temp = strdup(optarg); > + snprintf(ivshmem_desc.name, 1024, "/%s", strsep(&temp,",")); > + ivshmem_sz=strsep(&temp,","); > + if (ivshmem_sz != NULL){ > + size = atol(ivshmem_sz); > + } else { > + size = -1; > + } > + > + ivshmem_desc.chrdev = strsep(&temp,"\0"); > + > + if ( size == -1) { > + ivshmem_desc.size = TARGET_PAGE_SIZE; > + } else { > + ivshmem_desc.size = size*1024*1024; > + } > + IVSHMEM_DPRINTF("optarg is %s, name is %s, 
size is %d, chrdev > is %s\n", > + optarg, ivshmem_desc.name, > + ivshmem_desc.size, > ivshmem_desc.chrdev); > +} > + > +int ivshmem_get_size(void) { > + return ivshmem_desc.size; > +} > + > +/* accessing registers - based on rtl8139 */ > +static void ivshmem_update_irq(IVShmemState *s) > +{ > + int isr; > + isr = (s->intrstatus & s->intrmask) & 0xffff; > + > + /* don't print ISR resets */ > + if (isr) { > + IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n", > + isr ? 1 : 0, s->intrstatus, s->intrmask); > + } > + > + qemu_set_irq(s->pci_dev->irq[0], (isr != 0)); > +} > + > +static void ivshmem_mmio_map(PCIDevice *pci_dev, int region_num, > + uint32_t addr, uint32_t size, int type) > +{ > + PCI_IVShmemState *d = (PCI_IVShmemState *)pci_dev; > + IVShmemState *s = &d->ivshmem_state; > + > + cpu_register_physical_memory(addr + 0, 0x100, s- > >ivshmem_mmio_io_addr); > +} > + > +static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val) > +{ > + IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val); > + > + s->intrmask = val; > + > + ivshmem_update_irq(s); > +} > + > +static uint32_t ivshmem_IntrMask_read(IVShmemState *s) > +{ > + uint32_t ret = s->intrmask; > + > + IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret); > + > + return ret; > +} > + > +static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val) > +{ > + IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val); > + > + s->intrstatus = val; > + > + ivshmem_update_irq(s); > + return; > +} > + > +static uint32_t ivshmem_IntrStatus_read(IVShmemState *s) > +{ > + uint32_t ret = s->intrstatus; > + > + /* reading ISR clears all interrupts */ > + s->intrstatus = 0; > + > + ivshmem_update_irq(s); > + > + return ret; > +} > + > +static void ivshmem_io_writew(void *opaque, uint8_t addr, uint32_t > val) > +{ > + IVShmemState *s = opaque; > + > + IVSHMEM_DPRINTF("writing 0x%x to 0x%lx\n", addr, (unsigned > long) opaque); > + > + addr &= 0xfe; > + > + switch (addr) > + { > + case IntrMask: > + 
ivshmem_IntrMask_write(s, val); > + break; > + > + case IntrStatus: > + ivshmem_IntrStatus_write(s, val); > + break; > + > + default: > + IVSHMEM_DPRINTF("why are we writing 0x%x\n", addr); > + } > +} > + > +static void ivshmem_io_writel(void *opaque, uint8_t addr, uint32_t > val) > +{ > + IVSHMEM_DPRINTF("We shouldn't be writing longs\n"); > +} > + > +static void ivshmem_io_writeb(void *opaque, uint8_t addr, uint32_t > val) > +{ > + IVShmemState *s = opaque; > + uint8_t writebyte = val & 0xff; //write the lower 8-bits of 'val' > + > + switch (addr) > + { // in future, we will probably want to support more types > of doorbells > + case Doorbell: > + // wake up the other side > + qemu_chr_write(s->chr, &writebyte, 1); > + IVSHMEM_DPRINTF("Writing to the other side 0x%x\n", > writebyte); > + break; > + default: > + IVSHMEM_DPRINTF("Unhandled write (0x%x)\n", addr); > + } > +} > + > +static uint32_t ivshmem_io_readw(void *opaque, uint8_t addr) > +{ > + > + IVShmemState *s = opaque; > + uint32_t ret; > + > + switch (addr) > + { > + case IntrMask: > + ret = ivshmem_IntrMask_read(s); > + break; > + case IntrStatus: > + ret = ivshmem_IntrStatus_read(s); > + break; > + default: > + IVSHMEM_DPRINTF("why are we reading 0x%x\n", addr); > + ret = 0; > + } > + > + return ret; > +} > + > +static uint32_t ivshmem_io_readl(void *opaque, uint8_t addr) > +{ > + IVSHMEM_DPRINTF("We shouldn't be reading longs\n"); > + return 0; > +} > + > +static uint32_t ivshmem_io_readb(void *opaque, uint8_t addr) > +{ > + IVSHMEM_DPRINTF("We shouldn't be reading bytes\n"); > + > + return 0; > +} > + > +static void ivshmem_mmio_writeb(void *opaque, > + target_phys_addr_t addr, uint32_t > val) > +{ > + ivshmem_io_writeb(opaque, addr & 0xFF, val); > +} > + > +static void ivshmem_mmio_writew(void *opaque, > + target_phys_addr_t addr, uint32_t > val) > +{ > + ivshmem_io_writew(opaque, addr & 0xFF, val); > +} > + > +static void ivshmem_mmio_writel(void *opaque, > + target_phys_addr_t addr, uint32_t > 
val) > +{ > + ivshmem_io_writel(opaque, addr & 0xFF, val); > +} > + > +static uint32_t ivshmem_mmio_readb(void *opaque, target_phys_addr_t > addr) > +{ > + return ivshmem_io_readb(opaque, addr & 0xFF); > +} > + > +static uint32_t ivshmem_mmio_readw(void *opaque, target_phys_addr_t > addr) > +{ > + uint32_t val = ivshmem_io_readw(opaque, addr & 0xFF); > + return val; > +} > + > +static uint32_t ivshmem_mmio_readl(void *opaque, target_phys_addr_t > addr) > +{ > + uint32_t val = ivshmem_io_readl(opaque, addr & 0xFF); > + return val; > +} > + > +static CPUReadMemoryFunc *ivshmem_mmio_read[3] = { > + ivshmem_mmio_readb, > + ivshmem_mmio_readw, > + ivshmem_mmio_readl, > +}; > + > +static CPUWriteMemoryFunc *ivshmem_mmio_write[3] = { > + ivshmem_mmio_writeb, > + ivshmem_mmio_writew, > + ivshmem_mmio_writel, > +}; > + > +static int ivshmem_can_receive(void * opaque) > +{ > + return 1; > +} > + > +static void ivshmem_receive(void *opaque, const uint8_t *buf, int > size) > +{ > + IVShmemState *s = opaque; > + > + ivshmem_IntrStatus_write(s, *buf); > + > + IVSHMEM_DPRINTF("ivshmem_receive 0x%02x\n", *buf); > +} > + > +static void ivshmem_event(void *opaque, int event) > +{ > + IVShmemState *s = opaque; > + IVSHMEM_DPRINTF("ivshmem_event %d\n", event); > +} > + > +int pci_ivshmem_init(PCIBus *bus) > +{ > + PCI_IVShmemState *d; > + IVShmemState *s; > + uint8_t *pci_conf; > + int ivshmem_fd; > + > + IVSHMEM_DPRINTF("shared file is %s\n", ivshmem_desc.name); > + d = (PCI_IVShmemState *)pci_register_device(bus, "kvm_ivshmem", > + sizeof(PCI_IVShmemState), > + -1, NULL, NULL); > + if (!d) { > + return -1; > + } > + > + s = &d->ivshmem_state; > + > + /* allocate shared memory RAM */ > + s->ivshmem_offset = qemu_ram_alloc(ivshmem_desc.size); > + IVSHMEM_DPRINTF("size is = %d\n", ivshmem_desc.size); > + IVSHMEM_DPRINTF("ivshmem ram offset = %ld\n", s->ivshmem_offset); > + > + s->ivshmem_ptr = qemu_get_ram_ptr(s->ivshmem_offset); > + > + s->pci_dev = &d->dev; > + s->ivshmem_size = 
ivshmem_desc.size; > + > + pci_conf = d->dev.config; > + pci_conf[0x00] = 0xf4; // Qumranet vendor ID 0x5002 > + pci_conf[0x01] = 0x1a; > + pci_conf[0x02] = 0x10; > + pci_conf[0x03] = 0x11; > + pci_conf[0x04] = PCI_COMMAND_IOACCESS | PCI_COMMAND_MEMACCESS; > + pci_conf[0x0a] = 0x00; // RAM controller > + pci_conf[0x0b] = 0x05; > + pci_conf[0x0e] = 0x00; // header_type > + > + pci_conf[PCI_INTERRUPT_PIN] = 1; // we are going to support > interrupts > + > + /* XXX: ivshmem_desc.size must be a power of two */ > + > + s->ivshmem_mmio_io_addr = cpu_register_io_memory(0, > ivshmem_mmio_read, > + ivshmem_mmio_write, s); > + > + /* region for registers*/ > + pci_register_io_region(&d->dev, 0, 0x100, > + PCI_ADDRESS_SPACE_MEM, ivshmem_mmio_map); > + > + /* region for shared memory */ > + pci_register_io_region(&d->dev, 1, ivshmem_desc.size, > + PCI_ADDRESS_SPACE_MEM, ivshmem_map); > + > + /* open shared memory file */ > + if ((ivshmem_fd = shm_open(ivshmem_desc.name, O_CREAT|O_RDWR, > S_IRWXU)) < > 0) > + { > + fprintf(stderr, "kvm_ivshmem: could not open shared file\n"); > + exit(-1); > + } > + > + ftruncate(ivshmem_fd, ivshmem_desc.size); > + > + /* mmap onto PCI device's memory */ > + if (mmap(s->ivshmem_ptr, ivshmem_desc.size, PROT_READ|PROT_WRITE, > + MAP_SHARED|MAP_FIXED, ivshmem_fd, 0) == > MAP_FAILED) > + { > + fprintf(stderr, "kvm_ivshmem: could not mmap shared file\n"); > + exit(-1); > + } > + > + IVSHMEM_DPRINTF("shared object mapped to 0x%p\n", s- > >ivshmem_ptr); > + > + /* setup character device channel */ > + > + if (ivshmem_desc.chrdev != NULL) { > + char label[32]; > + snprintf(label, 32, "ivshmem_chardev"); > + s->chr = qemu_chr_open(label, ivshmem_desc.chrdev, NULL); > + if (s->chr == NULL) { > + fprintf(stderr, "No server listening on %s\n", > ivshmem_desc.chrdev); > + exit(-1); > + } > + qemu_chr_add_handlers(s->chr, ivshmem_can_receive, > ivshmem_receive, > + ivshmem_event, s); > + } > + > + return 0; > +} > + > diff --git a/hw/pc.c b/hw/pc.c > index 
34a4d25..7d0cff2 100644 > --- a/hw/pc.c > +++ b/hw/pc.c > @@ -67,6 +67,8 @@ static PITState *pit; > static IOAPICState *ioapic; > static PCIDevice *i440fx_state; > > +extern int ivshmem_enabled; > + > static void ioport80_write(void *opaque, uint32_t addr, uint32_t data) > { > } > @@ -1040,6 +1042,10 @@ static void pc_init1(ram_addr_t ram_size, int > vga_ram_size, > } > } > > + if (pci_enabled && ivshmem_enabled) { > + pci_ivshmem_init(pci_bus); > + } > + > rtc_state = rtc_init(0x70, i8259[8], 2000); > > qemu_register_boot_set(pc_boot_set, rtc_state); > diff --git a/hw/pc.h b/hw/pc.h > index 885c918..0ae0493 100644 > --- a/hw/pc.h > +++ b/hw/pc.h > @@ -185,4 +185,7 @@ void isa_ne2000_init(int base, qemu_irq irq, > NICInfo *nd); > > void extboot_init(BlockDriverState *bs, int cmd); > > +/* ivshmem.c */ > +int pci_ivshmem_init(PCIBus *bus); > + > #endif > diff --git a/qemu-options.hx b/qemu-options.hx > index 173f458..9ab3e2d 100644 > --- a/qemu-options.hx > +++ b/qemu-options.hx > @@ -1243,6 +1243,20 @@ The default device is @code{vc} in graphical > mode and > @code{stdio} in > non graphical mode. > ETEXI > > +DEF("ivshmem", HAS_ARG, QEMU_OPTION_ivshmem, \ > + "-ivshmem name,size[,unix:path][,server] creates or opens a > shared file > 'name' of size \ > + 'size' (in MB) and exposes it as a PCI device in the guest\n") > +STEXI > +@item -ivshmem @var{file},@var{size} > +Creates a POSIX shared file named @var{file} of size @var{size} and > creates a > +PCI device of the same size that maps the shared file into the > device for > guests > +to access. The created file on the host is located in /dev/shm/ > + > +@item unix:@var{path}[,server] > +A unix domain socket is used to send and receive interrupts between > VMs. The > unix domain socket > +@var{path} is used for connections. 
> +ETEXI > + > DEF("pidfile", HAS_ARG, QEMU_OPTION_pidfile, \ > "-pidfile file write PID to 'file'\n") > STEXI > diff --git a/sysemu.h b/sysemu.h > index 1f45fd6..862b79e 100644 > --- a/sysemu.h > +++ b/sysemu.h > @@ -217,6 +217,14 @@ extern CharDriverState > *parallel_hds[MAX_PARALLEL_PORTS]; > > extern CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES]; > > +/* inter-VM shared memory devices */ > + > +#define MAX_IVSHMEM_DEVICES 1 > + > +extern CharDriverState * ivshmem_chardev; > +void ivshmem_init(const char * optarg); > +int ivshmem_get_size(void); > + > #define TFR(expr) do { if ((expr) != -1) break; } while (errno == > EINTR) > > #ifdef NEED_CPU_H > diff --git a/vl.c b/vl.c > index 0420634..7260fa1 100644 > --- a/vl.c > +++ b/vl.c > @@ -221,6 +221,7 @@ static int rtc_date_offset = -1; /* -1 means no > change */ > int cirrus_vga_enabled = 1; > int std_vga_enabled = 0; > int vmsvga_enabled = 0; > +int ivshmem_enabled = 0; > int xenfb_enabled = 0; > #ifdef TARGET_SPARC > int graphic_width = 1024; > @@ -239,6 +240,8 @@ int no_quit = 0; > CharDriverState *serial_hds[MAX_SERIAL_PORTS]; > CharDriverState *parallel_hds[MAX_PARALLEL_PORTS]; > CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES]; > +CharDriverState *ivshmem_chardev; > +const char * ivshmem_device; > #ifdef TARGET_I386 > int win2k_install_hack = 0; > int rtc_td_hack = 0; > @@ -5063,6 +5066,8 @@ int main(int argc, char **argv, char **envp) > cyls = heads = secs = 0; > translation = BIOS_ATA_TRANSLATION_AUTO; > monitor_device = "vc:80Cx24C"; > + ivshmem_device = NULL; > + ivshmem_chardev = NULL; > > serial_devices[0] = "vc:80Cx24C"; > for(i = 1; i < MAX_SERIAL_PORTS; i++) > @@ -5518,6 +5523,10 @@ int main(int argc, char **argv, char **envp) > parallel_devices[parallel_device_index] = optarg; > parallel_device_index++; > break; > + case QEMU_OPTION_ivshmem: > + ivshmem_device = optarg; > + ivshmem_enabled = 1; > + break; > case QEMU_OPTION_loadvm: > loadvm = optarg; > break; > @@ -5984,6 +5993,11 @@ int 
main(int argc, char **argv, char **envp) > } > } > > + if (ivshmem_enabled) { > + ivshmem_init(ivshmem_device); > + ram_size += ivshmem_get_size(); > + } > + > #ifdef CONFIG_KQEMU > /* FIXME: This is a nasty hack because kqemu can't cope with > dynamic > guest ram allocation. It needs to go away. */ > Thx, > > Venkat ----------------------------------------------- A. Cameron Macdonell Ph.D. Student Department of Computing Science University of Alberta cam@xxxxxxxxxxxxxx -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html