On 04/21/2010 08:53 PM, Cam Macdonell wrote:
Support an inter-vm shared memory device that maps a shared-memory
object as a PCI device in the guest. This patch also supports
interrupts between guests by communicating over a unix domain socket.
This patch applies to the qemu-kvm repository.

-device ivshmem,size=<size in format accepted by -m>[,shm=<shm name>]

Interrupts are supported between multiple VMs by using a shared memory
server and a chardev socket.

-device ivshmem,size=<size in format accepted by -m>[,shm=<shm name>]
        [,chardev=<id>][,msi=on][,irqfd=on][,vectors=n]
-chardev socket,path=<path>,id=<id>

(shared memory server is qemu.git/contrib/ivshmem-server)

Sample programs and init scripts are in a git repo here:

+typedef struct EventfdEntry {
+    PCIDevice *pdev;
+    int vector;
+} EventfdEntry;
+
+typedef struct IVShmemState {
+    PCIDevice dev;
+    uint32_t intrmask;
+    uint32_t intrstatus;
+    uint32_t doorbell;
+
+    CharDriverState * chr;
+    CharDriverState ** eventfd_chr;
+    int ivshmem_mmio_io_addr;
+
+    pcibus_t mmio_addr;
+    unsigned long ivshmem_offset;
+    uint64_t ivshmem_size; /* size of shared memory region */
+    int shm_fd; /* shared memory file descriptor */
+
+    int nr_allocated_vms;
+    /* array of eventfds for each guest */
+    int ** eventfds;
+    /* keep track of # of eventfds for each guest */
+    int * eventfds_posn_count;
More readable:

    typedef struct Peer {
        int nb_eventfds;
        int *eventfds;
    } Peer;

    int nb_peers;
    Peer *peers;

Does eventfd_chr need to be there as well?
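Purely as an illustration of the suggestion, here is how an access site
like close_guest_eventfds() below would read with that layout
(hypothetical code, not part of the patch):

    Peer *peer = &s->peers[posn];

    for (i = 0; i < peer->nb_eventfds; i++) {
        close(peer->eventfds[i]);
    }
    qemu_free(peer->eventfds);
    peer->nb_eventfds = 0;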
+
+    int nr_alloc_guests;
+    int vm_id;
+    int num_eventfds;
+    uint32_t vectors;
+    uint32_t features;
+    EventfdEntry *eventfd_table;
+
+    char * shmobj;
+    char * sizearg;
Does this need to be part of the state?
+} IVShmemState;
+
+/* registers for the Inter-VM shared memory device */
+enum ivshmem_registers {
+    IntrMask = 0,
+    IntrStatus = 4,
+    IVPosition = 8,
+    Doorbell = 12,
+};
+
+static inline uint32_t ivshmem_has_feature(IVShmemState *ivs, int feature) {
+    return (ivs->features & (1 << feature));
+}
+
+static inline int is_power_of_two(int x) {
+    return (x & (x - 1)) == 0;
+}
argument needs to be uint64_t to avoid overflow with large BARs. Return type can be bool.
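As a sketch of the suggested fix:

    static inline bool is_power_of_two(uint64_t x) {
        /* note: like the original, this treats 0 as a power of two */
        return (x & (x - 1)) == 0;
    }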
+static void ivshmem_io_writel(void *opaque, uint8_t addr, uint32_t val)
+{
+    IVShmemState *s = opaque;
+
+    u_int64_t write_one = 1;
+    u_int16_t dest = val >> 16;
+    u_int16_t vector = val & 0xff;
+
+    addr &= 0xfe;
Why 0xfe? Can understand 0xfc or 0xff.
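The registers defined above are 32 bits wide at offsets 0, 4, 8 and 12,
so the conventional mask would round down to a 4-byte boundary:

    addr &= 0xfc;   /* select the 32-bit register that was addressed */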
+
+    switch (addr)
+    {
+        case IntrMask:
+            ivshmem_IntrMask_write(s, val);
+            break;
+
+        case IntrStatus:
+            ivshmem_IntrStatus_write(s, val);
+            break;
+
+        case Doorbell:
+            /* check doorbell range */
+            if ((vector >= 0) && (vector < s->eventfds_posn_count[dest])) {
What if dest is too big? We overflow s->eventfds_posn_count.
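(Also, vector is unsigned, so the vector >= 0 half of the test is always
true.) A sketch of the kind of guard that seems to be missing, assuming
num_eventfds is the right upper bound for known peers:

    case Doorbell:
        if (dest >= s->num_eventfds) {
            break;  /* unknown peer: ignore the doorbell */
        }
        if (vector < s->eventfds_posn_count[dest]) {
            /* ... write to the destination eventfd as before ... */
        }
        break;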
+
+static void close_guest_eventfds(IVShmemState *s, int posn)
+{
+    int i, guest_curr_max;
+
+    guest_curr_max = s->eventfds_posn_count[posn];
+
+    for (i = 0; i < guest_curr_max; i++)
+        close(s->eventfds[posn][i]);
+
+    free(s->eventfds[posn]);
qemu_free().
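That is, roughly (the NULL reset is an extra suggestion, since
ivshmem_read() below uses s->eventfds[posn] == NULL to mean "unseen"):

    qemu_free(s->eventfds[posn]);
    s->eventfds[posn] = NULL;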
+/* this function increases the dynamic storage needed to store data about
+ * other guests */
+static void increase_dynamic_storage(IVShmemState *s, int new_min_size) {
+
+    int j, old_nr_alloc;
+
+    old_nr_alloc = s->nr_alloc_guests;
+
+    while (s->nr_alloc_guests < new_min_size)
+        s->nr_alloc_guests = s->nr_alloc_guests * 2;
+
+    IVSHMEM_DPRINTF("bumping storage to %d guests\n", s->nr_alloc_guests);
+    s->eventfds = qemu_realloc(s->eventfds, s->nr_alloc_guests *
+                                                            sizeof(int *));
+    s->eventfds_posn_count = qemu_realloc(s->eventfds_posn_count,
+                                          s->nr_alloc_guests * sizeof(int));
+    s->eventfd_table = qemu_realloc(s->eventfd_table, s->nr_alloc_guests *
+                                                      sizeof(EventfdEntry));
+
+    if ((s->eventfds == NULL) || (s->eventfds_posn_count == NULL) ||
+            (s->eventfd_table == NULL)) {
+        fprintf(stderr, "Allocation error - exiting\n");
+        exit(1);
+    }
+
+    if (!ivshmem_has_feature(s, IVSHMEM_IRQFD)) {
+        s->eventfd_chr = (CharDriverState **)qemu_realloc(s->eventfd_chr,
+                                    s->nr_alloc_guests * sizeof(void *));
+        if (s->eventfd_chr == NULL) {
+            fprintf(stderr, "Allocation error - exiting\n");
+            exit(1);
+        }
+    }
+
+    /* zero out new pointers */
+    for (j = old_nr_alloc; j < s->nr_alloc_guests; j++) {
+        s->eventfds[j] = NULL;
eventfds_posn_count and eventfd_table want zeroing as well.
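For instance, extending the loop in the patch's own style:

    /* zero out new entries */
    for (j = old_nr_alloc; j < s->nr_alloc_guests; j++) {
        s->eventfds[j] = NULL;
        s->eventfds_posn_count[j] = 0;
        memset(&s->eventfd_table[j], 0, sizeof(EventfdEntry));
    }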
+    }
+}
+
+static void ivshmem_read(void *opaque, const uint8_t * buf, int flags)
+{
+    IVShmemState *s = opaque;
+    int incoming_fd, tmp_fd;
+    int guest_curr_max;
+    long incoming_posn;
+
+    memcpy(&incoming_posn, buf, sizeof(long));
+    /* pick off s->chr->msgfd and store it, posn should accompany msg */
+    tmp_fd = qemu_chr_get_msgfd(s->chr);
+    IVSHMEM_DPRINTF("posn is %ld, fd is %d\n", incoming_posn, tmp_fd);
+
+    /* make sure we have enough space for this guest */
+    if (incoming_posn >= s->nr_alloc_guests) {
+        increase_dynamic_storage(s, incoming_posn);
+    }
+
+    if (tmp_fd == -1) {
+        /* if posn is positive and unseen before then this is our posn */
+        if ((incoming_posn >= 0) && (s->eventfds[incoming_posn] == NULL)) {
+            /* receive our posn */
+            s->vm_id = incoming_posn;
+            return;
+        } else {
+            /* otherwise an fd == -1 means an existing guest has gone away */
+            IVSHMEM_DPRINTF("posn %ld has gone away\n", incoming_posn);
+            close_guest_eventfds(s, incoming_posn);
+            return;
+        }
+    }
+
+    /* because of the implementation of get_msgfd, we need a dup */
+    incoming_fd = dup(tmp_fd);
Error check.
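Something along these lines (how to handle the failure is a judgment
call; dropping the message seems safest):

    incoming_fd = dup(tmp_fd);
    if (incoming_fd == -1) {
        fprintf(stderr, "could not dup() fd %d: %s\n",
                tmp_fd, strerror(errno));
        return;
    }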
+
+    /* if the position is -1, then it's shared memory region fd */
+    if (incoming_posn == -1) {
+
+        s->num_eventfds = 0;
+
+        if (check_shm_size(s, incoming_fd) == -1) {
+            exit(-1);
+        }
+
+        /* creating a BAR in qemu_chr callback may be crazy */
+        create_shared_memory_BAR(s, incoming_fd);
It probably is... why can't you create it during initialization?
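For reference, the usual pattern is to register the BAR once at device
init time and defer only the mapping, roughly (ivshmem_map is a
hypothetical callback name):

    /* in the device init function */
    pci_register_bar(&s->dev, 2, s->ivshmem_size,
                     PCI_BASE_ADDRESS_SPACE_MEMORY, ivshmem_map);

The chardev callback would then only mmap() the incoming fd and record
the resulting ram address; ivshmem_map() registers the memory with the
guest once the BAR is programmed.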
+
+        return;
+    }
+
+    /* each guest has an array of eventfds, and we keep track of how many
+     * guests for each VM */
+    guest_curr_max = s->eventfds_posn_count[incoming_posn];
+    if (guest_curr_max == 0) {
+        /* one eventfd per MSI vector */
+        s->eventfds[incoming_posn] = (int *) qemu_malloc(s->vectors *
+                                                                sizeof(int));
+    }
+
+    /* this is an eventfd for a particular guest VM */
+    IVSHMEM_DPRINTF("eventfds[%ld][%d] = %d\n", incoming_posn, guest_curr_max,
+                                                                incoming_fd);
+    s->eventfds[incoming_posn][guest_curr_max] = incoming_fd;
+
+    /* increment count for particular guest */
+    s->eventfds_posn_count[incoming_posn]++;
Not sure I follow exactly, but perhaps this needs to be

    s->eventfds_posn_count[incoming_posn] = guest_curr_max + 1;

Oh, it is.
+
+    /* allocate/initialize space for interrupt handling */
+    s->eventfds = qemu_mallocz(s->nr_alloc_guests * sizeof(int *));
+    s->eventfd_table = qemu_mallocz(s->vectors * sizeof(EventfdEntry));
+    s->eventfds_posn_count = qemu_mallocz(s->nr_alloc_guests * sizeof(int));
+
+    pci_conf[PCI_INTERRUPT_PIN] = 1; /* we are going to support interrupts */
This is done by the guest BIOS.

--
error compiling committee.c: too many arguments to function