On Mon, May 10, 2010 at 5:59 AM, Avi Kivity <avi@xxxxxxxxxx> wrote:
> On 04/21/2010 08:53 PM, Cam Macdonell wrote:
>>
>> Support an inter-vm shared memory device that maps a shared-memory object
>> as a PCI device in the guest. This patch also supports interrupts between
>> guests by communicating over a unix domain socket. This patch applies to
>> the qemu-kvm repository.
>>
>>     -device ivshmem,size=<size in format accepted by -m>[,shm=<shm name>]
>>
>> Interrupts are supported between multiple VMs by using a shared memory
>> server and a chardev socket.
>>
>>     -device ivshmem,size=<size in format accepted by -m>[,shm=<shm name>]
>>             [,chardev=<id>][,msi=on][,irqfd=on][,vectors=n]
>>     -chardev socket,path=<path>,id=<id>
>>
>> (shared memory server is qemu.git/contrib/ivshmem-server)
>>
>> Sample programs and init scripts are in a git repo here:
>>
>>
>> +typedef struct EventfdEntry {
>> +    PCIDevice *pdev;
>> +    int vector;
>> +} EventfdEntry;
>> +
>> +typedef struct IVShmemState {
>> +    PCIDevice dev;
>> +    uint32_t intrmask;
>> +    uint32_t intrstatus;
>> +    uint32_t doorbell;
>> +
>> +    CharDriverState * chr;
>> +    CharDriverState ** eventfd_chr;
>> +    int ivshmem_mmio_io_addr;
>> +
>> +    pcibus_t mmio_addr;
>> +    unsigned long ivshmem_offset;
>> +    uint64_t ivshmem_size; /* size of shared memory region */
>> +    int shm_fd; /* shared memory file descriptor */
>> +
>> +    int nr_allocated_vms;
>> +    /* array of eventfds for each guest */
>> +    int ** eventfds;
>> +    /* keep track of # of eventfds for each guest */
>> +    int * eventfds_posn_count;
>
> More readable:
>
>     typedef struct Peer {
>         int nb_eventfds;
>         int *eventfds;
>     } Peer;
>     int nb_peers;
>     Peer *peers;
>
> Does eventfd_chr need to be there as well?
>
>> +
>> +    int nr_alloc_guests;
>> +    int vm_id;
>> +    int num_eventfds;
>> +    uint32_t vectors;
>> +    uint32_t features;
>> +    EventfdEntry *eventfd_table;
>> +
>> +    char * shmobj;
>> +    char * sizearg;
>
> Does this need to be part of the state?
>
>> +} IVShmemState;
>> +
>> +/* registers for the Inter-VM shared memory device */
>> +enum ivshmem_registers {
>> +    IntrMask = 0,
>> +    IntrStatus = 4,
>> +    IVPosition = 8,
>> +    Doorbell = 12,
>> +};
>> +
>> +static inline uint32_t ivshmem_has_feature(IVShmemState *ivs, int feature) {
>> +    return (ivs->features & (1 << feature));
>> +}
>> +
>> +static inline int is_power_of_two(int x) {
>> +    return (x & (x - 1)) == 0;
>> +}
>
> argument needs to be uint64_t to avoid overflow with large BARs. Return
> type can be bool.
>
>> +static void ivshmem_io_writel(void *opaque, uint8_t addr, uint32_t val)
>> +{
>> +    IVShmemState *s = opaque;
>> +
>> +    u_int64_t write_one = 1;
>> +    u_int16_t dest = val >> 16;
>> +    u_int16_t vector = val & 0xff;
>> +
>> +    addr &= 0xfe;
>
> Why 0xfe? Can understand 0xfc or 0xff.
>
>> +
>> +    switch (addr)
>> +    {
>> +        case IntrMask:
>> +            ivshmem_IntrMask_write(s, val);
>> +            break;
>> +
>> +        case IntrStatus:
>> +            ivshmem_IntrStatus_write(s, val);
>> +            break;
>> +
>> +        case Doorbell:
>> +            /* check doorbell range */
>> +            if ((vector >= 0) && (vector < s->eventfds_posn_count[dest])) {
>
> What if dest is too big? We overflow s->eventfds_posn_count.
>
>> +
>> +static void close_guest_eventfds(IVShmemState *s, int posn)
>> +{
>> +    int i, guest_curr_max;
>> +
>> +    guest_curr_max = s->eventfds_posn_count[posn];
>> +
>> +    for (i = 0; i < guest_curr_max; i++)
>> +        close(s->eventfds[posn][i]);
>> +
>> +    free(s->eventfds[posn]);
>>
>
> qemu_free().
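
Regarding the Peer struct, the dest bounds question, and qemu_free(): here is a
rough, untested sketch of how close_guest_eventfds() and the Doorbell case
might look after that change (any names not already in the patch, such as
nb_peers/peers, are just placeholders):

    static void close_guest_eventfds(IVShmemState *s, int posn)
    {
        int i;

        for (i = 0; i < s->peers[posn].nb_eventfds; i++) {
            close(s->peers[posn].eventfds[i]);
        }

        qemu_free(s->peers[posn].eventfds);
        s->peers[posn].eventfds = NULL;
        s->peers[posn].nb_eventfds = 0;
    }

and in ivshmem_io_writel():

        case Doorbell:
            /* guard the peer index as well as the vector index; 'vector'
             * is unsigned, so the >= 0 test is not needed */
            if ((dest < s->nb_peers) &&
                (vector < s->peers[dest].nb_eventfds)) {
                /* ... existing notification path ... */
            }
            break;

Keeping the count next to the fd array turns the doorbell range check into a
lookup in a single struct instead of two parallel arrays.
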
>
>> +/* this function increase the dynamic storage need to store data about other
>> + * guests */
>> +static void increase_dynamic_storage(IVShmemState *s, int new_min_size) {
>> +
>> +    int j, old_nr_alloc;
>> +
>> +    old_nr_alloc = s->nr_alloc_guests;
>> +
>> +    while (s->nr_alloc_guests < new_min_size)
>> +        s->nr_alloc_guests = s->nr_alloc_guests * 2;
>> +
>> +    IVSHMEM_DPRINTF("bumping storage to %d guests\n", s->nr_alloc_guests);
>> +    s->eventfds = qemu_realloc(s->eventfds, s->nr_alloc_guests *
>> +                                                            sizeof(int *));
>> +    s->eventfds_posn_count = qemu_realloc(s->eventfds_posn_count,
>> +                                          s->nr_alloc_guests * sizeof(int));
>> +    s->eventfd_table = qemu_realloc(s->eventfd_table, s->nr_alloc_guests *
>> +                                                      sizeof(EventfdEntry));
>> +
>> +    if ((s->eventfds == NULL) || (s->eventfds_posn_count == NULL) ||
>> +            (s->eventfd_table == NULL)) {
>> +        fprintf(stderr, "Allocation error - exiting\n");
>> +        exit(1);
>> +    }
>> +
>> +    if (!ivshmem_has_feature(s, IVSHMEM_IRQFD)) {
>> +        s->eventfd_chr = (CharDriverState **)qemu_realloc(s->eventfd_chr,
>> +                                    s->nr_alloc_guests * sizeof(void *));
>> +        if (s->eventfd_chr == NULL) {
>> +            fprintf(stderr, "Allocation error - exiting\n");
>> +            exit(1);
>> +        }
>> +    }
>> +
>> +    /* zero out new pointers */
>> +    for (j = old_nr_alloc; j < s->nr_alloc_guests; j++) {
>> +        s->eventfds[j] = NULL;
>
> eventfds_posn_count and eventfd_table want zeroing as well.
>
>> +    }
>> +}
>> +
>> +static void ivshmem_read(void *opaque, const uint8_t * buf, int flags)
>> +{
>> +    IVShmemState *s = opaque;
>> +    int incoming_fd, tmp_fd;
>> +    int guest_curr_max;
>> +    long incoming_posn;
>> +
>> +    memcpy(&incoming_posn, buf, sizeof(long));
>> +    /* pick off s->chr->msgfd and store it, posn should accompany msg */
>> +    tmp_fd = qemu_chr_get_msgfd(s->chr);
>> +    IVSHMEM_DPRINTF("posn is %ld, fd is %d\n", incoming_posn, tmp_fd);
>> +
>> +    /* make sure we have enough space for this guest */
>> +    if (incoming_posn >= s->nr_alloc_guests) {
>> +        increase_dynamic_storage(s, incoming_posn);
>> +    }
>> +
>> +    if (tmp_fd == -1) {
>> +        /* if posn is positive and unseen before then this is our posn */
>> +        if ((incoming_posn >= 0) && (s->eventfds[incoming_posn] == NULL)) {
>> +            /* receive our posn */
>> +            s->vm_id = incoming_posn;
>> +            return;
>> +        } else {
>> +            /* otherwise an fd == -1 means an existing guest has gone away */
>> +            IVSHMEM_DPRINTF("posn %ld has gone away\n", incoming_posn);
>> +            close_guest_eventfds(s, incoming_posn);
>> +            return;
>> +        }
>> +    }
>> +
>> +    /* because of the implementation of get_msgfd, we need a dup */
>> +    incoming_fd = dup(tmp_fd);
>>
>
> Error check.
>
>> +
>> +    /* if the position is -1, then it's shared memory region fd */
>> +    if (incoming_posn == -1) {
>> +
>> +        s->num_eventfds = 0;
>> +
>> +        if (check_shm_size(s, incoming_fd) == -1) {
>> +            exit(-1);
>> +        }
>> +
>> +        /* creating a BAR in qemu_chr callback may be crazy */
>> +        create_shared_memory_BAR(s, incoming_fd);
>>
>
> It probably is... why can't you create it during initialization?

This is for the shared memory server implementation, so the fd for the shared
memory has to be received (over the qemu char device) from the server before
the BAR can be created via qemu_ram_mmap(), which adds the necessary memory.
Otherwise, if the BAR is allocated during initialization, I would have to use
MAP_FIXED to mmap the memory. This is what I did before the qemu_ram_mmap()
function was added.
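
On the zeroing and the dup() error check, something along these lines would
cover both (a rough sketch against the current field names, untested):

    /* zero out the new entries in all three arrays */
    for (j = old_nr_alloc; j < s->nr_alloc_guests; j++) {
        s->eventfds[j] = NULL;
        s->eventfds_posn_count[j] = 0;
        memset(&s->eventfd_table[j], 0, sizeof(EventfdEntry));
    }

and in ivshmem_read():

    /* because of the implementation of get_msgfd, we need a dup */
    incoming_fd = dup(tmp_fd);
    if (incoming_fd == -1) {
        fprintf(stderr, "could not dup() fd for posn %ld\n", incoming_posn);
        return;
    }
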
>
>
>> +
>> +        return;
>> +    }
>> +
>> +    /* each guest has an array of eventfds, and we keep track of how many
>> +     * guests for each VM */
>> +    guest_curr_max = s->eventfds_posn_count[incoming_posn];
>> +    if (guest_curr_max == 0) {
>> +        /* one eventfd per MSI vector */
>> +        s->eventfds[incoming_posn] = (int *) qemu_malloc(s->vectors *
>> +                                                             sizeof(int));
>> +    }
>> +
>> +    /* this is an eventfd for a particular guest VM */
>> +    IVSHMEM_DPRINTF("eventfds[%ld][%d] = %d\n", incoming_posn, guest_curr_max,
>> +                                                                incoming_fd);
>> +    s->eventfds[incoming_posn][guest_curr_max] = incoming_fd;
>> +
>> +    /* increment count for particular guest */
>> +    s->eventfds_posn_count[incoming_posn]++;
>
> Not sure I follow exactly, but perhaps this needs to be
>
>     s->eventfds_posn_count[incoming_posn] = guest_curr_max + 1;
>
> Oh, it is.
>
>> +
>> +    /* allocate/initialize space for interrupt handling */
>> +    s->eventfds = qemu_mallocz(s->nr_alloc_guests * sizeof(int *));
>> +    s->eventfd_table = qemu_mallocz(s->vectors * sizeof(EventfdEntry));
>> +    s->eventfds_posn_count = qemu_mallocz(s->nr_alloc_guests * sizeof(int));
>> +
>> +    pci_conf[PCI_INTERRUPT_PIN] = 1; /* we are going to support interrupts */
>
> This is done by the guest BIOS.
>
>
> --
> error compiling committee.c: too many arguments to function
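
One closing note on the eventfd bookkeeping quoted above: with the Peer struct
suggested earlier in the thread, the per-guest registration step reduces to
something like this (again a rough, untested sketch):

    Peer *peer = &s->peers[incoming_posn];

    if (peer->nb_eventfds == 0) {
        /* one eventfd per MSI vector */
        peer->eventfds = qemu_malloc(s->vectors * sizeof(int));
    }

    /* store the eventfd and bump the per-peer count in one step */
    peer->eventfds[peer->nb_eventfds++] = incoming_fd;

which keeps the count and the array together and avoids indexing two parallel
arrays with incoming_posn.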