Re: [PATCH v5 4/5] Inter-VM shared memory PCI device

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 04/21/2010 08:53 PM, Cam Macdonell wrote:
Support an inter-vm shared memory device that maps a shared-memory object as a
PCI device in the guest.  This patch also supports interrupts between guest by
communicating over a unix domain socket.  This patch applies to the qemu-kvm
repository.

     -device ivshmem,size=<size in format accepted by -m>[,shm=<shm name>]

Interrupts are supported between multiple VMs by using a shared memory server
by using a chardev socket.

     -device ivshmem,size=<size in format accepted by -m>[,shm=<shm name>]
                     [,chardev=<id>][,msi=on][,irqfd=on][,vectors=n]
     -chardev socket,path=<path>,id=<id>

(shared memory server is qemu.git/contrib/ivshmem-server)

Sample programs and init scripts are in a git repo here:


+typedef struct EventfdEntry {
+    PCIDevice *pdev;
+    int vector;
+} EventfdEntry;
+
+typedef struct IVShmemState {
+    PCIDevice dev;
+    uint32_t intrmask;
+    uint32_t intrstatus;
+    uint32_t doorbell;
+
+    CharDriverState * chr;
+    CharDriverState ** eventfd_chr;
+    int ivshmem_mmio_io_addr;
+
+    pcibus_t mmio_addr;
+    unsigned long ivshmem_offset;
+    uint64_t ivshmem_size; /* size of shared memory region */
+    int shm_fd; /* shared memory file descriptor */
+
+    int nr_allocated_vms;
+    /* array of eventfds for each guest */
+    int ** eventfds;
+    /* keep track of # of eventfds for each guest*/
+    int * eventfds_posn_count;

More readable:

  typedef struct Peer {
      int nb_eventfds;
      int *eventfds;
  } Peer;
  int nb_peers;
  Peer *peers;

Does eventfd_chr need to be there as well?

+
+    int nr_alloc_guests;
+    int vm_id;
+    int num_eventfds;
+    uint32_t vectors;
+    uint32_t features;
+    EventfdEntry *eventfd_table;
+
+    char * shmobj;
+    char * sizearg;

Does this need to be part of the state?

+} IVShmemState;
+
+/* registers for the Inter-VM shared memory device */
+enum ivshmem_registers {
+    IntrMask = 0,
+    IntrStatus = 4,
+    IVPosition = 8,
+    Doorbell = 12,
+};
+
+static inline uint32_t ivshmem_has_feature(IVShmemState *ivs, int feature) {
+    return (ivs->features&  (1<<  feature));
+}
+
+static inline int is_power_of_two(int x) {
+    return (x&  (x-1)) == 0;
+}

argument needs to be uint64_t to avoid overflow with large BARs. Return type can be bool.

+static void ivshmem_io_writel(void *opaque, uint8_t addr, uint32_t val)
+{
+    IVShmemState *s = opaque;
+
+    u_int64_t write_one = 1;
+    u_int16_t dest = val>>  16;
+    u_int16_t vector = val&  0xff;
+
+    addr&= 0xfe;

Why 0xfe?  Can understand 0xfc or 0xff.

+
+    switch (addr)
+    {
+        case IntrMask:
+            ivshmem_IntrMask_write(s, val);
+            break;
+
+        case IntrStatus:
+            ivshmem_IntrStatus_write(s, val);
+            break;
+
+        case Doorbell:
+            /* check doorbell range */
+            if ((vector>= 0)&&  (vector<  s->eventfds_posn_count[dest])) {

What if dest is too big?  We overflow s->eventfds_posn_count.
+
+static void close_guest_eventfds(IVShmemState *s, int posn)
+{
+    int i, guest_curr_max;
+
+    guest_curr_max = s->eventfds_posn_count[posn];
+
+    for (i = 0; i<  guest_curr_max; i++)
+        close(s->eventfds[posn][i]);
+
+    free(s->eventfds[posn]);

qemu_free().

+/* this function increase the dynamic storage need to store data about other
+ * guests */
+static void increase_dynamic_storage(IVShmemState *s, int new_min_size) {
+
+    int j, old_nr_alloc;
+
+    old_nr_alloc = s->nr_alloc_guests;
+
+    while (s->nr_alloc_guests<  new_min_size)
+        s->nr_alloc_guests = s->nr_alloc_guests * 2;
+
+    IVSHMEM_DPRINTF("bumping storage to %d guests\n", s->nr_alloc_guests);
+    s->eventfds = qemu_realloc(s->eventfds, s->nr_alloc_guests *
+                                                        sizeof(int *));
+    s->eventfds_posn_count = qemu_realloc(s->eventfds_posn_count,
+                                                    s->nr_alloc_guests *
+                                                        sizeof(int));
+    s->eventfd_table = qemu_realloc(s->eventfd_table, s->nr_alloc_guests *
+                                                    sizeof(EventfdEntry));
+
+    if ((s->eventfds == NULL) || (s->eventfds_posn_count == NULL) ||
+            (s->eventfd_table == NULL)) {
+        fprintf(stderr, "Allocation error - exiting\n");
+        exit(1);
+    }
+
+    if (!ivshmem_has_feature(s, IVSHMEM_IRQFD)) {
+        s->eventfd_chr = (CharDriverState **)qemu_realloc(s->eventfd_chr,
+                                    s->nr_alloc_guests * sizeof(void *));
+        if (s->eventfd_chr == NULL) {
+            fprintf(stderr, "Allocation error - exiting\n");
+            exit(1);
+        }
+    }
+
+    /* zero out new pointers */
+    for (j = old_nr_alloc; j<  s->nr_alloc_guests; j++) {
+        s->eventfds[j] = NULL;

eventfds_posn_count and eventfd_table want zeroing as well.

+    }
+}
+
+static void ivshmem_read(void *opaque, const uint8_t * buf, int flags)
+{
+    IVShmemState *s = opaque;
+    int incoming_fd, tmp_fd;
+    int guest_curr_max;
+    long incoming_posn;
+
+    memcpy(&incoming_posn, buf, sizeof(long));
+    /* pick off s->chr->msgfd and store it, posn should accompany msg */
+    tmp_fd = qemu_chr_get_msgfd(s->chr);
+    IVSHMEM_DPRINTF("posn is %ld, fd is %d\n", incoming_posn, tmp_fd);
+
+    /* make sure we have enough space for this guest */
+    if (incoming_posn>= s->nr_alloc_guests) {
+        increase_dynamic_storage(s, incoming_posn);
+    }
+
+    if (tmp_fd == -1) {
+        /* if posn is positive and unseen before then this is our posn*/
+        if ((incoming_posn>= 0)&&  (s->eventfds[incoming_posn] == NULL)) {
+            /* receive our posn */
+            s->vm_id = incoming_posn;
+            return;
+        } else {
+            /* otherwise an fd == -1 means an existing guest has gone away */
+            IVSHMEM_DPRINTF("posn %ld has gone away\n", incoming_posn);
+            close_guest_eventfds(s, incoming_posn);
+            return;
+        }
+    }
+
+    /* because of the implementation of get_msgfd, we need a dup */
+    incoming_fd = dup(tmp_fd);

Error check.

+
+    /* if the position is -1, then it's shared memory region fd */
+    if (incoming_posn == -1) {
+
+        s->num_eventfds = 0;
+
+        if (check_shm_size(s, incoming_fd) == -1) {
+            exit(-1);
+        }
+
+        /* creating a BAR in qemu_chr callback may be crazy */
+        create_shared_memory_BAR(s, incoming_fd);

It probably is... why can't you create it during initialization?


+
+       return;
+    }
+
+    /* each guest has an array of eventfds, and we keep track of how many
+     * guests for each VM */
+    guest_curr_max = s->eventfds_posn_count[incoming_posn];
+    if (guest_curr_max == 0) {
+        /* one eventfd per MSI vector */
+        s->eventfds[incoming_posn] = (int *) qemu_malloc(s->vectors *
+                                                                sizeof(int));
+    }
+
+    /* this is an eventfd for a particular guest VM */
+    IVSHMEM_DPRINTF("eventfds[%ld][%d] = %d\n", incoming_posn, guest_curr_max,
+                                                                incoming_fd);
+    s->eventfds[incoming_posn][guest_curr_max] = incoming_fd;
+
+    /* increment count for particular guest */
+    s->eventfds_posn_count[incoming_posn]++;

Not sure I follow exactly, but perhaps this needs to be

    s->eventfds_posn_count[incoming_posn] = guest_curr_max + 1;

Oh, it is.

+
+        /* allocate/initialize space for interrupt handling */
+        s->eventfds = qemu_mallocz(s->nr_alloc_guests * sizeof(int *));
+        s->eventfd_table = qemu_mallocz(s->vectors * sizeof(EventfdEntry));
+        s->eventfds_posn_count = qemu_mallocz(s->nr_alloc_guests * sizeof(int));
+
+        pci_conf[PCI_INTERRUPT_PIN] = 1; /* we are going to support interrupts */

This is done by the guest BIOS.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux