Re: [Qemu-devel] [PATCH v6 6/6] the stand-alone shared memory server for inter-VM shared memory

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 06/04/2010 04:45 PM, Cam Macdonell wrote:
this code is a standalone server which will pass file descriptors for the shared
memory region and eventfds to support interrupts between guests using inter-VM
shared memory.
---
  contrib/ivshmem-server/Makefile         |   16 ++
  contrib/ivshmem-server/README           |   30 +++
  contrib/ivshmem-server/ivshmem_server.c |  353 +++++++++++++++++++++++++++++++
  contrib/ivshmem-server/send_scm.c       |  208 ++++++++++++++++++
  contrib/ivshmem-server/send_scm.h       |   19 ++
  5 files changed, 626 insertions(+), 0 deletions(-)
  create mode 100644 contrib/ivshmem-server/Makefile
  create mode 100644 contrib/ivshmem-server/README
  create mode 100644 contrib/ivshmem-server/ivshmem_server.c
  create mode 100644 contrib/ivshmem-server/send_scm.c
  create mode 100644 contrib/ivshmem-server/send_scm.h

diff --git a/contrib/ivshmem-server/Makefile b/contrib/ivshmem-server/Makefile
new file mode 100644
index 0000000..da40ffa
--- /dev/null
+++ b/contrib/ivshmem-server/Makefile
@@ -0,0 +1,16 @@
+CC = gcc
+CFLAGS = -O3 -Wall -Werror
+LIBS = -lrt
+
+# a very simple makefile to build the inter-VM shared memory server
+
+all: ivshmem_server
+
+.c.o:
+	$(CC) $(CFLAGS) -c $^ -o $@
+
+ivshmem_server: ivshmem_server.o send_scm.o
+	$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
+
+clean:
+	rm -f *.o ivshmem_server
diff --git a/contrib/ivshmem-server/README b/contrib/ivshmem-server/README
new file mode 100644
index 0000000..b1fc2a2
--- /dev/null
+++ b/contrib/ivshmem-server/README
@@ -0,0 +1,30 @@
+Using the ivshmem shared memory server
+--------------------------------------
+
+This server is only supported on Linux.
+
+To use the shared memory server, first compile it.  Running 'make' should
+accomplish this.  An executable named 'ivshmem_server' will be built.
+
+to display the options run:
+
+./ivshmem_server -h
+
+Options
+-------
+
+    -h  print help message
+
+    -p<path on host>
+        unix socket to listen on.  The qemu-kvm chardev needs to connect on
+        this socket. (default: '/tmp/ivshmem_socket')
+
+    -s<string>
+        POSIX shared object to create that is the shared memory (default: 'ivshmem')
+
+    -m<#>
+        size of the POSIX object in MBs (default: 1)
+
+    -n<#>
+        number of eventfds for each guest.  This number must match the
+        'vectors' argument passed the ivshmem device. (default: 1)
diff --git a/contrib/ivshmem-server/ivshmem_server.c b/contrib/ivshmem-server/ivshmem_server.c
new file mode 100644
index 0000000..e0a7b98
--- /dev/null
+++ b/contrib/ivshmem-server/ivshmem_server.c

There's no licensing here. I don't think this belongs in the qemu tree either to be honest. If it were to be included, it ought to use all of the existing qemu infrastructure like the other qemu-* tools.

Regards,

Anthony Liguori

@@ -0,0 +1,353 @@
+/*
+ * A stand-alone shared memory server for inter-VM shared memory for KVM
+*/
+
+#include<errno.h>
+#include<string.h>
+#include<sys/types.h>
+#include<sys/socket.h>
+#include<sys/un.h>
+#include<unistd.h>
+#include<sys/types.h>
+#include<sys/stat.h>
+#include<fcntl.h>
+#include<sys/eventfd.h>
+#include<sys/mman.h>
+#include<sys/select.h>
+#include<stdio.h>
+#include<stdlib.h>
+#include "send_scm.h"
+
+#define DEFAULT_SOCK_PATH "/tmp/ivshmem_socket"
+#define DEFAULT_SHM_OBJ "ivshmem"
+
+#define DEBUG 1
+
+typedef struct server_state {
+    vmguest_t *live_vms;
+    int nr_allocated_vms;
+    int shm_size;
+    long live_count;
+    long total_count;
+    int shm_fd;
+    char * path;
+    char * shmobj;
+    int maxfd, conn_socket;
+    long msi_vectors;
+} server_state_t;
+
+void usage(char const *prg);
+int find_set(fd_set * readset, int max);
+void print_vec(server_state_t * s, const char * c);
+
+void add_new_guest(server_state_t * s);
+void parse_args(int argc, char **argv, server_state_t * s);
+int create_listening_socket(char * path);
+
+int main(int argc, char ** argv)
+{
+    fd_set readset;
+    server_state_t * s;
+
+    s = (server_state_t *)calloc(1, sizeof(server_state_t));
+
+    s->live_count = 0;
+    s->total_count = 0;
+    parse_args(argc, argv, s);
+
+    /* open shared memory file  */
+    if ((s->shm_fd = shm_open(s->shmobj, O_CREAT|O_RDWR, S_IRWXU))<  0)
+    {
+        fprintf(stderr, "kvm_ivshmem: could not open shared file\n");
+        exit(-1);
+    }
+
+    ftruncate(s->shm_fd, s->shm_size);
+
+    s->conn_socket = create_listening_socket(s->path);
+
+    s->maxfd = s->conn_socket;
+
+    for(;;) {
+        int ret, handle, i;
+        char buf[1024];
+
+        print_vec(s, "vm_sockets");
+
+        FD_ZERO(&readset);
+        /* conn socket is in Live_vms at posn 0 */
+        FD_SET(s->conn_socket,&readset);
+        for (i = 0; i<  s->total_count; i++) {
+            if (s->live_vms[i].alive != 0) {
+                FD_SET(s->live_vms[i].sockfd,&readset);
+            }
+        }
+
+        printf("\nWaiting (maxfd = %d)\n", s->maxfd);
+
+        ret = select(s->maxfd + 1,&readset, NULL, NULL, NULL);
+
+        if (ret == -1) {
+            perror("select()");
+        }
+
+        handle = find_set(&readset, s->maxfd + 1);
+        if (handle == -1) continue;
+
+        if (handle == s->conn_socket) {
+
+            printf("[NC] new connection\n");
+            FD_CLR(s->conn_socket,&readset);
+
+            /* The Total_count is equal to the new guests VM ID */
+            add_new_guest(s);
+
+            /* update our the maximum file descriptor number */
+            s->maxfd = s->live_vms[s->total_count - 1].sockfd>  s->maxfd ?
+                            s->live_vms[s->total_count - 1].sockfd : s->maxfd;
+
+            s->live_count++;
+            printf("Live_count is %ld\n", s->live_count);
+
+        } else {
+            /* then we have received a disconnection */
+            int recv_ret;
+            long i, j;
+            long deadposn = -1;
+
+            recv_ret = recv(handle, buf, 1, 0);
+
+            printf("[DC] recv returned %d\n", recv_ret);
+
+            /* find the dead VM in our list and move it do the dead list. */
+            for (i = 0; i<  s->total_count; i++) {
+                if (s->live_vms[i].sockfd == handle) {
+                    deadposn = i;
+                    s->live_vms[i].alive = 0;
+                    close(s->live_vms[i].sockfd);
+
+                    for (j = 0; j<  s->msi_vectors; j++) {
+                        close(s->live_vms[i].efd[j]);
+                    }
+
+                    free(s->live_vms[i].efd);
+                    s->live_vms[i].sockfd = -1;
+                    break;
+                }
+            }
+
+            for (j = 0; j<  s->total_count; j++) {
+                /* update remaining clients that one client has left/died */
+                if (s->live_vms[j].alive) {
+                    printf("[UD] sending kill of fd[%ld] to %ld\n",
+                                                                deadposn, j);
+                    sendKill(s->live_vms[j].sockfd, deadposn, sizeof(deadposn));
+                }
+            }
+
+            s->live_count--;
+
+            /* close the socket for the departed VM */
+            close(handle);
+        }
+
+    }
+
+    return 0;
+}
+
+void add_new_guest(server_state_t * s) {
+
+    struct sockaddr_un remote;
+    socklen_t t = sizeof(remote);
+    long i, j;
+    int vm_sock;
+    long new_posn;
+    long neg1 = -1;
+
+    vm_sock = accept(s->conn_socket, (struct sockaddr *)&remote,&t);
+
+    if ( vm_sock == -1 ) {
+        perror("accept");
+        exit(1);
+    }
+
+    new_posn = s->total_count;
+
+    if (new_posn == s->nr_allocated_vms) {
+        printf("increasing vm slots\n");
+        s->nr_allocated_vms = s->nr_allocated_vms * 2;
+        if (s->nr_allocated_vms<  16)
+            s->nr_allocated_vms = 16;
+        s->live_vms = realloc(s->live_vms,
+                    s->nr_allocated_vms * sizeof(vmguest_t));
+
+        if (s->live_vms == NULL) {
+            fprintf(stderr, "realloc failed - quitting\n");
+            exit(-1);
+        }
+    }
+
+    s->live_vms[new_posn].posn = new_posn;
+    printf("[NC] Live_vms[%ld]\n", new_posn);
+    s->live_vms[new_posn].efd = (int *) malloc(sizeof(int));
+    for (i = 0; i<  s->msi_vectors; i++) {
+        s->live_vms[new_posn].efd[i] = eventfd(0, 0);
+        printf("\tefd[%ld] = %d\n", i, s->live_vms[new_posn].efd[i]);
+    }
+    s->live_vms[new_posn].sockfd = vm_sock;
+    s->live_vms[new_posn].alive = 1;
+
+
+    sendPosition(vm_sock, new_posn);
+    sendUpdate(vm_sock, neg1, sizeof(long), s->shm_fd);
+    printf("[NC] trying to send fds to new connection\n");
+    sendRights(vm_sock, new_posn, sizeof(new_posn), s->live_vms, s->msi_vectors);
+
+    printf("[NC] Connected (count = %ld).\n", new_posn);
+    for (i = 0; i<  new_posn; i++) {
+        if (s->live_vms[i].alive) {
+            // ping all clients that a new client has joined
+            printf("[UD] sending fd[%ld] to %ld\n", new_posn, i);
+            for (j = 0; j<  s->msi_vectors; j++) {
+                printf("\tefd[%ld] = [%d]", j, s->live_vms[new_posn].efd[j]);
+                sendUpdate(s->live_vms[i].sockfd, new_posn,
+                        sizeof(new_posn), s->live_vms[new_posn].efd[j]);
+            }
+            printf("\n");
+        }
+    }
+
+    s->total_count++;
+}
+
+int create_listening_socket(char * path) {
+
+    struct sockaddr_un local;
+    int len, conn_socket;
+
+    if ((conn_socket = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
+        perror("socket");
+        exit(1);
+    }
+
+    local.sun_family = AF_UNIX;
+    strcpy(local.sun_path, path);
+    unlink(local.sun_path);
+    len = strlen(local.sun_path) + sizeof(local.sun_family);
+    if (bind(conn_socket, (struct sockaddr *)&local, len) == -1) {
+        perror("bind");
+        exit(1);
+    }
+
+    if (listen(conn_socket, 5) == -1) {
+        perror("listen");
+        exit(1);
+    }
+
+    return conn_socket;
+
+}
+
+void parse_args(int argc, char **argv, server_state_t * s) {
+
+    int c;
+
+    s->shm_size = 1024 * 1024; // default shm_size
+    s->path = NULL;
+    s->shmobj = NULL;
+    s->msi_vectors = 1;
+
+	while ((c = getopt(argc, argv, "hp:s:m:n:")) != -1) {
+
+        switch (c) {
+            // path to listening socket
+            case 'p':
+                s->path = optarg;
+                break;
+            // name of shared memory object
+            case 's':
+                s->shmobj = optarg;
+                break;
+            // size of shared memory object
+            case 'm': {
+                    uint64_t value;
+                    char *ptr;
+
+                    value = strtoul(optarg,&ptr, 10);
+                    switch (*ptr) {
+                    case 0: case 'M': case 'm':
+                        value<<= 20;
+                        break;
+                    case 'G': case 'g':
+                        value<<= 30;
+                        break;
+                    default:
+                        fprintf(stderr, "qemu: invalid ram size: %s\n", optarg);
+                        exit(1);
+                    }
+                    s->shm_size = value;
+                    break;
+                }
+            case 'n':
+                s->msi_vectors = atol(optarg);
+                break;
+            case 'h':
+            default:
+	            usage(argv[0]);
+		        exit(1);
+		}
+	}
+
+    if (s->path == NULL) {
+        s->path = strdup(DEFAULT_SOCK_PATH);
+    }
+
+    printf("listening socket: %s\n", s->path);
+
+    if (s->shmobj == NULL) {
+        s->shmobj = strdup(DEFAULT_SHM_OBJ);
+    }
+
+    printf("shared object: %s\n", s->shmobj);
+    printf("shared object size: %d (bytes)\n", s->shm_size);
+
+}
+
+void print_vec(server_state_t * s, const char * c) {
+
+    int i, j;
+
+#if DEBUG
+    printf("%s (%ld) = ", c, s->total_count);
+    for (i = 0; i<  s->total_count; i++) {
+        if (s->live_vms[i].alive) {
+            for (j = 0; j<  s->msi_vectors; j++) {
+                printf("[%d|%d] ", s->live_vms[i].sockfd, s->live_vms[i].efd[j]);
+            }
+        }
+    }
+    printf("\n");
+#endif
+
+}
+
+int find_set(fd_set * readset, int max) {
+
+    int i;
+
+    for (i = 1; i<  max; i++) {
+        if (FD_ISSET(i, readset)) {
+            return i;
+        }
+    }
+
+    printf("nothing set\n");
+    return -1;
+
+}
+
+void usage(char const *prg) {
+	fprintf(stderr, "use: %s [-h]  [-p<unix socket>] [-s<shm obj>] "
+            "[-m<size in MB>] [-n<# of MSI vectors>]\n", prg);
+}
diff --git a/contrib/ivshmem-server/send_scm.c b/contrib/ivshmem-server/send_scm.c
new file mode 100644
index 0000000..b1bb4a3
--- /dev/null
+++ b/contrib/ivshmem-server/send_scm.c
@@ -0,0 +1,208 @@
+#include<stdint.h>
+#include<stdlib.h>
+#include<errno.h>
+#include<stdio.h>
+#include<unistd.h>
+#include<sys/socket.h>
+#include<sys/syscall.h>
+#include<sys/un.h>
+#include<sys/types.h>
+#include<sys/stat.h>
+#include<fcntl.h>
+#include<poll.h>
+#include "send_scm.h"
+
+#ifndef POLLRDHUP
+#define POLLRDHUP 0x2000
+#endif
+
+int readUpdate(int fd, long * posn, int * newfd)
+{
+    struct msghdr msg;
+    struct iovec iov[1];
+    struct cmsghdr *cmptr;
+    size_t len;
+    size_t msg_size = sizeof(int);
+    char control[CMSG_SPACE(msg_size)];
+
+    msg.msg_name = 0;
+    msg.msg_namelen = 0;
+    msg.msg_control = control;
+    msg.msg_controllen = sizeof(control);
+    msg.msg_flags = 0;
+    msg.msg_iov = iov;
+    msg.msg_iovlen = 1;
+
+    iov[0].iov_base =&posn;
+    iov[0].iov_len = sizeof(posn);
+
+    do {
+        len = recvmsg(fd,&msg, 0);
+    } while (len == (size_t) (-1)&&  (errno == EINTR || errno == EAGAIN));
+
+    printf("iov[0].buf is %ld\n", *((long *)iov[0].iov_base));
+    printf("len is %ld\n", len);
+    // TODO: Logging
+    if (len == (size_t) (-1)) {
+        perror("recvmsg()");
+        return -1;
+    }
+
+    if (msg.msg_controllen<  sizeof(struct cmsghdr))
+        return *posn;
+
+    for (cmptr = CMSG_FIRSTHDR(&msg); cmptr != NULL;
+        cmptr = CMSG_NXTHDR(&msg, cmptr)) {
+        if (cmptr->cmsg_level != SOL_SOCKET ||
+            cmptr->cmsg_type != SCM_RIGHTS){
+                printf("continuing %ld\n", sizeof(size_t));
+                printf("read msg_size = %ld\n", msg_size);
+                if (cmptr->cmsg_len != sizeof(control))
+                    printf("not equal (%ld != %ld)\n",cmptr->cmsg_len,sizeof(control));
+                continue;
+        }
+
+        memcpy(newfd, CMSG_DATA(cmptr), sizeof(int));
+        printf("posn is %ld (fd = %d)\n", *posn, *newfd);
+        return 0;
+    }
+
+    fprintf(stderr, "bad data in packet\n");
+    return -1;
+}
+
+int readRights(int fd, long count, size_t count_len, int **fds, int msi_vectors)
+{
+    int j, newfd;
+
+    for (; ;){
+        long posn = 0;
+
+        readUpdate(fd,&posn,&newfd);
+        printf("reading posn %ld ", posn);
+        fds[posn] = (int *)malloc (msi_vectors * sizeof(int));
+        fds[posn][0] = newfd;
+        for (j = 1; j<  msi_vectors; j++) {
+            readUpdate(fd,&posn,&newfd);
+            fds[posn][j] = newfd;
+            printf("%d.", fds[posn][j]);
+        }
+        printf("\n");
+
+        /* stop reading once i've read my own eventfds */
+        if (posn == count)
+            break;
+    }
+
+    return 0;
+}
+
+int sendKill(int fd, long const posn, size_t posn_len) {
+
+    struct cmsghdr *cmsg;
+    size_t msg_size = sizeof(int);
+    char control[CMSG_SPACE(msg_size)];
+    struct iovec iov[1];
+    size_t len;
+    struct msghdr msg = { 0, 0, iov, 1, control, sizeof control, 0 };
+
+    struct pollfd mypollfd;
+    int rv;
+
+    iov[0].iov_base = (void *)&posn;
+    iov[0].iov_len = posn_len;
+
+    // from cmsg(3)
+    cmsg = CMSG_FIRSTHDR(&msg);
+    cmsg->cmsg_level = SOL_SOCKET;
+    cmsg->cmsg_len = 0;
+    msg.msg_controllen = cmsg->cmsg_len;
+
+    printf("Killing posn %ld\n", posn);
+
+    // check if the fd is dead or not
+    mypollfd.fd = fd;
+    mypollfd.events = POLLRDHUP;
+    mypollfd.revents = 0;
+
+    rv = poll(&mypollfd, 1, 0);
+
+    printf("rv is %d\n", rv);
+
+    if (rv == 0) {
+        len = sendmsg(fd,&msg, 0);
+        if (len == (size_t) (-1)) {
+            perror("sendmsg()");
+            return -1;
+        }
+        return (len == posn_len);
+    } else {
+        printf("already dead\n");
+        return 0;
+    }
+}
+
+int sendUpdate(int fd, long posn, size_t posn_len, int sendfd)
+{
+
+    struct cmsghdr *cmsg;
+    size_t msg_size = sizeof(int);
+    char control[CMSG_SPACE(msg_size)];
+    struct iovec iov[1];
+    size_t len;
+    struct msghdr msg = { 0, 0, iov, 1, control, sizeof control, 0 };
+
+    iov[0].iov_base = (void *) (&posn);
+    iov[0].iov_len = posn_len;
+
+    // from cmsg(3)
+    cmsg = CMSG_FIRSTHDR(&msg);
+    cmsg->cmsg_level = SOL_SOCKET;
+    cmsg->cmsg_type = SCM_RIGHTS;
+    cmsg->cmsg_len = CMSG_LEN(msg_size);
+    msg.msg_controllen = cmsg->cmsg_len;
+
+    memcpy((CMSG_DATA(cmsg)),&sendfd, msg_size);
+
+    len = sendmsg(fd,&msg, 0);
+    if (len == (size_t) (-1)) {
+        perror("sendmsg()");
+        return -1;
+    }
+
+    return (len == posn_len);
+
+}
+
+int sendPosition(int fd, long const posn)
+{
+    int rv;
+
+    rv = send(fd,&posn, sizeof(long), 0);
+    if (rv != sizeof(long)) {
+        fprintf(stderr, "error sending posn\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+int sendRights(int fd, long const count, size_t count_len, vmguest_t * Live_vms,
+                                                            long msi_vectors)
+{
+    /* updates about new guests are sent one at a time */
+
+    long i, j;
+
+    for (i = 0; i<= count; i++) {
+        if (Live_vms[i].alive) {
+            for (j = 0; j<  msi_vectors; j++) {
+                sendUpdate(Live_vms[count].sockfd, i, sizeof(long),
+                                                        Live_vms[i].efd[j]);
+            }
+        }
+    }
+
+    return 0;
+
+}
diff --git a/contrib/ivshmem-server/send_scm.h b/contrib/ivshmem-server/send_scm.h
new file mode 100644
index 0000000..48c9a8d
--- /dev/null
+++ b/contrib/ivshmem-server/send_scm.h
@@ -0,0 +1,19 @@
+#ifndef SEND_SCM
+#define SEND_SCM
+
+struct vm_guest_conn {
+    int posn;
+    int sockfd;
+    int * efd;
+    int alive;
+};
+
+typedef struct vm_guest_conn vmguest_t;
+
+int readRights(int fd, long count, size_t count_len, int **fds, int msi_vectors);
+int sendRights(int fd, long const count, size_t count_len, vmguest_t *Live_vms, long msi_vectors);
+int readUpdate(int fd, long * posn, int * newfd);
+int sendUpdate(int fd, long const posn, size_t posn_len, int sendfd);
+int sendPosition(int fd, long const posn);
+int sendKill(int fd, long const posn, size_t posn_len);
+#endif

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux