On Mon, Jun 14, 2010 at 9:53 AM, Anthony Liguori <anthony@xxxxxxxxxxxxx> wrote: > On 06/04/2010 04:45 PM, Cam Macdonell wrote: >> >> this code is a standalone server which will pass file descriptors for the >> shared >> memory region and eventfds to support interrupts between guests using >> inter-VM >> shared memory. >> --- >> contrib/ivshmem-server/Makefile | 16 ++ >> contrib/ivshmem-server/README | 30 +++ >> contrib/ivshmem-server/ivshmem_server.c | 353 >> +++++++++++++++++++++++++++++++ >> contrib/ivshmem-server/send_scm.c | 208 ++++++++++++++++++ >> contrib/ivshmem-server/send_scm.h | 19 ++ >> 5 files changed, 626 insertions(+), 0 deletions(-) >> create mode 100644 contrib/ivshmem-server/Makefile >> create mode 100644 contrib/ivshmem-server/README >> create mode 100644 contrib/ivshmem-server/ivshmem_server.c >> create mode 100644 contrib/ivshmem-server/send_scm.c >> create mode 100644 contrib/ivshmem-server/send_scm.h >> >> diff --git a/contrib/ivshmem-server/Makefile >> b/contrib/ivshmem-server/Makefile >> new file mode 100644 >> index 0000000..da40ffa >> --- /dev/null >> +++ b/contrib/ivshmem-server/Makefile >> @@ -0,0 +1,16 @@ >> +CC = gcc >> +CFLAGS = -O3 -Wall -Werror >> +LIBS = -lrt >> + >> +# a very simple makefile to build the inter-VM shared memory server >> + >> +all: ivshmem_server >> + >> +.c.o: >> + $(CC) $(CFLAGS) -c $^ -o $@ >> + >> +ivshmem_server: ivshmem_server.o send_scm.o >> + $(CC) $(CFLAGS) -o $@ $^ $(LIBS) >> + >> +clean: >> + rm -f *.o ivshmem_server >> diff --git a/contrib/ivshmem-server/README b/contrib/ivshmem-server/README >> new file mode 100644 >> index 0000000..b1fc2a2 >> --- /dev/null >> +++ b/contrib/ivshmem-server/README >> @@ -0,0 +1,30 @@ >> +Using the ivshmem shared memory server >> +-------------------------------------- >> + >> +This server is only supported on Linux. >> + >> +To use the shared memory server, first compile it. Running 'make' should >> +accomplish this. An executable named 'ivshmem_server' will be built. >> + >> +to display the options run: >> + >> +./ivshmem_server -h >> + >> +Options >> +------- >> + >> + -h print help message >> + >> + -p<path on host> >> + unix socket to listen on. The qemu-kvm chardev needs to connect >> on >> + this socket. (default: '/tmp/ivshmem_socket') >> + >> + -s<string> >> + POSIX shared object to create that is the shared memory (default: >> 'ivshmem') >> + >> + -m<#> >> + size of the POSIX object in MBs (default: 1) >> + >> + -n<#> >> + number of eventfds for each guest. This number must match the >> + 'vectors' argument passed the ivshmem device. (default: 1) >> diff --git a/contrib/ivshmem-server/ivshmem_server.c >> b/contrib/ivshmem-server/ivshmem_server.c >> new file mode 100644 >> index 0000000..e0a7b98 >> --- /dev/null >> +++ b/contrib/ivshmem-server/ivshmem_server.c >> > > There's no licensing here. I don't think this belongs in the qemu tree > either to be honest. If it were to be included, it ought to use all of the > existing qemu infrastructure like the other qemu-* tools. For the time being, I'm willing to leave it out and host it externally. > > Regards, > > Anthony Liguori > >> @@ -0,0 +1,353 @@ >> +/* >> + * A stand-alone shared memory server for inter-VM shared memory for KVM >> +*/ >> + >> +#include<errno.h> >> +#include<string.h> >> +#include<sys/types.h> >> +#include<sys/socket.h> >> +#include<sys/un.h> >> +#include<unistd.h> >> +#include<sys/types.h> >> +#include<sys/stat.h> >> +#include<fcntl.h> >> +#include<sys/eventfd.h> >> +#include<sys/mman.h> >> +#include<sys/select.h> >> +#include<stdio.h> >> +#include<stdlib.h> >> +#include "send_scm.h" >> + >> +#define DEFAULT_SOCK_PATH "/tmp/ivshmem_socket" >> +#define DEFAULT_SHM_OBJ "ivshmem" >> + >> +#define DEBUG 1 >> + >> +typedef struct server_state { >> + vmguest_t *live_vms; >> + int nr_allocated_vms; >> + int shm_size; >> + long live_count; >> + long total_count; >> + int shm_fd; >> + char * path; >> + char * shmobj; >> + int maxfd, conn_socket; >> + long msi_vectors; >> +} server_state_t; >> + >> +void usage(char const *prg); >> +int find_set(fd_set * readset, int max); >> +void print_vec(server_state_t * s, const char * c); >> + >> +void add_new_guest(server_state_t * s); >> +void parse_args(int argc, char **argv, server_state_t * s); >> +int create_listening_socket(char * path); >> + >> +int main(int argc, char ** argv) >> +{ >> + fd_set readset; >> + server_state_t * s; >> + >> + s = (server_state_t *)calloc(1, sizeof(server_state_t)); >> + >> + s->live_count = 0; >> + s->total_count = 0; >> + parse_args(argc, argv, s); >> + >> + /* open shared memory file */ >> + if ((s->shm_fd = shm_open(s->shmobj, O_CREAT|O_RDWR, S_IRWXU))< 0) >> + { >> + fprintf(stderr, "kvm_ivshmem: could not open shared file\n"); >> + exit(-1); >> + } >> + >> + ftruncate(s->shm_fd, s->shm_size); >> + >> + s->conn_socket = create_listening_socket(s->path); >> + >> + s->maxfd = s->conn_socket; >> + >> + for(;;) { >> + int ret, handle, i; >> + char buf[1024]; >> + >> + print_vec(s, "vm_sockets"); >> + >> + FD_ZERO(&readset); >> + /* conn socket is in Live_vms at posn 0 */ >> + FD_SET(s->conn_socket,&readset); >> + for (i = 0; i< s->total_count; i++) { >> + if (s->live_vms[i].alive != 0) { >> + FD_SET(s->live_vms[i].sockfd,&readset); >> + } >> + } >> + >> + printf("\nWaiting (maxfd = %d)\n", s->maxfd); >> + >> + ret = select(s->maxfd + 1,&readset, NULL, NULL, NULL); >> + >> + if (ret == -1) { >> + perror("select()"); >> + } >> + >> + handle = find_set(&readset, s->maxfd + 1); >> + if (handle == -1) continue; >> + >> + if (handle == s->conn_socket) { >> + >> + printf("[NC] new connection\n"); >> + FD_CLR(s->conn_socket,&readset); >> + >> + /* The Total_count is equal to the new guests VM ID */ >> + add_new_guest(s); >> + >> + /* update our the maximum file descriptor number */ >> + s->maxfd = s->live_vms[s->total_count - 1].sockfd> s->maxfd >> ? >> + s->live_vms[s->total_count - 1].sockfd : >> s->maxfd; >> + >> + s->live_count++; >> + printf("Live_count is %ld\n", s->live_count); >> + >> + } else { >> + /* then we have received a disconnection */ >> + int recv_ret; >> + long i, j; >> + long deadposn = -1; >> + >> + recv_ret = recv(handle, buf, 1, 0); >> + >> + printf("[DC] recv returned %d\n", recv_ret); >> + >> + /* find the dead VM in our list and move it do the dead list. >> */ >> + for (i = 0; i< s->total_count; i++) { >> + if (s->live_vms[i].sockfd == handle) { >> + deadposn = i; >> + s->live_vms[i].alive = 0; >> + close(s->live_vms[i].sockfd); >> + >> + for (j = 0; j< s->msi_vectors; j++) { >> + close(s->live_vms[i].efd[j]); >> + } >> + >> + free(s->live_vms[i].efd); >> + s->live_vms[i].sockfd = -1; >> + break; >> + } >> + } >> + >> + for (j = 0; j< s->total_count; j++) { >> + /* update remaining clients that one client has left/died >> */ >> + if (s->live_vms[j].alive) { >> + printf("[UD] sending kill of fd[%ld] to %ld\n", >> + deadposn, >> j); >> + sendKill(s->live_vms[j].sockfd, deadposn, >> sizeof(deadposn)); >> + } >> + } >> + >> + s->live_count--; >> + >> + /* close the socket for the departed VM */ >> + close(handle); >> + } >> + >> + } >> + >> + return 0; >> +} >> + >> +void add_new_guest(server_state_t * s) { >> + >> + struct sockaddr_un remote; >> + socklen_t t = sizeof(remote); >> + long i, j; >> + int vm_sock; >> + long new_posn; >> + long neg1 = -1; >> + >> + vm_sock = accept(s->conn_socket, (struct sockaddr *)&remote,&t); >> + >> + if ( vm_sock == -1 ) { >> + perror("accept"); >> + exit(1); >> + } >> + >> + new_posn = s->total_count; >> + >> + if (new_posn == s->nr_allocated_vms) { >> + printf("increasing vm slots\n"); >> + s->nr_allocated_vms = s->nr_allocated_vms * 2; >> + if (s->nr_allocated_vms< 16) >> + s->nr_allocated_vms = 16; >> + s->live_vms = realloc(s->live_vms, >> + s->nr_allocated_vms * sizeof(vmguest_t)); >> + >> + if (s->live_vms == NULL) { >> + fprintf(stderr, "realloc failed - quitting\n"); >> + exit(-1); >> + } >> + } >> + >> + s->live_vms[new_posn].posn = new_posn; >> + printf("[NC] Live_vms[%ld]\n", new_posn); >> + s->live_vms[new_posn].efd = (int *) malloc(sizeof(int)); >> + for (i = 0; i< s->msi_vectors; i++) { >> + s->live_vms[new_posn].efd[i] = eventfd(0, 0); >> + printf("\tefd[%ld] = %d\n", i, s->live_vms[new_posn].efd[i]); >> + } >> + s->live_vms[new_posn].sockfd = vm_sock; >> + s->live_vms[new_posn].alive = 1; >> + >> + >> + sendPosition(vm_sock, new_posn); >> + sendUpdate(vm_sock, neg1, sizeof(long), s->shm_fd); >> + printf("[NC] trying to send fds to new connection\n"); >> + sendRights(vm_sock, new_posn, sizeof(new_posn), s->live_vms, >> s->msi_vectors); >> + >> + printf("[NC] Connected (count = %ld).\n", new_posn); >> + for (i = 0; i< new_posn; i++) { >> + if (s->live_vms[i].alive) { >> + // ping all clients that a new client has joined >> + printf("[UD] sending fd[%ld] to %ld\n", new_posn, i); >> + for (j = 0; j< s->msi_vectors; j++) { >> + printf("\tefd[%ld] = [%d]", j, >> s->live_vms[new_posn].efd[j]); >> + sendUpdate(s->live_vms[i].sockfd, new_posn, >> + sizeof(new_posn), s->live_vms[new_posn].efd[j]); >> + } >> + printf("\n"); >> + } >> + } >> + >> + s->total_count++; >> +} >> + >> +int create_listening_socket(char * path) { >> + >> + struct sockaddr_un local; >> + int len, conn_socket; >> + >> + if ((conn_socket = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) { >> + perror("socket"); >> + exit(1); >> + } >> + >> + local.sun_family = AF_UNIX; >> + strcpy(local.sun_path, path); >> + unlink(local.sun_path); >> + len = strlen(local.sun_path) + sizeof(local.sun_family); >> + if (bind(conn_socket, (struct sockaddr *)&local, len) == -1) { >> + perror("bind"); >> + exit(1); >> + } >> + >> + if (listen(conn_socket, 5) == -1) { >> + perror("listen"); >> + exit(1); >> + } >> + >> + return conn_socket; >> + >> +} >> + >> +void parse_args(int argc, char **argv, server_state_t * s) { >> + >> + int c; >> + >> + s->shm_size = 1024 * 1024; // default shm_size >> + s->path = NULL; >> + s->shmobj = NULL; >> + s->msi_vectors = 1; >> + >> + while ((c = getopt(argc, argv, "hp:s:m:n:")) != -1) { >> + >> + switch (c) { >> + // path to listening socket >> + case 'p': >> + s->path = optarg; >> + break; >> + // name of shared memory object >> + case 's': >> + s->shmobj = optarg; >> + break; >> + // size of shared memory object >> + case 'm': { >> + uint64_t value; >> + char *ptr; >> + >> + value = strtoul(optarg,&ptr, 10); >> + switch (*ptr) { >> + case 0: case 'M': case 'm': >> + value<<= 20; >> + break; >> + case 'G': case 'g': >> + value<<= 30; >> + break; >> + default: >> + fprintf(stderr, "qemu: invalid ram size: %s\n", >> optarg); >> + exit(1); >> + } >> + s->shm_size = value; >> + break; >> + } >> + case 'n': >> + s->msi_vectors = atol(optarg); >> + break; >> + case 'h': >> + default: >> + usage(argv[0]); >> + exit(1); >> + } >> + } >> + >> + if (s->path == NULL) { >> + s->path = strdup(DEFAULT_SOCK_PATH); >> + } >> + >> + printf("listening socket: %s\n", s->path); >> + >> + if (s->shmobj == NULL) { >> + s->shmobj = strdup(DEFAULT_SHM_OBJ); >> + } >> + >> + printf("shared object: %s\n", s->shmobj); >> + printf("shared object size: %d (bytes)\n", s->shm_size); >> + >> +} >> + >> +void print_vec(server_state_t * s, const char * c) { >> + >> + int i, j; >> + >> +#if DEBUG >> + printf("%s (%ld) = ", c, s->total_count); >> + for (i = 0; i< s->total_count; i++) { >> + if (s->live_vms[i].alive) { >> + for (j = 0; j< s->msi_vectors; j++) { >> + printf("[%d|%d] ", s->live_vms[i].sockfd, >> s->live_vms[i].efd[j]); >> + } >> + } >> + } >> + printf("\n"); >> +#endif >> + >> +} >> + >> +int find_set(fd_set * readset, int max) { >> + >> + int i; >> + >> + for (i = 1; i< max; i++) { >> + if (FD_ISSET(i, readset)) { >> + return i; >> + } >> + } >> + >> + printf("nothing set\n"); >> + return -1; >> + >> +} >> + >> +void usage(char const *prg) { >> + fprintf(stderr, "use: %s [-h] [-p<unix socket>] [-s<shm obj>] " >> + "[-m<size in MB>] [-n<# of MSI vectors>]\n", prg); >> +} >> diff --git a/contrib/ivshmem-server/send_scm.c >> b/contrib/ivshmem-server/send_scm.c >> new file mode 100644 >> index 0000000..b1bb4a3 >> --- /dev/null >> +++ b/contrib/ivshmem-server/send_scm.c >> @@ -0,0 +1,208 @@ >> +#include<stdint.h> >> +#include<stdlib.h> >> +#include<errno.h> >> +#include<stdio.h> >> +#include<unistd.h> >> +#include<sys/socket.h> >> +#include<sys/syscall.h> >> +#include<sys/un.h> >> +#include<sys/types.h> >> +#include<sys/stat.h> >> +#include<fcntl.h> >> +#include<poll.h> >> +#include "send_scm.h" >> + >> +#ifndef POLLRDHUP >> +#define POLLRDHUP 0x2000 >> +#endif >> + >> +int readUpdate(int fd, long * posn, int * newfd) >> +{ >> + struct msghdr msg; >> + struct iovec iov[1]; >> + struct cmsghdr *cmptr; >> + size_t len; >> + size_t msg_size = sizeof(int); >> + char control[CMSG_SPACE(msg_size)]; >> + >> + msg.msg_name = 0; >> + msg.msg_namelen = 0; >> + msg.msg_control = control; >> + msg.msg_controllen = sizeof(control); >> + msg.msg_flags = 0; >> + msg.msg_iov = iov; >> + msg.msg_iovlen = 1; >> + >> + iov[0].iov_base =&posn; >> + iov[0].iov_len = sizeof(posn); >> + >> + do { >> + len = recvmsg(fd,&msg, 0); >> + } while (len == (size_t) (-1)&& (errno == EINTR || errno == >> EAGAIN)); >> + >> + printf("iov[0].buf is %ld\n", *((long *)iov[0].iov_base)); >> + printf("len is %ld\n", len); >> + // TODO: Logging >> + if (len == (size_t) (-1)) { >> + perror("recvmsg()"); >> + return -1; >> + } >> + >> + if (msg.msg_controllen< sizeof(struct cmsghdr)) >> + return *posn; >> + >> + for (cmptr = CMSG_FIRSTHDR(&msg); cmptr != NULL; >> + cmptr = CMSG_NXTHDR(&msg, cmptr)) { >> + if (cmptr->cmsg_level != SOL_SOCKET || >> + cmptr->cmsg_type != SCM_RIGHTS){ >> + printf("continuing %ld\n", sizeof(size_t)); >> + printf("read msg_size = %ld\n", msg_size); >> + if (cmptr->cmsg_len != sizeof(control)) >> + printf("not equal (%ld != >> %ld)\n",cmptr->cmsg_len,sizeof(control)); >> + continue; >> + } >> + >> + memcpy(newfd, CMSG_DATA(cmptr), sizeof(int)); >> + printf("posn is %ld (fd = %d)\n", *posn, *newfd); >> + return 0; >> + } >> + >> + fprintf(stderr, "bad data in packet\n"); >> + return -1; >> +} >> + >> +int readRights(int fd, long count, size_t count_len, int **fds, int >> msi_vectors) >> +{ >> + int j, newfd; >> + >> + for (; ;){ >> + long posn = 0; >> + >> + readUpdate(fd,&posn,&newfd); >> + printf("reading posn %ld ", posn); >> + fds[posn] = (int *)malloc (msi_vectors * sizeof(int)); >> + fds[posn][0] = newfd; >> + for (j = 1; j< msi_vectors; j++) { >> + readUpdate(fd,&posn,&newfd); >> + fds[posn][j] = newfd; >> + printf("%d.", fds[posn][j]); >> + } >> + printf("\n"); >> + >> + /* stop reading once i've read my own eventfds */ >> + if (posn == count) >> + break; >> + } >> + >> + return 0; >> +} >> + >> +int sendKill(int fd, long const posn, size_t posn_len) { >> + >> + struct cmsghdr *cmsg; >> + size_t msg_size = sizeof(int); >> + char control[CMSG_SPACE(msg_size)]; >> + struct iovec iov[1]; >> + size_t len; >> + struct msghdr msg = { 0, 0, iov, 1, control, sizeof control, 0 }; >> + >> + struct pollfd mypollfd; >> + int rv; >> + >> + iov[0].iov_base = (void *)&posn; >> + iov[0].iov_len = posn_len; >> + >> + // from cmsg(3) >> + cmsg = CMSG_FIRSTHDR(&msg); >> + cmsg->cmsg_level = SOL_SOCKET; >> + cmsg->cmsg_len = 0; >> + msg.msg_controllen = cmsg->cmsg_len; >> + >> + printf("Killing posn %ld\n", posn); >> + >> + // check if the fd is dead or not >> + mypollfd.fd = fd; >> + mypollfd.events = POLLRDHUP; >> + mypollfd.revents = 0; >> + >> + rv = poll(&mypollfd, 1, 0); >> + >> + printf("rv is %d\n", rv); >> + >> + if (rv == 0) { >> + len = sendmsg(fd,&msg, 0); >> + if (len == (size_t) (-1)) { >> + perror("sendmsg()"); >> + return -1; >> + } >> + return (len == posn_len); >> + } else { >> + printf("already dead\n"); >> + return 0; >> + } >> +} >> + >> +int sendUpdate(int fd, long posn, size_t posn_len, int sendfd) >> +{ >> + >> + struct cmsghdr *cmsg; >> + size_t msg_size = sizeof(int); >> + char control[CMSG_SPACE(msg_size)]; >> + struct iovec iov[1]; >> + size_t len; >> + struct msghdr msg = { 0, 0, iov, 1, control, sizeof control, 0 }; >> + >> + iov[0].iov_base = (void *) (&posn); >> + iov[0].iov_len = posn_len; >> + >> + // from cmsg(3) >> + cmsg = CMSG_FIRSTHDR(&msg); >> + cmsg->cmsg_level = SOL_SOCKET; >> + cmsg->cmsg_type = SCM_RIGHTS; >> + cmsg->cmsg_len = CMSG_LEN(msg_size); >> + msg.msg_controllen = cmsg->cmsg_len; >> + >> + memcpy((CMSG_DATA(cmsg)),&sendfd, msg_size); >> + >> + len = sendmsg(fd,&msg, 0); >> + if (len == (size_t) (-1)) { >> + perror("sendmsg()"); >> + return -1; >> + } >> + >> + return (len == posn_len); >> + >> +} >> + >> +int sendPosition(int fd, long const posn) >> +{ >> + int rv; >> + >> + rv = send(fd,&posn, sizeof(long), 0); >> + if (rv != sizeof(long)) { >> + fprintf(stderr, "error sending posn\n"); >> + return -1; >> + } >> + >> + return 0; >> +} >> + >> +int sendRights(int fd, long const count, size_t count_len, vmguest_t * >> Live_vms, >> + long >> msi_vectors) >> +{ >> + /* updates about new guests are sent one at a time */ >> + >> + long i, j; >> + >> + for (i = 0; i<= count; i++) { >> + if (Live_vms[i].alive) { >> + for (j = 0; j< msi_vectors; j++) { >> + sendUpdate(Live_vms[count].sockfd, i, sizeof(long), >> + >> Live_vms[i].efd[j]); >> + } >> + } >> + } >> + >> + return 0; >> + >> +} >> diff --git a/contrib/ivshmem-server/send_scm.h >> b/contrib/ivshmem-server/send_scm.h >> new file mode 100644 >> index 0000000..48c9a8d >> --- /dev/null >> +++ b/contrib/ivshmem-server/send_scm.h >> @@ -0,0 +1,19 @@ >> +#ifndef SEND_SCM >> +#define SEND_SCM >> + >> +struct vm_guest_conn { >> + int posn; >> + int sockfd; >> + int * efd; >> + int alive; >> +}; >> + >> +typedef struct vm_guest_conn vmguest_t; >> + >> +int readRights(int fd, long count, size_t count_len, int **fds, int >> msi_vectors); >> +int sendRights(int fd, long const count, size_t count_len, vmguest_t >> *Live_vms, long msi_vectors); >> +int readUpdate(int fd, long * posn, int * newfd); >> +int sendUpdate(int fd, long const posn, size_t posn_len, int sendfd); >> +int sendPosition(int fd, long const posn); >> +int sendKill(int fd, long const posn, size_t posn_len); >> +#endif >> > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html