This is a note to let you know that I've just added the patch titled samples: move mic/mpssd example code from Documentation to the 4.4-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: samples-move-mic-mpssd-example-code-from-documentation.patch and it can be found in the queue-4.4 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. >From 6bee835dd54e279f3d3ae2eca92a9c394b4fd028 Mon Sep 17 00:00:00 2001 From: Shuah Khan <shuahkh@xxxxxxxxxxxxxxx> Date: Fri, 16 Sep 2016 15:53:52 -0600 Subject: samples: move mic/mpssd example code from Documentation From: Shuah Khan <shuahkh@xxxxxxxxxxxxxxx> commit 6bee835dd54e279f3d3ae2eca92a9c394b4fd028 upstream. Move mic/mpssd examples to samples and remove it from Documentation Makefile. Create a new Makefile to build mic/mpssd. It can be built from top level directory or from mic/mpssd directory: Run make -C samples/mic/mpssd or cd samples/mic/mpssd; make Acked-by: Jonathan Corbet <corbet@xxxxxxx> Signed-off-by: Shuah Khan <shuahkh@xxxxxxxxxxxxxxx> [backported to 4.4-stable as this code is broken on newer versions of gcc and we don't want to break the build for a Documentation sample. 
- gregkh] Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- Documentation/Makefile | 2 Documentation/mic/Makefile | 1 Documentation/mic/mpssd/.gitignore | 1 Documentation/mic/mpssd/Makefile | 21 Documentation/mic/mpssd/micctrl | 173 --- Documentation/mic/mpssd/mpss | 200 ---- Documentation/mic/mpssd/mpssd.c | 1826 ------------------------------------- Documentation/mic/mpssd/mpssd.h | 103 -- Documentation/mic/mpssd/sysfs.c | 102 -- samples/mic/mpssd/.gitignore | 1 samples/mic/mpssd/Makefile | 27 samples/mic/mpssd/micctrl | 173 +++ samples/mic/mpssd/mpss | 200 ++++ samples/mic/mpssd/mpssd.c | 1826 +++++++++++++++++++++++++++++++++++++ samples/mic/mpssd/mpssd.h | 103 ++ samples/mic/mpssd/sysfs.c | 102 ++ 16 files changed, 2433 insertions(+), 2428 deletions(-) --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -1,4 +1,4 @@ subdir-y := accounting auxdisplay blackfin connector \ - filesystems filesystems ia64 laptops mic misc-devices \ + filesystems filesystems ia64 laptops misc-devices \ networking pcmcia prctl ptp spi timers vDSO video4linux \ watchdog --- a/Documentation/mic/Makefile +++ /dev/null @@ -1 +0,0 @@ -subdir-y := mpssd --- a/Documentation/mic/mpssd/.gitignore +++ /dev/null @@ -1 +0,0 @@ -mpssd --- a/Documentation/mic/mpssd/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -ifndef CROSS_COMPILE -# List of programs to build -hostprogs-$(CONFIG_X86_64) := mpssd - -mpssd-objs := mpssd.o sysfs.o - -# Tell kbuild to always build the programs -always := $(hostprogs-y) - -HOSTCFLAGS += -I$(objtree)/usr/include -I$(srctree)/tools/include - -ifdef DEBUG -HOSTCFLAGS += -DDEBUG=$(DEBUG) -endif - -HOSTLOADLIBES_mpssd := -lpthread - -install: - install mpssd /usr/sbin/mpssd - install micctrl /usr/sbin/micctrl -endif --- a/Documentation/mic/mpssd/micctrl +++ /dev/null @@ -1,173 +0,0 @@ -#!/bin/bash -# Intel MIC Platform Software Stack (MPSS) -# -# Copyright(c) 2013 Intel Corporation. 
-# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License, version 2, as -# published by the Free Software Foundation. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# The full GNU General Public License is included in this distribution in -# the file called "COPYING". -# -# Intel MIC User Space Tools. -# -# micctrl - Controls MIC boot/start/stop. -# -# chkconfig: 2345 95 05 -# description: start MPSS stack processing. -# -### BEGIN INIT INFO -# Provides: micctrl -### END INIT INFO - -# Source function library. -. /etc/init.d/functions - -sysfs="/sys/class/mic" - -_status() -{ - f=$sysfs/$1 - echo -e $1 state: "`cat $f/state`" shutdown_status: "`cat $f/shutdown_status`" -} - -status() -{ - if [ "`echo $1 | head -c3`" == "mic" ]; then - _status $1 - return $? - fi - for f in $sysfs/* - do - _status `basename $f` - RETVAL=$? - [ $RETVAL -ne 0 ] && return $RETVAL - done - return 0 -} - -_reset() -{ - f=$sysfs/$1 - echo reset > $f/state -} - -reset() -{ - if [ "`echo $1 | head -c3`" == "mic" ]; then - _reset $1 - return $? - fi - for f in $sysfs/* - do - _reset `basename $f` - RETVAL=$? - [ $RETVAL -ne 0 ] && return $RETVAL - done - return 0 -} - -_boot() -{ - f=$sysfs/$1 - echo "linux" > $f/bootmode - echo "mic/uos.img" > $f/firmware - echo "mic/$1.image" > $f/ramdisk - echo "boot" > $f/state -} - -boot() -{ - if [ "`echo $1 | head -c3`" == "mic" ]; then - _boot $1 - return $? - fi - for f in $sysfs/* - do - _boot `basename $f` - RETVAL=$? - [ $RETVAL -ne 0 ] && return $RETVAL - done - return 0 -} - -_shutdown() -{ - f=$sysfs/$1 - echo shutdown > $f/state -} - -shutdown() -{ - if [ "`echo $1 | head -c3`" == "mic" ]; then - _shutdown $1 - return $? 
- fi - for f in $sysfs/* - do - _shutdown `basename $f` - RETVAL=$? - [ $RETVAL -ne 0 ] && return $RETVAL - done - return 0 -} - -_wait() -{ - f=$sysfs/$1 - while [ "`cat $f/state`" != "offline" -a "`cat $f/state`" != "online" ] - do - sleep 1 - echo -e "Waiting for $1 to go offline" - done -} - -wait() -{ - if [ "`echo $1 | head -c3`" == "mic" ]; then - _wait $1 - return $? - fi - # Wait for the cards to go offline - for f in $sysfs/* - do - _wait `basename $f` - RETVAL=$? - [ $RETVAL -ne 0 ] && return $RETVAL - done - return 0 -} - -if [ ! -d "$sysfs" ]; then - echo -e $"Module unloaded " - exit 3 -fi - -case $1 in - -s) - status $2 - ;; - -r) - reset $2 - ;; - -b) - boot $2 - ;; - -S) - shutdown $2 - ;; - -w) - wait $2 - ;; - *) - echo $"Usage: $0 {-s (status) |-r (reset) |-b (boot) |-S (shutdown) |-w (wait)}" - exit 2 -esac - -exit $? --- a/Documentation/mic/mpssd/mpss +++ /dev/null @@ -1,200 +0,0 @@ -#!/bin/bash -# Intel MIC Platform Software Stack (MPSS) -# -# Copyright(c) 2013 Intel Corporation. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License, version 2, as -# published by the Free Software Foundation. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# The full GNU General Public License is included in this distribution in -# the file called "COPYING". -# -# Intel MIC User Space Tools. -# -# mpss Start mpssd. -# -# chkconfig: 2345 95 05 -# description: start MPSS stack processing. -# -### BEGIN INIT INFO -# Provides: mpss -# Required-Start: -# Required-Stop: -# Short-Description: MPSS stack control -# Description: MPSS stack control -### END INIT INFO - -# Source function library. -. 
/etc/init.d/functions - -exec=/usr/sbin/mpssd -sysfs="/sys/class/mic" -mic_modules="mic_host mic_x100_dma scif" - -start() -{ - [ -x $exec ] || exit 5 - - if [ "`ps -e | awk '{print $4}' | grep mpssd | head -1`" = "mpssd" ]; then - echo -e $"MPSSD already running! " - success - echo - return 0 - fi - - echo -e $"Starting MPSS Stack" - echo -e $"Loading MIC drivers:" $mic_modules - - modprobe -a $mic_modules - RETVAL=$? - if [ $RETVAL -ne 0 ]; then - failure - echo - return $RETVAL - fi - - # Start the daemon - echo -n $"Starting MPSSD " - $exec - RETVAL=$? - if [ $RETVAL -ne 0 ]; then - failure - echo - return $RETVAL - fi - success - echo - - sleep 5 - - # Boot the cards - micctrl -b - - # Wait till ping works - for f in $sysfs/* - do - count=100 - ipaddr=`cat $f/cmdline` - ipaddr=${ipaddr#*address,} - ipaddr=`echo $ipaddr | cut -d, -f1 | cut -d\; -f1` - while [ $count -ge 0 ] - do - echo -e "Pinging "`basename $f`" " - ping -c 1 $ipaddr &> /dev/null - RETVAL=$? - if [ $RETVAL -eq 0 ]; then - success - break - fi - sleep 1 - count=`expr $count - 1` - done - [ $RETVAL -ne 0 ] && failure || success - echo - done - return $RETVAL -} - -stop() -{ - echo -e $"Shutting down MPSS Stack: " - - # Bail out if module is unloaded - if [ ! -d "$sysfs" ]; then - echo -n $"Module unloaded " - success - echo - return 0 - fi - - # Shut down the cards. - micctrl -S - - # Wait for the cards to go offline - for f in $sysfs/* - do - while [ "`cat $f/state`" != "ready" ] - do - sleep 1 - echo -e "Waiting for "`basename $f`" to become ready" - done - done - - # Display the status of the cards - micctrl -s - - # Kill MPSSD now - echo -n $"Killing MPSSD" - killall -9 mpssd 2>/dev/null - RETVAL=$? 
- [ $RETVAL -ne 0 ] && failure || success - echo - return $RETVAL -} - -restart() -{ - stop - sleep 5 - start -} - -status() -{ - micctrl -s - if [ "`ps -e | awk '{print $4}' | grep mpssd | head -n 1`" = "mpssd" ]; then - echo "mpssd is running" - else - echo "mpssd is stopped" - fi - return 0 -} - -unload() -{ - if [ ! -d "$sysfs" ]; then - echo -n $"No MIC_HOST Module: " - success - echo - return - fi - - stop - - sleep 5 - echo -n $"Removing MIC drivers:" $mic_modules - modprobe -r $mic_modules - RETVAL=$? - [ $RETVAL -ne 0 ] && failure || success - echo - return $RETVAL -} - -case $1 in - start) - start - ;; - stop) - stop - ;; - restart) - restart - ;; - status) - status - ;; - unload) - unload - ;; - *) - echo $"Usage: $0 {start|stop|restart|status|unload}" - exit 2 -esac - -exit $? --- a/Documentation/mic/mpssd/mpssd.c +++ /dev/null @@ -1,1826 +0,0 @@ -/* - * Intel MIC Platform Software Stack (MPSS) - * - * Copyright(c) 2013 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Intel MIC User Space Tools. 
- */ - -#define _GNU_SOURCE - -#include <stdlib.h> -#include <fcntl.h> -#include <getopt.h> -#include <assert.h> -#include <unistd.h> -#include <stdbool.h> -#include <signal.h> -#include <poll.h> -#include <features.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/mman.h> -#include <sys/socket.h> -#include <linux/virtio_ring.h> -#include <linux/virtio_net.h> -#include <linux/virtio_console.h> -#include <linux/virtio_blk.h> -#include <linux/version.h> -#include "mpssd.h" -#include <linux/mic_ioctl.h> -#include <linux/mic_common.h> -#include <tools/endian.h> - -static void *init_mic(void *arg); - -static FILE *logfp; -static struct mic_info mic_list; - -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) - -#define min_t(type, x, y) ({ \ - type __min1 = (x); \ - type __min2 = (y); \ - __min1 < __min2 ? __min1 : __min2; }) - -/* align addr on a size boundary - adjust address up/down if needed */ -#define _ALIGN_DOWN(addr, size) ((addr)&(~((size)-1))) -#define _ALIGN_UP(addr, size) _ALIGN_DOWN(addr + size - 1, size) - -/* align addr on a size boundary - adjust address up if needed */ -#define _ALIGN(addr, size) _ALIGN_UP(addr, size) - -/* to align the pointer to the (next) page boundary */ -#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) - -#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) - -#define GSO_ENABLED 1 -#define MAX_GSO_SIZE (64 * 1024) -#define ETH_H_LEN 14 -#define MAX_NET_PKT_SIZE (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64)) -#define MIC_DEVICE_PAGE_END 0x1000 - -#ifndef VIRTIO_NET_HDR_F_DATA_VALID -#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ -#endif - -static struct { - struct mic_device_desc dd; - struct mic_vqconfig vqconfig[2]; - __u32 host_features, guest_acknowledgements; - struct virtio_console_config cons_config; -} virtcons_dev_page = { - .dd = { - .type = VIRTIO_ID_CONSOLE, - .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig), - .feature_len = sizeof(virtcons_dev_page.host_features), - .config_len = 
sizeof(virtcons_dev_page.cons_config), - }, - .vqconfig[0] = { - .num = htole16(MIC_VRING_ENTRIES), - }, - .vqconfig[1] = { - .num = htole16(MIC_VRING_ENTRIES), - }, -}; - -static struct { - struct mic_device_desc dd; - struct mic_vqconfig vqconfig[2]; - __u32 host_features, guest_acknowledgements; - struct virtio_net_config net_config; -} virtnet_dev_page = { - .dd = { - .type = VIRTIO_ID_NET, - .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig), - .feature_len = sizeof(virtnet_dev_page.host_features), - .config_len = sizeof(virtnet_dev_page.net_config), - }, - .vqconfig[0] = { - .num = htole16(MIC_VRING_ENTRIES), - }, - .vqconfig[1] = { - .num = htole16(MIC_VRING_ENTRIES), - }, -#if GSO_ENABLED - .host_features = htole32( - 1 << VIRTIO_NET_F_CSUM | - 1 << VIRTIO_NET_F_GSO | - 1 << VIRTIO_NET_F_GUEST_TSO4 | - 1 << VIRTIO_NET_F_GUEST_TSO6 | - 1 << VIRTIO_NET_F_GUEST_ECN), -#else - .host_features = 0, -#endif -}; - -static const char *mic_config_dir = "/etc/mpss"; -static const char *virtblk_backend = "VIRTBLK_BACKEND"; -static struct { - struct mic_device_desc dd; - struct mic_vqconfig vqconfig[1]; - __u32 host_features, guest_acknowledgements; - struct virtio_blk_config blk_config; -} virtblk_dev_page = { - .dd = { - .type = VIRTIO_ID_BLOCK, - .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig), - .feature_len = sizeof(virtblk_dev_page.host_features), - .config_len = sizeof(virtblk_dev_page.blk_config), - }, - .vqconfig[0] = { - .num = htole16(MIC_VRING_ENTRIES), - }, - .host_features = - htole32(1<<VIRTIO_BLK_F_SEG_MAX), - .blk_config = { - .seg_max = htole32(MIC_VRING_ENTRIES - 2), - .capacity = htole64(0), - } -}; - -static char *myname; - -static int -tap_configure(struct mic_info *mic, char *dev) -{ - pid_t pid; - char *ifargv[7]; - char ipaddr[IFNAMSIZ]; - int ret = 0; - - pid = fork(); - if (pid == 0) { - ifargv[0] = "ip"; - ifargv[1] = "link"; - ifargv[2] = "set"; - ifargv[3] = dev; - ifargv[4] = "up"; - ifargv[5] = NULL; - mpsslog("Configuring %s\n", dev); - ret 
= execvp("ip", ifargv); - if (ret < 0) { - mpsslog("%s execvp failed errno %s\n", - mic->name, strerror(errno)); - return ret; - } - } - if (pid < 0) { - mpsslog("%s fork failed errno %s\n", - mic->name, strerror(errno)); - return ret; - } - - ret = waitpid(pid, NULL, 0); - if (ret < 0) { - mpsslog("%s waitpid failed errno %s\n", - mic->name, strerror(errno)); - return ret; - } - - snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id + 1); - - pid = fork(); - if (pid == 0) { - ifargv[0] = "ip"; - ifargv[1] = "addr"; - ifargv[2] = "add"; - ifargv[3] = ipaddr; - ifargv[4] = "dev"; - ifargv[5] = dev; - ifargv[6] = NULL; - mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr); - ret = execvp("ip", ifargv); - if (ret < 0) { - mpsslog("%s execvp failed errno %s\n", - mic->name, strerror(errno)); - return ret; - } - } - if (pid < 0) { - mpsslog("%s fork failed errno %s\n", - mic->name, strerror(errno)); - return ret; - } - - ret = waitpid(pid, NULL, 0); - if (ret < 0) { - mpsslog("%s waitpid failed errno %s\n", - mic->name, strerror(errno)); - return ret; - } - mpsslog("MIC name %s %s %d DONE!\n", - mic->name, __func__, __LINE__); - return 0; -} - -static int tun_alloc(struct mic_info *mic, char *dev) -{ - struct ifreq ifr; - int fd, err; -#if GSO_ENABLED - unsigned offload; -#endif - fd = open("/dev/net/tun", O_RDWR); - if (fd < 0) { - mpsslog("Could not open /dev/net/tun %s\n", strerror(errno)); - goto done; - } - - memset(&ifr, 0, sizeof(ifr)); - - ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; - if (*dev) - strncpy(ifr.ifr_name, dev, IFNAMSIZ); - - err = ioctl(fd, TUNSETIFF, (void *)&ifr); - if (err < 0) { - mpsslog("%s %s %d TUNSETIFF failed %s\n", - mic->name, __func__, __LINE__, strerror(errno)); - close(fd); - return err; - } -#if GSO_ENABLED - offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_TSO_ECN; - - err = ioctl(fd, TUNSETOFFLOAD, offload); - if (err < 0) { - mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n", - mic->name, __func__, __LINE__, 
strerror(errno)); - close(fd); - return err; - } -#endif - strcpy(dev, ifr.ifr_name); - mpsslog("Created TAP %s\n", dev); -done: - return fd; -} - -#define NET_FD_VIRTIO_NET 0 -#define NET_FD_TUN 1 -#define MAX_NET_FD 2 - -static void set_dp(struct mic_info *mic, int type, void *dp) -{ - switch (type) { - case VIRTIO_ID_CONSOLE: - mic->mic_console.console_dp = dp; - return; - case VIRTIO_ID_NET: - mic->mic_net.net_dp = dp; - return; - case VIRTIO_ID_BLOCK: - mic->mic_virtblk.block_dp = dp; - return; - } - mpsslog("%s %s %d not found\n", mic->name, __func__, type); - assert(0); -} - -static void *get_dp(struct mic_info *mic, int type) -{ - switch (type) { - case VIRTIO_ID_CONSOLE: - return mic->mic_console.console_dp; - case VIRTIO_ID_NET: - return mic->mic_net.net_dp; - case VIRTIO_ID_BLOCK: - return mic->mic_virtblk.block_dp; - } - mpsslog("%s %s %d not found\n", mic->name, __func__, type); - assert(0); - return NULL; -} - -static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type) -{ - struct mic_device_desc *d; - int i; - void *dp = get_dp(mic, type); - - for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE; - i += mic_total_desc_size(d)) { - d = dp + i; - - /* End of list */ - if (d->type == 0) - break; - - if (d->type == -1) - continue; - - mpsslog("%s %s d-> type %d d %p\n", - mic->name, __func__, d->type, d); - - if (d->type == (__u8)type) - return d; - } - mpsslog("%s %s %d not found\n", mic->name, __func__, type); - return NULL; -} - -/* See comments in vhost.c for explanation of next_desc() */ -static unsigned next_desc(struct vring_desc *desc) -{ - unsigned int next; - - if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) - return -1U; - next = le16toh(desc->next); - return next; -} - -/* Sum up all the IOVEC length */ -static ssize_t -sum_iovec_len(struct mic_copy_desc *copy) -{ - ssize_t sum = 0; - int i; - - for (i = 0; i < copy->iovcnt; i++) - sum += copy->iov[i].iov_len; - return sum; -} - -static inline void verify_out_len(struct 
mic_info *mic, - struct mic_copy_desc *copy) -{ - if (copy->out_len != sum_iovec_len(copy)) { - mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n", - mic->name, __func__, __LINE__, - copy->out_len, sum_iovec_len(copy)); - assert(copy->out_len == sum_iovec_len(copy)); - } -} - -/* Display an iovec */ -static void -disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy, - const char *s, int line) -{ - int i; - - for (i = 0; i < copy->iovcnt; i++) - mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n", - mic->name, s, line, i, - copy->iov[i].iov_base, copy->iov[i].iov_len); -} - -static inline __u16 read_avail_idx(struct mic_vring *vr) -{ - return ACCESS_ONCE(vr->info->avail_idx); -} - -static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr, - struct mic_copy_desc *copy, ssize_t len) -{ - copy->vr_idx = tx ? 0 : 1; - copy->update_used = true; - if (type == VIRTIO_ID_NET) - copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr); - else - copy->iov[0].iov_len = len; -} - -/* Central API which triggers the copies */ -static int -mic_virtio_copy(struct mic_info *mic, int fd, - struct mic_vring *vr, struct mic_copy_desc *copy) -{ - int ret; - - ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy); - if (ret) { - mpsslog("%s %s %d errno %s ret %d\n", - mic->name, __func__, __LINE__, - strerror(errno), ret); - } - return ret; -} - -static inline unsigned _vring_size(unsigned int num, unsigned long align) -{ - return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (3 + num) - + align - 1) & ~(align - 1)) - + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num; -} - -/* - * This initialization routine requires at least one - * vring i.e. vr0. vr1 is optional. 
- */ -static void * -init_vr(struct mic_info *mic, int fd, int type, - struct mic_vring *vr0, struct mic_vring *vr1, int num_vq) -{ - int vr_size; - char *va; - - vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES, - MIC_VIRTIO_RING_ALIGN) + - sizeof(struct _mic_vring_info)); - va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq, - PROT_READ, MAP_SHARED, fd, 0); - if (MAP_FAILED == va) { - mpsslog("%s %s %d mmap failed errno %s\n", - mic->name, __func__, __LINE__, - strerror(errno)); - goto done; - } - set_dp(mic, type, va); - vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END]; - vr0->info = vr0->va + - _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN); - vring_init(&vr0->vr, - MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN); - mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ", - __func__, mic->name, vr0->va, vr0->info, vr_size, - _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN)); - mpsslog("magic 0x%x expected 0x%x\n", - le32toh(vr0->info->magic), MIC_MAGIC + type); - assert(le32toh(vr0->info->magic) == MIC_MAGIC + type); - if (vr1) { - vr1->va = (struct mic_vring *) - &va[MIC_DEVICE_PAGE_END + vr_size]; - vr1->info = vr1->va + _vring_size(MIC_VRING_ENTRIES, - MIC_VIRTIO_RING_ALIGN); - vring_init(&vr1->vr, - MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN); - mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ", - __func__, mic->name, vr1->va, vr1->info, vr_size, - _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN)); - mpsslog("magic 0x%x expected 0x%x\n", - le32toh(vr1->info->magic), MIC_MAGIC + type + 1); - assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1); - } -done: - return va; -} - -static int -wait_for_card_driver(struct mic_info *mic, int fd, int type) -{ - struct pollfd pollfd; - int err; - struct mic_device_desc *desc = get_device_desc(mic, type); - __u8 prev_status; - - if (!desc) - return -ENODEV; - prev_status = desc->status; - pollfd.fd = fd; - mpsslog("%s %s Waiting .... 
desc-> type %d status 0x%x\n", - mic->name, __func__, type, desc->status); - - while (1) { - pollfd.events = POLLIN; - pollfd.revents = 0; - err = poll(&pollfd, 1, -1); - if (err < 0) { - mpsslog("%s %s poll failed %s\n", - mic->name, __func__, strerror(errno)); - continue; - } - - if (pollfd.revents) { - if (desc->status != prev_status) { - mpsslog("%s %s Waiting... desc-> type %d " - "status 0x%x\n", - mic->name, __func__, type, - desc->status); - prev_status = desc->status; - } - if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) { - mpsslog("%s %s poll.revents %d\n", - mic->name, __func__, pollfd.revents); - mpsslog("%s %s desc-> type %d status 0x%x\n", - mic->name, __func__, type, - desc->status); - break; - } - } - } - return 0; -} - -/* Spin till we have some descriptors */ -static void -spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr) -{ - __u16 avail_idx = read_avail_idx(vr); - - while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) { -#ifdef DEBUG - mpsslog("%s %s waiting for desc avail %d info_avail %d\n", - mic->name, __func__, - le16toh(vr->vr.avail->idx), vr->info->avail_idx); -#endif - sched_yield(); - } -} - -static void * -virtio_net(void *arg) -{ - static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)]; - static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64))); - struct iovec vnet_iov[2][2] = { - { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) }, - { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } }, - { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) }, - { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } }, - }; - struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1]; - struct mic_info *mic = (struct mic_info *)arg; - char if_name[IFNAMSIZ]; - struct pollfd net_poll[MAX_NET_FD]; - struct mic_vring tx_vr, rx_vr; - struct mic_copy_desc copy; - struct mic_device_desc *desc; - int err; - - snprintf(if_name, IFNAMSIZ, "mic%d", mic->id); - mic->mic_net.tap_fd = tun_alloc(mic, 
if_name); - if (mic->mic_net.tap_fd < 0) - goto done; - - if (tap_configure(mic, if_name)) - goto done; - mpsslog("MIC name %s id %d\n", mic->name, mic->id); - - net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd; - net_poll[NET_FD_VIRTIO_NET].events = POLLIN; - net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd; - net_poll[NET_FD_TUN].events = POLLIN; - - if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd, - VIRTIO_ID_NET, &tx_vr, &rx_vr, - virtnet_dev_page.dd.num_vq)) { - mpsslog("%s init_vr failed %s\n", - mic->name, strerror(errno)); - goto done; - } - - copy.iovcnt = 2; - desc = get_device_desc(mic, VIRTIO_ID_NET); - - while (1) { - ssize_t len; - - net_poll[NET_FD_VIRTIO_NET].revents = 0; - net_poll[NET_FD_TUN].revents = 0; - - /* Start polling for data from tap and virtio net */ - err = poll(net_poll, 2, -1); - if (err < 0) { - mpsslog("%s poll failed %s\n", - __func__, strerror(errno)); - continue; - } - if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) { - err = wait_for_card_driver(mic, - mic->mic_net.virtio_net_fd, - VIRTIO_ID_NET); - if (err) { - mpsslog("%s %s %d Exiting...\n", - mic->name, __func__, __LINE__); - break; - } - } - /* - * Check if there is data to be read from TUN and write to - * virtio net fd if there is. 
- */ - if (net_poll[NET_FD_TUN].revents & POLLIN) { - copy.iov = iov0; - len = readv(net_poll[NET_FD_TUN].fd, - copy.iov, copy.iovcnt); - if (len > 0) { - struct virtio_net_hdr *hdr - = (struct virtio_net_hdr *)vnet_hdr[0]; - - /* Disable checksums on the card since we are on - a reliable PCIe link */ - hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID; -#ifdef DEBUG - mpsslog("%s %s %d hdr->flags 0x%x ", mic->name, - __func__, __LINE__, hdr->flags); - mpsslog("copy.out_len %d hdr->gso_type 0x%x\n", - copy.out_len, hdr->gso_type); -#endif -#ifdef DEBUG - disp_iovec(mic, copy, __func__, __LINE__); - mpsslog("%s %s %d read from tap 0x%lx\n", - mic->name, __func__, __LINE__, - len); -#endif - spin_for_descriptors(mic, &tx_vr); - txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy, - len); - - err = mic_virtio_copy(mic, - mic->mic_net.virtio_net_fd, &tx_vr, - &copy); - if (err < 0) { - mpsslog("%s %s %d mic_virtio_copy %s\n", - mic->name, __func__, __LINE__, - strerror(errno)); - } - if (!err) - verify_out_len(mic, &copy); -#ifdef DEBUG - disp_iovec(mic, copy, __func__, __LINE__); - mpsslog("%s %s %d wrote to net 0x%lx\n", - mic->name, __func__, __LINE__, - sum_iovec_len(&copy)); -#endif - /* Reinitialize IOV for next run */ - iov0[1].iov_len = MAX_NET_PKT_SIZE; - } else if (len < 0) { - disp_iovec(mic, &copy, __func__, __LINE__); - mpsslog("%s %s %d read failed %s ", mic->name, - __func__, __LINE__, strerror(errno)); - mpsslog("cnt %d sum %zd\n", - copy.iovcnt, sum_iovec_len(&copy)); - } - } - - /* - * Check if there is data to be read from virtio net and - * write to TUN if there is. 
- */ - if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) { - while (rx_vr.info->avail_idx != - le16toh(rx_vr.vr.avail->idx)) { - copy.iov = iov1; - txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy, - MAX_NET_PKT_SIZE - + sizeof(struct virtio_net_hdr)); - - err = mic_virtio_copy(mic, - mic->mic_net.virtio_net_fd, &rx_vr, - &copy); - if (!err) { -#ifdef DEBUG - struct virtio_net_hdr *hdr - = (struct virtio_net_hdr *) - vnet_hdr[1]; - - mpsslog("%s %s %d hdr->flags 0x%x, ", - mic->name, __func__, __LINE__, - hdr->flags); - mpsslog("out_len %d gso_type 0x%x\n", - copy.out_len, - hdr->gso_type); -#endif - /* Set the correct output iov_len */ - iov1[1].iov_len = copy.out_len - - sizeof(struct virtio_net_hdr); - verify_out_len(mic, &copy); -#ifdef DEBUG - disp_iovec(mic, copy, __func__, - __LINE__); - mpsslog("%s %s %d ", - mic->name, __func__, __LINE__); - mpsslog("read from net 0x%lx\n", - sum_iovec_len(copy)); -#endif - len = writev(net_poll[NET_FD_TUN].fd, - copy.iov, copy.iovcnt); - if (len != sum_iovec_len(&copy)) { - mpsslog("Tun write failed %s ", - strerror(errno)); - mpsslog("len 0x%zx ", len); - mpsslog("read_len 0x%zx\n", - sum_iovec_len(&copy)); - } else { -#ifdef DEBUG - disp_iovec(mic, &copy, __func__, - __LINE__); - mpsslog("%s %s %d ", - mic->name, __func__, - __LINE__); - mpsslog("wrote to tap 0x%lx\n", - len); -#endif - } - } else { - mpsslog("%s %s %d mic_virtio_copy %s\n", - mic->name, __func__, __LINE__, - strerror(errno)); - break; - } - } - } - if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR) - mpsslog("%s: %s: POLLERR\n", __func__, mic->name); - } -done: - pthread_exit(NULL); -} - -/* virtio_console */ -#define VIRTIO_CONSOLE_FD 0 -#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1) -#define MAX_CONSOLE_FD (MONITOR_FD + 1) /* must be the last one + 1 */ -#define MAX_BUFFER_SIZE PAGE_SIZE - -static void * -virtio_console(void *arg) -{ - static __u8 vcons_buf[2][PAGE_SIZE]; - struct iovec vcons_iov[2] = { - { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) }, - { .iov_base 
= vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) }, - }; - struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1]; - struct mic_info *mic = (struct mic_info *)arg; - int err; - struct pollfd console_poll[MAX_CONSOLE_FD]; - int pty_fd; - char *pts_name; - ssize_t len; - struct mic_vring tx_vr, rx_vr; - struct mic_copy_desc copy; - struct mic_device_desc *desc; - - pty_fd = posix_openpt(O_RDWR); - if (pty_fd < 0) { - mpsslog("can't open a pseudoterminal master device: %s\n", - strerror(errno)); - goto _return; - } - pts_name = ptsname(pty_fd); - if (pts_name == NULL) { - mpsslog("can't get pts name\n"); - goto _close_pty; - } - printf("%s console message goes to %s\n", mic->name, pts_name); - mpsslog("%s console message goes to %s\n", mic->name, pts_name); - err = grantpt(pty_fd); - if (err < 0) { - mpsslog("can't grant access: %s %s\n", - pts_name, strerror(errno)); - goto _close_pty; - } - err = unlockpt(pty_fd); - if (err < 0) { - mpsslog("can't unlock a pseudoterminal: %s %s\n", - pts_name, strerror(errno)); - goto _close_pty; - } - console_poll[MONITOR_FD].fd = pty_fd; - console_poll[MONITOR_FD].events = POLLIN; - - console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd; - console_poll[VIRTIO_CONSOLE_FD].events = POLLIN; - - if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd, - VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr, - virtcons_dev_page.dd.num_vq)) { - mpsslog("%s init_vr failed %s\n", - mic->name, strerror(errno)); - goto _close_pty; - } - - copy.iovcnt = 1; - desc = get_device_desc(mic, VIRTIO_ID_CONSOLE); - - for (;;) { - console_poll[MONITOR_FD].revents = 0; - console_poll[VIRTIO_CONSOLE_FD].revents = 0; - err = poll(console_poll, MAX_CONSOLE_FD, -1); - if (err < 0) { - mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__, - strerror(errno)); - continue; - } - if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) { - err = wait_for_card_driver(mic, - mic->mic_console.virtio_console_fd, - VIRTIO_ID_CONSOLE); - if (err) { - 
mpsslog("%s %s %d Exiting...\n", - mic->name, __func__, __LINE__); - break; - } - } - - if (console_poll[MONITOR_FD].revents & POLLIN) { - copy.iov = iov0; - len = readv(pty_fd, copy.iov, copy.iovcnt); - if (len > 0) { -#ifdef DEBUG - disp_iovec(mic, copy, __func__, __LINE__); - mpsslog("%s %s %d read from tap 0x%lx\n", - mic->name, __func__, __LINE__, - len); -#endif - spin_for_descriptors(mic, &tx_vr); - txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr, - &copy, len); - - err = mic_virtio_copy(mic, - mic->mic_console.virtio_console_fd, - &tx_vr, &copy); - if (err < 0) { - mpsslog("%s %s %d mic_virtio_copy %s\n", - mic->name, __func__, __LINE__, - strerror(errno)); - } - if (!err) - verify_out_len(mic, &copy); -#ifdef DEBUG - disp_iovec(mic, copy, __func__, __LINE__); - mpsslog("%s %s %d wrote to net 0x%lx\n", - mic->name, __func__, __LINE__, - sum_iovec_len(copy)); -#endif - /* Reinitialize IOV for next run */ - iov0->iov_len = PAGE_SIZE; - } else if (len < 0) { - disp_iovec(mic, &copy, __func__, __LINE__); - mpsslog("%s %s %d read failed %s ", - mic->name, __func__, __LINE__, - strerror(errno)); - mpsslog("cnt %d sum %zd\n", - copy.iovcnt, sum_iovec_len(&copy)); - } - } - - if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) { - while (rx_vr.info->avail_idx != - le16toh(rx_vr.vr.avail->idx)) { - copy.iov = iov1; - txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr, - &copy, PAGE_SIZE); - - err = mic_virtio_copy(mic, - mic->mic_console.virtio_console_fd, - &rx_vr, &copy); - if (!err) { - /* Set the correct output iov_len */ - iov1->iov_len = copy.out_len; - verify_out_len(mic, &copy); -#ifdef DEBUG - disp_iovec(mic, copy, __func__, - __LINE__); - mpsslog("%s %s %d ", - mic->name, __func__, __LINE__); - mpsslog("read from net 0x%lx\n", - sum_iovec_len(copy)); -#endif - len = writev(pty_fd, - copy.iov, copy.iovcnt); - if (len != sum_iovec_len(&copy)) { - mpsslog("Tun write failed %s ", - strerror(errno)); - mpsslog("len 0x%zx ", len); - mpsslog("read_len 0x%zx\n", - sum_iovec_len(&copy)); - } else { -#ifdef DEBUG - 
disp_iovec(mic, copy, __func__, - __LINE__); - mpsslog("%s %s %d ", - mic->name, __func__, - __LINE__); - mpsslog("wrote to tap 0x%lx\n", - len); -#endif - } - } else { - mpsslog("%s %s %d mic_virtio_copy %s\n", - mic->name, __func__, __LINE__, - strerror(errno)); - break; - } - } - } - if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR) - mpsslog("%s: %s: POLLERR\n", __func__, mic->name); - } -_close_pty: - close(pty_fd); -_return: - pthread_exit(NULL); -} - -static void -add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd) -{ - char path[PATH_MAX]; - int fd, err; - - snprintf(path, PATH_MAX, "/dev/mic%d", mic->id); - fd = open(path, O_RDWR); - if (fd < 0) { - mpsslog("Could not open %s %s\n", path, strerror(errno)); - return; - } - - err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd); - if (err < 0) { - mpsslog("Could not add %d %s\n", dd->type, strerror(errno)); - close(fd); - return; - } - switch (dd->type) { - case VIRTIO_ID_NET: - mic->mic_net.virtio_net_fd = fd; - mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name); - break; - case VIRTIO_ID_CONSOLE: - mic->mic_console.virtio_console_fd = fd; - mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name); - break; - case VIRTIO_ID_BLOCK: - mic->mic_virtblk.virtio_block_fd = fd; - mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name); - break; - } -} - -static bool -set_backend_file(struct mic_info *mic) -{ - FILE *config; - char buff[PATH_MAX], *line, *evv, *p; - - snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id); - config = fopen(buff, "r"); - if (config == NULL) - return false; - do { /* look for "virtblk_backend=XXXX" */ - line = fgets(buff, PATH_MAX, config); - if (line == NULL) - break; - if (*line == '#') - continue; - p = strchr(line, '\n'); - if (p) - *p = '\0'; - } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0); - fclose(config); - if (line == NULL) - return false; - evv = strchr(line, '='); - if (evv == NULL) - return false; - 
mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1); - if (mic->mic_virtblk.backend_file == NULL) { - mpsslog("%s %d can't allocate memory\n", mic->name, mic->id); - return false; - } - strcpy(mic->mic_virtblk.backend_file, evv + 1); - return true; -} - -#define SECTOR_SIZE 512 -static bool -set_backend_size(struct mic_info *mic) -{ - mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0, - SEEK_END); - if (mic->mic_virtblk.backend_size < 0) { - mpsslog("%s: can't seek: %s\n", - mic->name, mic->mic_virtblk.backend_file); - return false; - } - virtblk_dev_page.blk_config.capacity = - mic->mic_virtblk.backend_size / SECTOR_SIZE; - if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0) - virtblk_dev_page.blk_config.capacity++; - - virtblk_dev_page.blk_config.capacity = - htole64(virtblk_dev_page.blk_config.capacity); - - return true; -} - -static bool -open_backend(struct mic_info *mic) -{ - if (!set_backend_file(mic)) - goto _error_exit; - mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR); - if (mic->mic_virtblk.backend < 0) { - mpsslog("%s: can't open: %s\n", mic->name, - mic->mic_virtblk.backend_file); - goto _error_free; - } - if (!set_backend_size(mic)) - goto _error_close; - mic->mic_virtblk.backend_addr = mmap(NULL, - mic->mic_virtblk.backend_size, - PROT_READ|PROT_WRITE, MAP_SHARED, - mic->mic_virtblk.backend, 0L); - if (mic->mic_virtblk.backend_addr == MAP_FAILED) { - mpsslog("%s: can't map: %s %s\n", - mic->name, mic->mic_virtblk.backend_file, - strerror(errno)); - goto _error_close; - } - return true; - - _error_close: - close(mic->mic_virtblk.backend); - _error_free: - free(mic->mic_virtblk.backend_file); - _error_exit: - return false; -} - -static void -close_backend(struct mic_info *mic) -{ - munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size); - close(mic->mic_virtblk.backend); - free(mic->mic_virtblk.backend_file); -} - -static bool -start_virtblk(struct mic_info *mic, struct mic_vring *vring) -{ - 
if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) { - mpsslog("%s: blk_config is not 8 byte aligned.\n", - mic->name); - return false; - } - add_virtio_device(mic, &virtblk_dev_page.dd); - if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd, - VIRTIO_ID_BLOCK, vring, NULL, - virtblk_dev_page.dd.num_vq)) { - mpsslog("%s init_vr failed %s\n", - mic->name, strerror(errno)); - return false; - } - return true; -} - -static void -stop_virtblk(struct mic_info *mic) -{ - int vr_size, ret; - - vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES, - MIC_VIRTIO_RING_ALIGN) + - sizeof(struct _mic_vring_info)); - ret = munmap(mic->mic_virtblk.block_dp, - MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq); - if (ret < 0) - mpsslog("%s munmap errno %d\n", mic->name, errno); - close(mic->mic_virtblk.virtio_block_fd); -} - -static __u8 -header_error_check(struct vring_desc *desc) -{ - if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) { - mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n", - __func__, __LINE__); - return -EIO; - } - if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) { - mpsslog("%s() %d: alone\n", - __func__, __LINE__); - return -EIO; - } - if (le16toh(desc->flags) & VRING_DESC_F_WRITE) { - mpsslog("%s() %d: not read\n", - __func__, __LINE__); - return -EIO; - } - return 0; -} - -static int -read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx) -{ - struct iovec iovec; - struct mic_copy_desc copy; - - iovec.iov_len = sizeof(*hdr); - iovec.iov_base = hdr; - copy.iov = &iovec; - copy.iovcnt = 1; - copy.vr_idx = 0; /* only one vring on virtio_block */ - copy.update_used = false; /* do not update used index */ - return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy); -} - -static int -transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt) -{ - struct mic_copy_desc copy; - - copy.iov = iovec; - copy.iovcnt = iovcnt; - copy.vr_idx = 0; /* only one vring on virtio_block */ - copy.update_used = false; /* do not update used
index */ - return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy); -} - -static __u8 -status_error_check(struct vring_desc *desc) -{ - if (le32toh(desc->len) != sizeof(__u8)) { - mpsslog("%s() %d: length is not sizeof(status)\n", - __func__, __LINE__); - return -EIO; - } - return 0; -} - -static int -write_status(int fd, __u8 *status) -{ - struct iovec iovec; - struct mic_copy_desc copy; - - iovec.iov_base = status; - iovec.iov_len = sizeof(*status); - copy.iov = &iovec; - copy.iovcnt = 1; - copy.vr_idx = 0; /* only one vring on virtio_block */ - copy.update_used = true; /* Update used index */ - return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy); -} - -#ifndef VIRTIO_BLK_T_GET_ID -#define VIRTIO_BLK_T_GET_ID 8 -#endif - -static void * -virtio_block(void *arg) -{ - struct mic_info *mic = (struct mic_info *)arg; - int ret; - struct pollfd block_poll; - struct mic_vring vring; - __u16 avail_idx; - __u32 desc_idx; - struct vring_desc *desc; - struct iovec *iovec, *piov; - __u8 status; - __u32 buffer_desc_idx; - struct virtio_blk_outhdr hdr; - void *fos; - - for (;;) { /* forever */ - if (!open_backend(mic)) { /* No virtblk */ - for (mic->mic_virtblk.signaled = 0; - !mic->mic_virtblk.signaled;) - sleep(1); - continue; - } - - /* backend file is specified.
*/ - if (!start_virtblk(mic, &vring)) - goto _close_backend; - iovec = malloc(sizeof(*iovec) * - le32toh(virtblk_dev_page.blk_config.seg_max)); - if (!iovec) { - mpsslog("%s: can't alloc iovec: %s\n", - mic->name, strerror(ENOMEM)); - goto _stop_virtblk; - } - - block_poll.fd = mic->mic_virtblk.virtio_block_fd; - block_poll.events = POLLIN; - for (mic->mic_virtblk.signaled = 0; - !mic->mic_virtblk.signaled;) { - block_poll.revents = 0; - /* timeout in 1 sec to see signaled */ - ret = poll(&block_poll, 1, 1000); - if (ret < 0) { - mpsslog("%s %d: poll failed: %s\n", - __func__, __LINE__, - strerror(errno)); - continue; - } - - if (!(block_poll.revents & POLLIN)) { -#ifdef DEBUG - mpsslog("%s %d: block_poll.revents=0x%x\n", - __func__, __LINE__, block_poll.revents); -#endif - continue; - } - - /* POLLIN */ - while (vring.info->avail_idx != - le16toh(vring.vr.avail->idx)) { - /* read header element */ - avail_idx = - vring.info->avail_idx & - (vring.vr.num - 1); - desc_idx = le16toh( - vring.vr.avail->ring[avail_idx]); - desc = &vring.vr.desc[desc_idx]; -#ifdef DEBUG - mpsslog("%s() %d: avail_idx=%d ", - __func__, __LINE__, - vring.info->avail_idx); - mpsslog("vring.vr.num=%d desc=%p\n", - vring.vr.num, desc); -#endif - status = header_error_check(desc); - ret = read_header( - mic->mic_virtblk.virtio_block_fd, - &hdr, desc_idx); - if (ret < 0) { - mpsslog("%s() %d %s: ret=%d %s\n", - __func__, __LINE__, - mic->name, ret, - strerror(errno)); - break; - } - /* buffer element */ - piov = iovec; - status = 0; - fos = mic->mic_virtblk.backend_addr + - (hdr.sector * SECTOR_SIZE); - buffer_desc_idx = next_desc(desc); - desc_idx = buffer_desc_idx; - for (desc = &vring.vr.desc[buffer_desc_idx]; - desc->flags & VRING_DESC_F_NEXT; - desc_idx = next_desc(desc), - desc = &vring.vr.desc[desc_idx]) { - piov->iov_len = desc->len; - piov->iov_base = fos; - piov++; - fos += desc->len; - } - /* Returning NULLs for VIRTIO_BLK_T_GET_ID. 
*/ - if (hdr.type & ~(VIRTIO_BLK_T_OUT | - VIRTIO_BLK_T_GET_ID)) { - /* - VIRTIO_BLK_T_IN - does not do - anything. Probably for documenting. - VIRTIO_BLK_T_SCSI_CMD - for - virtio_scsi. - VIRTIO_BLK_T_FLUSH - turned off in - config space. - VIRTIO_BLK_T_BARRIER - defined but not - used in anywhere. - */ - mpsslog("%s() %d: type %x ", - __func__, __LINE__, - hdr.type); - mpsslog("is not supported\n"); - status = -ENOTSUP; - - } else { - ret = transfer_blocks( - mic->mic_virtblk.virtio_block_fd, - iovec, - piov - iovec); - if (ret < 0 && - status != 0) - status = ret; - } - /* write status and update used pointer */ - if (status != 0) - status = status_error_check(desc); - ret = write_status( - mic->mic_virtblk.virtio_block_fd, - &status); -#ifdef DEBUG - mpsslog("%s() %d: write status=%d on desc=%p\n", - __func__, __LINE__, - status, desc); -#endif - } - } - free(iovec); -_stop_virtblk: - stop_virtblk(mic); -_close_backend: - close_backend(mic); - } /* forever */ - - pthread_exit(NULL); -} - -static void -reset(struct mic_info *mic) -{ -#define RESET_TIMEOUT 120 - int i = RESET_TIMEOUT; - setsysfs(mic->name, "state", "reset"); - while (i) { - char *state; - state = readsysfs(mic->name, "state"); - if (!state) - goto retry; - mpsslog("%s: %s %d state %s\n", - mic->name, __func__, __LINE__, state); - - if (!strcmp(state, "ready")) { - free(state); - break; - } - free(state); -retry: - sleep(1); - i--; - } -} - -static int -get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status) -{ - if (!strcmp(shutdown_status, "nop")) - return MIC_NOP; - if (!strcmp(shutdown_status, "crashed")) - return MIC_CRASHED; - if (!strcmp(shutdown_status, "halted")) - return MIC_HALTED; - if (!strcmp(shutdown_status, "poweroff")) - return MIC_POWER_OFF; - if (!strcmp(shutdown_status, "restart")) - return MIC_RESTART; - mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status); - /* Invalid state */ - assert(0); -}; - -static int get_mic_state(struct mic_info *mic) -{ - 
char *state = NULL; - enum mic_states mic_state; - - while (!state) { - state = readsysfs(mic->name, "state"); - sleep(1); - } - mpsslog("%s: %s %d state %s\n", - mic->name, __func__, __LINE__, state); - - if (!strcmp(state, "ready")) { - mic_state = MIC_READY; - } else if (!strcmp(state, "booting")) { - mic_state = MIC_BOOTING; - } else if (!strcmp(state, "online")) { - mic_state = MIC_ONLINE; - } else if (!strcmp(state, "shutting_down")) { - mic_state = MIC_SHUTTING_DOWN; - } else if (!strcmp(state, "reset_failed")) { - mic_state = MIC_RESET_FAILED; - } else if (!strcmp(state, "resetting")) { - mic_state = MIC_RESETTING; - } else { - mpsslog("%s: BUG invalid state %s\n", mic->name, state); - assert(0); - } - - free(state); - return mic_state; -}; - -static void mic_handle_shutdown(struct mic_info *mic) -{ -#define SHUTDOWN_TIMEOUT 60 - int i = SHUTDOWN_TIMEOUT; - char *shutdown_status; - while (i) { - shutdown_status = readsysfs(mic->name, "shutdown_status"); - if (!shutdown_status) { - sleep(1); - continue; - } - mpsslog("%s: %s %d shutdown_status %s\n", - mic->name, __func__, __LINE__, shutdown_status); - switch (get_mic_shutdown_status(mic, shutdown_status)) { - case MIC_RESTART: - mic->restart = 1; - case MIC_HALTED: - case MIC_POWER_OFF: - case MIC_CRASHED: - free(shutdown_status); - goto reset; - default: - break; - } - free(shutdown_status); - sleep(1); - i--; - } -reset: - if (!i) - mpsslog("%s: %s %d timing out waiting for shutdown_status %s\n", - mic->name, __func__, __LINE__, shutdown_status); - reset(mic); -} - -static int open_state_fd(struct mic_info *mic) -{ - char pathname[PATH_MAX]; - int fd; - - snprintf(pathname, PATH_MAX - 1, "%s/%s/%s", - MICSYSFSDIR, mic->name, "state"); - - fd = open(pathname, O_RDONLY); - if (fd < 0) - mpsslog("%s: opening file %s failed %s\n", - mic->name, pathname, strerror(errno)); - return fd; -} - -static int block_till_state_change(int fd, struct mic_info *mic) -{ - struct pollfd ufds[1]; - char value[PAGE_SIZE]; - 
int ret; - - ufds[0].fd = fd; - ufds[0].events = POLLERR | POLLPRI; - ret = poll(ufds, 1, -1); - if (ret < 0) { - mpsslog("%s: %s %d poll failed %s\n", - mic->name, __func__, __LINE__, strerror(errno)); - return ret; - } - - ret = lseek(fd, 0, SEEK_SET); - if (ret < 0) { - mpsslog("%s: %s %d Failed to seek to 0: %s\n", - mic->name, __func__, __LINE__, strerror(errno)); - return ret; - } - - ret = read(fd, value, sizeof(value)); - if (ret < 0) { - mpsslog("%s: %s %d Failed to read sysfs entry: %s\n", - mic->name, __func__, __LINE__, strerror(errno)); - return ret; - } - - return 0; -} - -static void * -mic_config(void *arg) -{ - struct mic_info *mic = (struct mic_info *)arg; - int fd, ret, stat = 0; - - fd = open_state_fd(mic); - if (fd < 0) { - mpsslog("%s: %s %d open state fd failed %s\n", - mic->name, __func__, __LINE__, strerror(errno)); - goto exit; - } - - do { - ret = block_till_state_change(fd, mic); - if (ret < 0) { - mpsslog("%s: %s %d block_till_state_change error %s\n", - mic->name, __func__, __LINE__, strerror(errno)); - goto close_exit; - } - - switch (get_mic_state(mic)) { - case MIC_SHUTTING_DOWN: - mic_handle_shutdown(mic); - break; - case MIC_READY: - case MIC_RESET_FAILED: - ret = kill(mic->pid, SIGTERM); - mpsslog("%s: %s %d kill pid %d ret %d\n", - mic->name, __func__, __LINE__, - mic->pid, ret); - if (!ret) { - ret = waitpid(mic->pid, &stat, - WIFSIGNALED(stat)); - mpsslog("%s: %s %d waitpid ret %d pid %d\n", - mic->name, __func__, __LINE__, - ret, mic->pid); - } - if (mic->boot_on_resume) { - setsysfs(mic->name, "state", "boot"); - mic->boot_on_resume = 0; - } - goto close_exit; - default: - break; - } - } while (1); - -close_exit: - close(fd); -exit: - init_mic(mic); - pthread_exit(NULL); -} - -static void -set_cmdline(struct mic_info *mic) -{ - char buffer[PATH_MAX]; - int len; - - len = snprintf(buffer, PATH_MAX, - "clocksource=tsc highres=off nohz=off "); - len += snprintf(buffer + len, PATH_MAX - len, - 
"cpufreq_on;corec6_off;pc3_off;pc6_off "); - len += snprintf(buffer + len, PATH_MAX - len, - "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0", - mic->id + 1); - - setsysfs(mic->name, "cmdline", buffer); - mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer); - snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id + 1); - mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer); -} - -static void -set_log_buf_info(struct mic_info *mic) -{ - int fd; - off_t len; - char system_map[] = "/lib/firmware/mic/System.map"; - char *map, *temp, log_buf[17] = {'\0'}; - - fd = open(system_map, O_RDONLY); - if (fd < 0) { - mpsslog("%s: Opening System.map failed: %d\n", - mic->name, errno); - return; - } - len = lseek(fd, 0, SEEK_END); - if (len < 0) { - mpsslog("%s: Reading System.map size failed: %d\n", - mic->name, errno); - close(fd); - return; - } - map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0); - if (map == MAP_FAILED) { - mpsslog("%s: mmap of System.map failed: %d\n", - mic->name, errno); - close(fd); - return; - } - temp = strstr(map, "__log_buf"); - if (!temp) { - mpsslog("%s: __log_buf not found: %d\n", mic->name, errno); - munmap(map, len); - close(fd); - return; - } - strncpy(log_buf, temp - 19, 16); - setsysfs(mic->name, "log_buf_addr", log_buf); - mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf); - temp = strstr(map, "log_buf_len"); - if (!temp) { - mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno); - munmap(map, len); - close(fd); - return; - } - strncpy(log_buf, temp - 19, 16); - setsysfs(mic->name, "log_buf_len", log_buf); - mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf); - munmap(map, len); - close(fd); -} - -static void -change_virtblk_backend(int x, siginfo_t *siginfo, void *p) -{ - struct mic_info *mic; - - for (mic = mic_list.next; mic != NULL; mic = mic->next) - mic->mic_virtblk.signaled = 1/* true */; -} - -static void -set_mic_boot_params(struct mic_info *mic) -{ - set_log_buf_info(mic); - set_cmdline(mic); -} - -static void 
* -init_mic(void *arg) -{ - struct mic_info *mic = (struct mic_info *)arg; - struct sigaction ignore = { - .sa_flags = 0, - .sa_handler = SIG_IGN - }; - struct sigaction act = { - .sa_flags = SA_SIGINFO, - .sa_sigaction = change_virtblk_backend, - }; - char buffer[PATH_MAX]; - int err, fd; - - /* - * Currently, one virtio block device is supported for each MIC card - * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon. - * The signal informs the virtio block backend about a change in the - * configuration file which specifies the virtio backend file name on - * the host. Virtio block backend then re-reads the configuration file - * and switches to the new block device. This signalling mechanism may - * not be required once multiple virtio block devices are supported by - * the MIC daemon. - */ - sigaction(SIGUSR1, &ignore, NULL); -retry: - fd = open_state_fd(mic); - if (fd < 0) { - mpsslog("%s: %s %d open state fd failed %s\n", - mic->name, __func__, __LINE__, strerror(errno)); - sleep(2); - goto retry; - } - - if (mic->restart) { - snprintf(buffer, PATH_MAX, "boot"); - setsysfs(mic->name, "state", buffer); - mpsslog("%s restarting mic %d\n", - mic->name, mic->restart); - mic->restart = 0; - } - - while (1) { - while (block_till_state_change(fd, mic)) { - mpsslog("%s: %s %d block_till_state_change error %s\n", - mic->name, __func__, __LINE__, strerror(errno)); - sleep(2); - continue; - } - - if (get_mic_state(mic) == MIC_BOOTING) - break; - } - - mic->pid = fork(); - switch (mic->pid) { - case 0: - add_virtio_device(mic, &virtcons_dev_page.dd); - add_virtio_device(mic, &virtnet_dev_page.dd); - err = pthread_create(&mic->mic_console.console_thread, NULL, - virtio_console, mic); - if (err) - mpsslog("%s virtcons pthread_create failed %s\n", - mic->name, strerror(err)); - err = pthread_create(&mic->mic_net.net_thread, NULL, - virtio_net, mic); - if (err) - mpsslog("%s virtnet pthread_create failed %s\n", - mic->name, strerror(err)); - err = 
pthread_create(&mic->mic_virtblk.block_thread, NULL, - virtio_block, mic); - if (err) - mpsslog("%s virtblk pthread_create failed %s\n", - mic->name, strerror(err)); - sigemptyset(&act.sa_mask); - err = sigaction(SIGUSR1, &act, NULL); - if (err) - mpsslog("%s sigaction SIGUSR1 failed %s\n", - mic->name, strerror(errno)); - while (1) - sleep(60); - case -1: - mpsslog("fork failed MIC name %s id %d errno %d\n", - mic->name, mic->id, errno); - break; - default: - err = pthread_create(&mic->config_thread, NULL, - mic_config, mic); - if (err) - mpsslog("%s mic_config pthread_create failed %s\n", - mic->name, strerror(err)); - } - - return NULL; -} - -static void -start_daemon(void) -{ - struct mic_info *mic; - int err; - - for (mic = mic_list.next; mic; mic = mic->next) { - set_mic_boot_params(mic); - err = pthread_create(&mic->init_thread, NULL, init_mic, mic); - if (err) - mpsslog("%s init_mic pthread_create failed %s\n", - mic->name, strerror(err)); - } - - while (1) - sleep(60); -} - -static int -init_mic_list(void) -{ - struct mic_info *mic = &mic_list; - struct dirent *file; - DIR *dp; - int cnt = 0; - - dp = opendir(MICSYSFSDIR); - if (!dp) - return 0; - - while ((file = readdir(dp)) != NULL) { - if (!strncmp(file->d_name, "mic", 3)) { - mic->next = calloc(1, sizeof(struct mic_info)); - if (mic->next) { - mic = mic->next; - mic->id = atoi(&file->d_name[3]); - mic->name = malloc(strlen(file->d_name) + 16); - if (mic->name) - strcpy(mic->name, file->d_name); - mpsslog("MIC name %s id %d\n", mic->name, - mic->id); - cnt++; - } - } - } - - closedir(dp); - return cnt; -} - -void -mpsslog(char *format, ...) 
-{ - va_list args; - char buffer[4096]; - char ts[52], *ts1; - time_t t; - - if (logfp == NULL) - return; - - va_start(args, format); - vsprintf(buffer, format, args); - va_end(args); - - time(&t); - ts1 = ctime_r(&t, ts); - ts1[strlen(ts1) - 1] = '\0'; - fprintf(logfp, "%s: %s", ts1, buffer); - - fflush(logfp); -} - -int -main(int argc, char *argv[]) -{ - int cnt; - pid_t pid; - - myname = argv[0]; - - logfp = fopen(LOGFILE_NAME, "a+"); - if (!logfp) { - fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME); - exit(1); - } - pid = fork(); - switch (pid) { - case 0: - break; - case -1: - exit(2); - default: - exit(0); - } - - mpsslog("MIC Daemon start\n"); - - cnt = init_mic_list(); - if (cnt == 0) { - mpsslog("MIC module not loaded\n"); - exit(3); - } - mpsslog("MIC found %d devices\n", cnt); - - start_daemon(); - - exit(0); -} --- a/Documentation/mic/mpssd/mpssd.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Intel MIC Platform Software Stack (MPSS) - * - * Copyright(c) 2013 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Intel MIC User Space Tools. 
- */ -#ifndef _MPSSD_H_ -#define _MPSSD_H_ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <fcntl.h> -#include <unistd.h> -#include <dirent.h> -#include <libgen.h> -#include <pthread.h> -#include <stdarg.h> -#include <time.h> -#include <errno.h> -#include <sys/dir.h> -#include <sys/ioctl.h> -#include <sys/poll.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <sys/mman.h> -#include <sys/utsname.h> -#include <sys/wait.h> -#include <netinet/in.h> -#include <arpa/inet.h> -#include <netdb.h> -#include <pthread.h> -#include <signal.h> -#include <limits.h> -#include <syslog.h> -#include <getopt.h> -#include <net/if.h> -#include <linux/if_tun.h> -#include <linux/if_tun.h> -#include <linux/virtio_ids.h> - -#define MICSYSFSDIR "/sys/class/mic" -#define LOGFILE_NAME "/var/log/mpssd" -#define PAGE_SIZE 4096 - -struct mic_console_info { - pthread_t console_thread; - int virtio_console_fd; - void *console_dp; -}; - -struct mic_net_info { - pthread_t net_thread; - int virtio_net_fd; - int tap_fd; - void *net_dp; -}; - -struct mic_virtblk_info { - pthread_t block_thread; - int virtio_block_fd; - void *block_dp; - volatile sig_atomic_t signaled; - char *backend_file; - int backend; - void *backend_addr; - long backend_size; -}; - -struct mic_info { - int id; - char *name; - pthread_t config_thread; - pthread_t init_thread; - pid_t pid; - struct mic_console_info mic_console; - struct mic_net_info mic_net; - struct mic_virtblk_info mic_virtblk; - int restart; - int boot_on_resume; - struct mic_info *next; -}; - -__attribute__((format(printf, 1, 2))) -void mpsslog(char *format, ...); -char *readsysfs(char *dir, char *entry); -int setsysfs(char *dir, char *entry, char *value); -#endif --- a/Documentation/mic/mpssd/sysfs.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Intel MIC Platform Software Stack (MPSS) - * - * Copyright(c) 2013 Intel Corporation. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Intel MIC User Space Tools. - */ - -#include "mpssd.h" - -#define PAGE_SIZE 4096 - -char * -readsysfs(char *dir, char *entry) -{ - char filename[PATH_MAX]; - char value[PAGE_SIZE]; - char *string = NULL; - int fd; - int len; - - if (dir == NULL) - snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry); - else - snprintf(filename, PATH_MAX, - "%s/%s/%s", MICSYSFSDIR, dir, entry); - - fd = open(filename, O_RDONLY); - if (fd < 0) { - mpsslog("Failed to open sysfs entry '%s': %s\n", - filename, strerror(errno)); - return NULL; - } - - len = read(fd, value, sizeof(value)); - if (len < 0) { - mpsslog("Failed to read sysfs entry '%s': %s\n", - filename, strerror(errno)); - goto readsys_ret; - } - if (len == 0) - goto readsys_ret; - - value[len - 1] = '\0'; - - string = malloc(strlen(value) + 1); - if (string) - strcpy(string, value); - -readsys_ret: - close(fd); - return string; -} - -int -setsysfs(char *dir, char *entry, char *value) -{ - char filename[PATH_MAX]; - char *oldvalue; - int fd, ret = 0; - - if (dir == NULL) - snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry); - else - snprintf(filename, PATH_MAX, "%s/%s/%s", - MICSYSFSDIR, dir, entry); - - oldvalue = readsysfs(dir, entry); - - fd = open(filename, O_RDWR); - if (fd < 0) { - ret = errno; - mpsslog("Failed to open sysfs entry '%s': %s\n", - filename, strerror(errno)); - goto done; - } - - if (!oldvalue || strcmp(value, oldvalue)) { - if 
(write(fd, value, strlen(value)) < 0) { - ret = errno; - mpsslog("Failed to write new sysfs entry '%s': %s\n", - filename, strerror(errno)); - } - } - close(fd); -done: - if (oldvalue) - free(oldvalue); - return ret; -} --- /dev/null +++ b/samples/mic/mpssd/.gitignore @@ -0,0 +1 @@ +mpssd --- /dev/null +++ b/samples/mic/mpssd/Makefile @@ -0,0 +1,27 @@ +ifndef CROSS_COMPILE +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) + +ifeq ($(ARCH),x86) + +PROGS := mpssd +CC = $(CROSS_COMPILE)gcc +CFLAGS := -I../../../usr/include -I../../../tools/include + +ifdef DEBUG +CFLAGS += -DDEBUG=$(DEBUG) +endif + +all: $(PROGS) +mpssd: mpssd.c sysfs.c + $(CC) $(CFLAGS) mpssd.c sysfs.c -o mpssd -lpthread + +install: + install mpssd /usr/sbin/mpssd + install micctrl /usr/sbin/micctrl + +clean: + rm -fr $(PROGS) + +endif +endif --- /dev/null +++ b/samples/mic/mpssd/micctrl @@ -0,0 +1,173 @@ +#!/bin/bash +# Intel MIC Platform Software Stack (MPSS) +# +# Copyright(c) 2013 Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# The full GNU General Public License is included in this distribution in +# the file called "COPYING". +# +# Intel MIC User Space Tools. +# +# micctrl - Controls MIC boot/start/stop. +# +# chkconfig: 2345 95 05 +# description: start MPSS stack processing. +# +### BEGIN INIT INFO +# Provides: micctrl +### END INIT INFO + +# Source function library. +. 
/etc/init.d/functions + +sysfs="/sys/class/mic" + +_status() +{ + f=$sysfs/$1 + echo -e $1 state: "`cat $f/state`" shutdown_status: "`cat $f/shutdown_status`" +} + +status() +{ + if [ "`echo $1 | head -c3`" == "mic" ]; then + _status $1 + return $? + fi + for f in $sysfs/* + do + _status `basename $f` + RETVAL=$? + [ $RETVAL -ne 0 ] && return $RETVAL + done + return 0 +} + +_reset() +{ + f=$sysfs/$1 + echo reset > $f/state +} + +reset() +{ + if [ "`echo $1 | head -c3`" == "mic" ]; then + _reset $1 + return $? + fi + for f in $sysfs/* + do + _reset `basename $f` + RETVAL=$? + [ $RETVAL -ne 0 ] && return $RETVAL + done + return 0 +} + +_boot() +{ + f=$sysfs/$1 + echo "linux" > $f/bootmode + echo "mic/uos.img" > $f/firmware + echo "mic/$1.image" > $f/ramdisk + echo "boot" > $f/state +} + +boot() +{ + if [ "`echo $1 | head -c3`" == "mic" ]; then + _boot $1 + return $? + fi + for f in $sysfs/* + do + _boot `basename $f` + RETVAL=$? + [ $RETVAL -ne 0 ] && return $RETVAL + done + return 0 +} + +_shutdown() +{ + f=$sysfs/$1 + echo shutdown > $f/state +} + +shutdown() +{ + if [ "`echo $1 | head -c3`" == "mic" ]; then + _shutdown $1 + return $? + fi + for f in $sysfs/* + do + _shutdown `basename $f` + RETVAL=$? + [ $RETVAL -ne 0 ] && return $RETVAL + done + return 0 +} + +_wait() +{ + f=$sysfs/$1 + while [ "`cat $f/state`" != "offline" -a "`cat $f/state`" != "online" ] + do + sleep 1 + echo -e "Waiting for $1 to go offline" + done +} + +wait() +{ + if [ "`echo $1 | head -c3`" == "mic" ]; then + _wait $1 + return $? + fi + # Wait for the cards to go offline + for f in $sysfs/* + do + _wait `basename $f` + RETVAL=$? + [ $RETVAL -ne 0 ] && return $RETVAL + done + return 0 +} + +if [ ! 
-d "$sysfs" ]; then + echo -e $"Module unloaded " + exit 3 +fi + +case $1 in + -s) + status $2 + ;; + -r) + reset $2 + ;; + -b) + boot $2 + ;; + -S) + shutdown $2 + ;; + -w) + wait $2 + ;; + *) + echo $"Usage: $0 {-s (status) |-r (reset) |-b (boot) |-S (shutdown) |-w (wait)}" + exit 2 +esac + +exit $? --- /dev/null +++ b/samples/mic/mpssd/mpss @@ -0,0 +1,200 @@ +#!/bin/bash +# Intel MIC Platform Software Stack (MPSS) +# +# Copyright(c) 2013 Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# The full GNU General Public License is included in this distribution in +# the file called "COPYING". +# +# Intel MIC User Space Tools. +# +# mpss Start mpssd. +# +# chkconfig: 2345 95 05 +# description: start MPSS stack processing. +# +### BEGIN INIT INFO +# Provides: mpss +# Required-Start: +# Required-Stop: +# Short-Description: MPSS stack control +# Description: MPSS stack control +### END INIT INFO + +# Source function library. +. /etc/init.d/functions + +exec=/usr/sbin/mpssd +sysfs="/sys/class/mic" +mic_modules="mic_host mic_x100_dma scif" + +start() +{ + [ -x $exec ] || exit 5 + + if [ "`ps -e | awk '{print $4}' | grep mpssd | head -1`" = "mpssd" ]; then + echo -e $"MPSSD already running! " + success + echo + return 0 + fi + + echo -e $"Starting MPSS Stack" + echo -e $"Loading MIC drivers:" $mic_modules + + modprobe -a $mic_modules + RETVAL=$? + if [ $RETVAL -ne 0 ]; then + failure + echo + return $RETVAL + fi + + # Start the daemon + echo -n $"Starting MPSSD " + $exec + RETVAL=$? 
+ if [ $RETVAL -ne 0 ]; then + failure + echo + return $RETVAL + fi + success + echo + + sleep 5 + + # Boot the cards + micctrl -b + + # Wait till ping works + for f in $sysfs/* + do + count=100 + ipaddr=`cat $f/cmdline` + ipaddr=${ipaddr#*address,} + ipaddr=`echo $ipaddr | cut -d, -f1 | cut -d\; -f1` + while [ $count -ge 0 ] + do + echo -e "Pinging "`basename $f`" " + ping -c 1 $ipaddr &> /dev/null + RETVAL=$? + if [ $RETVAL -eq 0 ]; then + success + break + fi + sleep 1 + count=`expr $count - 1` + done + [ $RETVAL -ne 0 ] && failure || success + echo + done + return $RETVAL +} + +stop() +{ + echo -e $"Shutting down MPSS Stack: " + + # Bail out if module is unloaded + if [ ! -d "$sysfs" ]; then + echo -n $"Module unloaded " + success + echo + return 0 + fi + + # Shut down the cards. + micctrl -S + + # Wait for the cards to go offline + for f in $sysfs/* + do + while [ "`cat $f/state`" != "ready" ] + do + sleep 1 + echo -e "Waiting for "`basename $f`" to become ready" + done + done + + # Display the status of the cards + micctrl -s + + # Kill MPSSD now + echo -n $"Killing MPSSD" + killall -9 mpssd 2>/dev/null + RETVAL=$? + [ $RETVAL -ne 0 ] && failure || success + echo + return $RETVAL +} + +restart() +{ + stop + sleep 5 + start +} + +status() +{ + micctrl -s + if [ "`ps -e | awk '{print $4}' | grep mpssd | head -n 1`" = "mpssd" ]; then + echo "mpssd is running" + else + echo "mpssd is stopped" + fi + return 0 +} + +unload() +{ + if [ ! -d "$sysfs" ]; then + echo -n $"No MIC_HOST Module: " + success + echo + return + fi + + stop + + sleep 5 + echo -n $"Removing MIC drivers:" $mic_modules + modprobe -r $mic_modules + RETVAL=$? + [ $RETVAL -ne 0 ] && failure || success + echo + return $RETVAL +} + +case $1 in + start) + start + ;; + stop) + stop + ;; + restart) + restart + ;; + status) + status + ;; + unload) + unload + ;; + *) + echo $"Usage: $0 {start|stop|restart|status|unload}" + exit 2 +esac + +exit $? 
--- /dev/null +++ b/samples/mic/mpssd/mpssd.c @@ -0,0 +1,1826 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC User Space Tools. + */ + +#define _GNU_SOURCE + +#include <stdlib.h> +#include <fcntl.h> +#include <getopt.h> +#include <assert.h> +#include <unistd.h> +#include <stdbool.h> +#include <signal.h> +#include <poll.h> +#include <features.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <linux/virtio_ring.h> +#include <linux/virtio_net.h> +#include <linux/virtio_console.h> +#include <linux/virtio_blk.h> +#include <linux/version.h> +#include "mpssd.h" +#include <linux/mic_ioctl.h> +#include <linux/mic_common.h> +#include <tools/endian.h> + +static void *init_mic(void *arg); + +static FILE *logfp; +static struct mic_info mic_list; + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +#define min_t(type, x, y) ({ \ + type __min1 = (x); \ + type __min2 = (y); \ + __min1 < __min2 ? 
__min1 : __min2; }) + +/* align addr on a size boundary - adjust address up/down if needed */ +#define _ALIGN_DOWN(addr, size) ((addr)&(~((size)-1))) +#define _ALIGN_UP(addr, size) _ALIGN_DOWN(addr + size - 1, size) + +/* align addr on a size boundary - adjust address up if needed */ +#define _ALIGN(addr, size) _ALIGN_UP(addr, size) + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) + +#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) + +#define GSO_ENABLED 1 +#define MAX_GSO_SIZE (64 * 1024) +#define ETH_H_LEN 14 +#define MAX_NET_PKT_SIZE (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64)) +#define MIC_DEVICE_PAGE_END 0x1000 + +#ifndef VIRTIO_NET_HDR_F_DATA_VALID +#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ +#endif + +static struct { + struct mic_device_desc dd; + struct mic_vqconfig vqconfig[2]; + __u32 host_features, guest_acknowledgements; + struct virtio_console_config cons_config; +} virtcons_dev_page = { + .dd = { + .type = VIRTIO_ID_CONSOLE, + .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig), + .feature_len = sizeof(virtcons_dev_page.host_features), + .config_len = sizeof(virtcons_dev_page.cons_config), + }, + .vqconfig[0] = { + .num = htole16(MIC_VRING_ENTRIES), + }, + .vqconfig[1] = { + .num = htole16(MIC_VRING_ENTRIES), + }, +}; + +static struct { + struct mic_device_desc dd; + struct mic_vqconfig vqconfig[2]; + __u32 host_features, guest_acknowledgements; + struct virtio_net_config net_config; +} virtnet_dev_page = { + .dd = { + .type = VIRTIO_ID_NET, + .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig), + .feature_len = sizeof(virtnet_dev_page.host_features), + .config_len = sizeof(virtnet_dev_page.net_config), + }, + .vqconfig[0] = { + .num = htole16(MIC_VRING_ENTRIES), + }, + .vqconfig[1] = { + .num = htole16(MIC_VRING_ENTRIES), + }, +#if GSO_ENABLED + .host_features = htole32( + 1 << VIRTIO_NET_F_CSUM | + 1 << VIRTIO_NET_F_GSO | + 1 << VIRTIO_NET_F_GUEST_TSO4 | + 1 << 
VIRTIO_NET_F_GUEST_TSO6 | + 1 << VIRTIO_NET_F_GUEST_ECN), +#else + .host_features = 0, +#endif +}; + +static const char *mic_config_dir = "/etc/mpss"; +static const char *virtblk_backend = "VIRTBLK_BACKEND"; +static struct { + struct mic_device_desc dd; + struct mic_vqconfig vqconfig[1]; + __u32 host_features, guest_acknowledgements; + struct virtio_blk_config blk_config; +} virtblk_dev_page = { + .dd = { + .type = VIRTIO_ID_BLOCK, + .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig), + .feature_len = sizeof(virtblk_dev_page.host_features), + .config_len = sizeof(virtblk_dev_page.blk_config), + }, + .vqconfig[0] = { + .num = htole16(MIC_VRING_ENTRIES), + }, + .host_features = + htole32(1<<VIRTIO_BLK_F_SEG_MAX), + .blk_config = { + .seg_max = htole32(MIC_VRING_ENTRIES - 2), + .capacity = htole64(0), + } +}; + +static char *myname; + +static int +tap_configure(struct mic_info *mic, char *dev) +{ + pid_t pid; + char *ifargv[7]; + char ipaddr[IFNAMSIZ]; + int ret = 0; + + pid = fork(); + if (pid == 0) { + ifargv[0] = "ip"; + ifargv[1] = "link"; + ifargv[2] = "set"; + ifargv[3] = dev; + ifargv[4] = "up"; + ifargv[5] = NULL; + mpsslog("Configuring %s\n", dev); + ret = execvp("ip", ifargv); + if (ret < 0) { + mpsslog("%s execvp failed errno %s\n", + mic->name, strerror(errno)); + return ret; + } + } + if (pid < 0) { + mpsslog("%s fork failed errno %s\n", + mic->name, strerror(errno)); + return ret; + } + + ret = waitpid(pid, NULL, 0); + if (ret < 0) { + mpsslog("%s waitpid failed errno %s\n", + mic->name, strerror(errno)); + return ret; + } + + snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id + 1); + + pid = fork(); + if (pid == 0) { + ifargv[0] = "ip"; + ifargv[1] = "addr"; + ifargv[2] = "add"; + ifargv[3] = ipaddr; + ifargv[4] = "dev"; + ifargv[5] = dev; + ifargv[6] = NULL; + mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr); + ret = execvp("ip", ifargv); + if (ret < 0) { + mpsslog("%s execvp failed errno %s\n", + mic->name, strerror(errno)); + return ret; + } + } + 
if (pid < 0) { + mpsslog("%s fork failed errno %s\n", + mic->name, strerror(errno)); + return ret; + } + + ret = waitpid(pid, NULL, 0); + if (ret < 0) { + mpsslog("%s waitpid failed errno %s\n", + mic->name, strerror(errno)); + return ret; + } + mpsslog("MIC name %s %s %d DONE!\n", + mic->name, __func__, __LINE__); + return 0; +} + +static int tun_alloc(struct mic_info *mic, char *dev) +{ + struct ifreq ifr; + int fd, err; +#if GSO_ENABLED + unsigned offload; +#endif + fd = open("/dev/net/tun", O_RDWR); + if (fd < 0) { + mpsslog("Could not open /dev/net/tun %s\n", strerror(errno)); + goto done; + } + + memset(&ifr, 0, sizeof(ifr)); + + ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; + if (*dev) + strncpy(ifr.ifr_name, dev, IFNAMSIZ); + + err = ioctl(fd, TUNSETIFF, (void *)&ifr); + if (err < 0) { + mpsslog("%s %s %d TUNSETIFF failed %s\n", + mic->name, __func__, __LINE__, strerror(errno)); + close(fd); + return err; + } +#if GSO_ENABLED + offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_TSO_ECN; + + err = ioctl(fd, TUNSETOFFLOAD, offload); + if (err < 0) { + mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n", + mic->name, __func__, __LINE__, strerror(errno)); + close(fd); + return err; + } +#endif + strcpy(dev, ifr.ifr_name); + mpsslog("Created TAP %s\n", dev); +done: + return fd; +} + +#define NET_FD_VIRTIO_NET 0 +#define NET_FD_TUN 1 +#define MAX_NET_FD 2 + +static void set_dp(struct mic_info *mic, int type, void *dp) +{ + switch (type) { + case VIRTIO_ID_CONSOLE: + mic->mic_console.console_dp = dp; + return; + case VIRTIO_ID_NET: + mic->mic_net.net_dp = dp; + return; + case VIRTIO_ID_BLOCK: + mic->mic_virtblk.block_dp = dp; + return; + } + mpsslog("%s %s %d not found\n", mic->name, __func__, type); + assert(0); +} + +static void *get_dp(struct mic_info *mic, int type) +{ + switch (type) { + case VIRTIO_ID_CONSOLE: + return mic->mic_console.console_dp; + case VIRTIO_ID_NET: + return mic->mic_net.net_dp; + case VIRTIO_ID_BLOCK: + return 
mic->mic_virtblk.block_dp; + } + mpsslog("%s %s %d not found\n", mic->name, __func__, type); + assert(0); + return NULL; +} + +static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type) +{ + struct mic_device_desc *d; + int i; + void *dp = get_dp(mic, type); + + for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE; + i += mic_total_desc_size(d)) { + d = dp + i; + + /* End of list */ + if (d->type == 0) + break; + + if (d->type == -1) + continue; + + mpsslog("%s %s d-> type %d d %p\n", + mic->name, __func__, d->type, d); + + if (d->type == (__u8)type) + return d; + } + mpsslog("%s %s %d not found\n", mic->name, __func__, type); + return NULL; +} + +/* See comments in vhost.c for explanation of next_desc() */ +static unsigned next_desc(struct vring_desc *desc) +{ + unsigned int next; + + if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) + return -1U; + next = le16toh(desc->next); + return next; +} + +/* Sum up all the IOVEC length */ +static ssize_t +sum_iovec_len(struct mic_copy_desc *copy) +{ + ssize_t sum = 0; + int i; + + for (i = 0; i < copy->iovcnt; i++) + sum += copy->iov[i].iov_len; + return sum; +} + +static inline void verify_out_len(struct mic_info *mic, + struct mic_copy_desc *copy) +{ + if (copy->out_len != sum_iovec_len(copy)) { + mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n", + mic->name, __func__, __LINE__, + copy->out_len, sum_iovec_len(copy)); + assert(copy->out_len == sum_iovec_len(copy)); + } +} + +/* Display an iovec */ +static void +disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy, + const char *s, int line) +{ + int i; + + for (i = 0; i < copy->iovcnt; i++) + mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n", + mic->name, s, line, i, + copy->iov[i].iov_base, copy->iov[i].iov_len); +} + +static inline __u16 read_avail_idx(struct mic_vring *vr) +{ + return ACCESS_ONCE(vr->info->avail_idx); +} + +static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr, + struct mic_copy_desc *copy, ssize_t 
len) +{ + copy->vr_idx = tx ? 0 : 1; + copy->update_used = true; + if (type == VIRTIO_ID_NET) + copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr); + else + copy->iov[0].iov_len = len; +} + +/* Central API which triggers the copies */ +static int +mic_virtio_copy(struct mic_info *mic, int fd, + struct mic_vring *vr, struct mic_copy_desc *copy) +{ + int ret; + + ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy); + if (ret) { + mpsslog("%s %s %d errno %s ret %d\n", + mic->name, __func__, __LINE__, + strerror(errno), ret); + } + return ret; +} + +static inline unsigned _vring_size(unsigned int num, unsigned long align) +{ + return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (3 + num) + + align - 1) & ~(align - 1)) + + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num; +} + +/* + * This initialization routine requires at least one + * vring i.e. vr0. vr1 is optional. + */ +static void * +init_vr(struct mic_info *mic, int fd, int type, + struct mic_vring *vr0, struct mic_vring *vr1, int num_vq) +{ + int vr_size; + char *va; + + vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES, + MIC_VIRTIO_RING_ALIGN) + + sizeof(struct _mic_vring_info)); + va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq, + PROT_READ, MAP_SHARED, fd, 0); + if (MAP_FAILED == va) { + mpsslog("%s %s %d mmap failed errno %s\n", + mic->name, __func__, __LINE__, + strerror(errno)); + goto done; + } + set_dp(mic, type, va); + vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END]; + vr0->info = vr0->va + + _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN); + vring_init(&vr0->vr, + MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN); + mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ", + __func__, mic->name, vr0->va, vr0->info, vr_size, + _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN)); + mpsslog("magic 0x%x expected 0x%x\n", + le32toh(vr0->info->magic), MIC_MAGIC + type); + assert(le32toh(vr0->info->magic) == MIC_MAGIC + type); + if (vr1) { + vr1->va = (struct 
mic_vring *) + &va[MIC_DEVICE_PAGE_END + vr_size]; + vr1->info = vr1->va + _vring_size(MIC_VRING_ENTRIES, + MIC_VIRTIO_RING_ALIGN); + vring_init(&vr1->vr, + MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN); + mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ", + __func__, mic->name, vr1->va, vr1->info, vr_size, + _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN)); + mpsslog("magic 0x%x expected 0x%x\n", + le32toh(vr1->info->magic), MIC_MAGIC + type + 1); + assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1); + } +done: + return va; +} + +static int +wait_for_card_driver(struct mic_info *mic, int fd, int type) +{ + struct pollfd pollfd; + int err; + struct mic_device_desc *desc = get_device_desc(mic, type); + __u8 prev_status; + + if (!desc) + return -ENODEV; + prev_status = desc->status; + pollfd.fd = fd; + mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n", + mic->name, __func__, type, desc->status); + + while (1) { + pollfd.events = POLLIN; + pollfd.revents = 0; + err = poll(&pollfd, 1, -1); + if (err < 0) { + mpsslog("%s %s poll failed %s\n", + mic->name, __func__, strerror(errno)); + continue; + } + + if (pollfd.revents) { + if (desc->status != prev_status) { + mpsslog("%s %s Waiting... 
desc-> type %d " + "status 0x%x\n", + mic->name, __func__, type, + desc->status); + prev_status = desc->status; + } + if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) { + mpsslog("%s %s poll.revents %d\n", + mic->name, __func__, pollfd.revents); + mpsslog("%s %s desc-> type %d status 0x%x\n", + mic->name, __func__, type, + desc->status); + break; + } + } + } + return 0; +} + +/* Spin till we have some descriptors */ +static void +spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr) +{ + __u16 avail_idx = read_avail_idx(vr); + + while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) { +#ifdef DEBUG + mpsslog("%s %s waiting for desc avail %d info_avail %d\n", + mic->name, __func__, + le16toh(vr->vr.avail->idx), vr->info->avail_idx); +#endif + sched_yield(); + } +} + +static void * +virtio_net(void *arg) +{ + static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)]; + static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64))); + struct iovec vnet_iov[2][2] = { + { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) }, + { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } }, + { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) }, + { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } }, + }; + struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1]; + struct mic_info *mic = (struct mic_info *)arg; + char if_name[IFNAMSIZ]; + struct pollfd net_poll[MAX_NET_FD]; + struct mic_vring tx_vr, rx_vr; + struct mic_copy_desc copy; + struct mic_device_desc *desc; + int err; + + snprintf(if_name, IFNAMSIZ, "mic%d", mic->id); + mic->mic_net.tap_fd = tun_alloc(mic, if_name); + if (mic->mic_net.tap_fd < 0) + goto done; + + if (tap_configure(mic, if_name)) + goto done; + mpsslog("MIC name %s id %d\n", mic->name, mic->id); + + net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd; + net_poll[NET_FD_VIRTIO_NET].events = POLLIN; + net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd; + net_poll[NET_FD_TUN].events = POLLIN; + + if 
(MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd, + VIRTIO_ID_NET, &tx_vr, &rx_vr, + virtnet_dev_page.dd.num_vq)) { + mpsslog("%s init_vr failed %s\n", + mic->name, strerror(errno)); + goto done; + } + + copy.iovcnt = 2; + desc = get_device_desc(mic, VIRTIO_ID_NET); + + while (1) { + ssize_t len; + + net_poll[NET_FD_VIRTIO_NET].revents = 0; + net_poll[NET_FD_TUN].revents = 0; + + /* Start polling for data from tap and virtio net */ + err = poll(net_poll, 2, -1); + if (err < 0) { + mpsslog("%s poll failed %s\n", + __func__, strerror(errno)); + continue; + } + if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) { + err = wait_for_card_driver(mic, + mic->mic_net.virtio_net_fd, + VIRTIO_ID_NET); + if (err) { + mpsslog("%s %s %d Exiting...\n", + mic->name, __func__, __LINE__); + break; + } + } + /* + * Check if there is data to be read from TUN and write to + * virtio net fd if there is. + */ + if (net_poll[NET_FD_TUN].revents & POLLIN) { + copy.iov = iov0; + len = readv(net_poll[NET_FD_TUN].fd, + copy.iov, copy.iovcnt); + if (len > 0) { + struct virtio_net_hdr *hdr + = (struct virtio_net_hdr *)vnet_hdr[0]; + + /* Disable checksums on the card since we are on + a reliable PCIe link */ + hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID; +#ifdef DEBUG + mpsslog("%s %s %d hdr->flags 0x%x ", mic->name, + __func__, __LINE__, hdr->flags); + mpsslog("copy.out_len %d hdr->gso_type 0x%x\n", + copy.out_len, hdr->gso_type); +#endif +#ifdef DEBUG + disp_iovec(mic, copy, __func__, __LINE__); + mpsslog("%s %s %d read from tap 0x%lx\n", + mic->name, __func__, __LINE__, + len); +#endif + spin_for_descriptors(mic, &tx_vr); + txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy, + len); + + err = mic_virtio_copy(mic, + mic->mic_net.virtio_net_fd, &tx_vr, + &copy); + if (err < 0) { + mpsslog("%s %s %d mic_virtio_copy %s\n", + mic->name, __func__, __LINE__, + strerror(errno)); + } + if (!err) + verify_out_len(mic, &copy); +#ifdef DEBUG + disp_iovec(mic, copy, __func__, __LINE__); + mpsslog("%s %s %d wrote to net 
0x%lx\n", + mic->name, __func__, __LINE__, + sum_iovec_len(&copy)); +#endif + /* Reinitialize IOV for next run */ + iov0[1].iov_len = MAX_NET_PKT_SIZE; + } else if (len < 0) { + disp_iovec(mic, &copy, __func__, __LINE__); + mpsslog("%s %s %d read failed %s ", mic->name, + __func__, __LINE__, strerror(errno)); + mpsslog("cnt %d sum %zd\n", + copy.iovcnt, sum_iovec_len(&copy)); + } + } + + /* + * Check if there is data to be read from virtio net and + * write to TUN if there is. + */ + if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) { + while (rx_vr.info->avail_idx != + le16toh(rx_vr.vr.avail->idx)) { + copy.iov = iov1; + txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy, + MAX_NET_PKT_SIZE + + sizeof(struct virtio_net_hdr)); + + err = mic_virtio_copy(mic, + mic->mic_net.virtio_net_fd, &rx_vr, + &copy); + if (!err) { +#ifdef DEBUG + struct virtio_net_hdr *hdr + = (struct virtio_net_hdr *) + vnet_hdr[1]; + + mpsslog("%s %s %d hdr->flags 0x%x, ", + mic->name, __func__, __LINE__, + hdr->flags); + mpsslog("out_len %d gso_type 0x%x\n", + copy.out_len, + hdr->gso_type); +#endif + /* Set the correct output iov_len */ + iov1[1].iov_len = copy.out_len - + sizeof(struct virtio_net_hdr); + verify_out_len(mic, &copy); +#ifdef DEBUG + disp_iovec(mic, copy, __func__, + __LINE__); + mpsslog("%s %s %d ", + mic->name, __func__, __LINE__); + mpsslog("read from net 0x%lx\n", + sum_iovec_len(copy)); +#endif + len = writev(net_poll[NET_FD_TUN].fd, + copy.iov, copy.iovcnt); + if (len != sum_iovec_len(&copy)) { + mpsslog("Tun write failed %s ", + strerror(errno)); + mpsslog("len 0x%zx ", len); + mpsslog("read_len 0x%zx\n", + sum_iovec_len(&copy)); + } else { +#ifdef DEBUG + disp_iovec(mic, &copy, __func__, + __LINE__); + mpsslog("%s %s %d ", + mic->name, __func__, + __LINE__); + mpsslog("wrote to tap 0x%lx\n", + len); +#endif + } + } else { + mpsslog("%s %s %d mic_virtio_copy %s\n", + mic->name, __func__, __LINE__, + strerror(errno)); + break; + } + } + } + if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR) + mpsslog("%s: %s: 
POLLERR\n", __func__, mic->name); + } +done: + pthread_exit(NULL); +} + +/* virtio_console */ +#define VIRTIO_CONSOLE_FD 0 +#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1) +#define MAX_CONSOLE_FD (MONITOR_FD + 1) /* must be the last one + 1 */ +#define MAX_BUFFER_SIZE PAGE_SIZE + +static void * +virtio_console(void *arg) +{ + static __u8 vcons_buf[2][PAGE_SIZE]; + struct iovec vcons_iov[2] = { + { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) }, + { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) }, + }; + struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1]; + struct mic_info *mic = (struct mic_info *)arg; + int err; + struct pollfd console_poll[MAX_CONSOLE_FD]; + int pty_fd; + char *pts_name; + ssize_t len; + struct mic_vring tx_vr, rx_vr; + struct mic_copy_desc copy; + struct mic_device_desc *desc; + + pty_fd = posix_openpt(O_RDWR); + if (pty_fd < 0) { + mpsslog("can't open a pseudoterminal master device: %s\n", + strerror(errno)); + goto _return; + } + pts_name = ptsname(pty_fd); + if (pts_name == NULL) { + mpsslog("can't get pts name\n"); + goto _close_pty; + } + printf("%s console message goes to %s\n", mic->name, pts_name); + mpsslog("%s console message goes to %s\n", mic->name, pts_name); + err = grantpt(pty_fd); + if (err < 0) { + mpsslog("can't grant access: %s %s\n", + pts_name, strerror(errno)); + goto _close_pty; + } + err = unlockpt(pty_fd); + if (err < 0) { + mpsslog("can't unlock a pseudoterminal: %s %s\n", + pts_name, strerror(errno)); + goto _close_pty; + } + console_poll[MONITOR_FD].fd = pty_fd; + console_poll[MONITOR_FD].events = POLLIN; + + console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd; + console_poll[VIRTIO_CONSOLE_FD].events = POLLIN; + + if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd, + VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr, + virtcons_dev_page.dd.num_vq)) { + mpsslog("%s init_vr failed %s\n", + mic->name, strerror(errno)); + goto _close_pty; + } + + copy.iovcnt = 1; + desc = 
get_device_desc(mic, VIRTIO_ID_CONSOLE); + + for (;;) { + console_poll[MONITOR_FD].revents = 0; + console_poll[VIRTIO_CONSOLE_FD].revents = 0; + err = poll(console_poll, MAX_CONSOLE_FD, -1); + if (err < 0) { + mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__, + strerror(errno)); + continue; + } + if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) { + err = wait_for_card_driver(mic, + mic->mic_console.virtio_console_fd, + VIRTIO_ID_CONSOLE); + if (err) { + mpsslog("%s %s %d Exiting...\n", + mic->name, __func__, __LINE__); + break; + } + } + + if (console_poll[MONITOR_FD].revents & POLLIN) { + copy.iov = iov0; + len = readv(pty_fd, copy.iov, copy.iovcnt); + if (len > 0) { +#ifdef DEBUG + disp_iovec(mic, copy, __func__, __LINE__); + mpsslog("%s %s %d read from tap 0x%lx\n", + mic->name, __func__, __LINE__, + len); +#endif + spin_for_descriptors(mic, &tx_vr); + txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr, + &copy, len); + + err = mic_virtio_copy(mic, + mic->mic_console.virtio_console_fd, + &tx_vr, &copy); + if (err < 0) { + mpsslog("%s %s %d mic_virtio_copy %s\n", + mic->name, __func__, __LINE__, + strerror(errno)); + } + if (!err) + verify_out_len(mic, &copy); +#ifdef DEBUG + disp_iovec(mic, copy, __func__, __LINE__); + mpsslog("%s %s %d wrote to net 0x%lx\n", + mic->name, __func__, __LINE__, + sum_iovec_len(copy)); +#endif + /* Reinitialize IOV for next run */ + iov0->iov_len = PAGE_SIZE; + } else if (len < 0) { + disp_iovec(mic, &copy, __func__, __LINE__); + mpsslog("%s %s %d read failed %s ", + mic->name, __func__, __LINE__, + strerror(errno)); + mpsslog("cnt %d sum %zd\n", + copy.iovcnt, sum_iovec_len(&copy)); + } + } + + if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) { + while (rx_vr.info->avail_idx != + le16toh(rx_vr.vr.avail->idx)) { + copy.iov = iov1; + txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr, + &copy, PAGE_SIZE); + + err = mic_virtio_copy(mic, + mic->mic_console.virtio_console_fd, + &rx_vr, &copy); + if (!err) { + /* Set the correct output iov_len */ + iov1->iov_len = 
copy.out_len; + verify_out_len(mic, &copy); +#ifdef DEBUG + disp_iovec(mic, copy, __func__, + __LINE__); + mpsslog("%s %s %d ", + mic->name, __func__, __LINE__); + mpsslog("read from net 0x%lx\n", + sum_iovec_len(copy)); +#endif + len = writev(pty_fd, + copy.iov, copy.iovcnt); + if (len != sum_iovec_len(&copy)) { + mpsslog("Tun write failed %s ", + strerror(errno)); + mpsslog("len 0x%zx ", len); + mpsslog("read_len 0x%zx\n", + sum_iovec_len(&copy)); + } else { +#ifdef DEBUG + disp_iovec(mic, copy, __func__, + __LINE__); + mpsslog("%s %s %d ", + mic->name, __func__, + __LINE__); + mpsslog("wrote to tap 0x%lx\n", + len); +#endif + } + } else { + mpsslog("%s %s %d mic_virtio_copy %s\n", + mic->name, __func__, __LINE__, + strerror(errno)); + break; + } + } + } + if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR) + mpsslog("%s: %s: POLLERR\n", __func__, mic->name); + } +_close_pty: + close(pty_fd); +_return: + pthread_exit(NULL); +} + +static void +add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd) +{ + char path[PATH_MAX]; + int fd, err; + + snprintf(path, PATH_MAX, "/dev/mic%d", mic->id); + fd = open(path, O_RDWR); + if (fd < 0) { + mpsslog("Could not open %s %s\n", path, strerror(errno)); + return; + } + + err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd); + if (err < 0) { + mpsslog("Could not add %d %s\n", dd->type, strerror(errno)); + close(fd); + return; + } + switch (dd->type) { + case VIRTIO_ID_NET: + mic->mic_net.virtio_net_fd = fd; + mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name); + break; + case VIRTIO_ID_CONSOLE: + mic->mic_console.virtio_console_fd = fd; + mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name); + break; + case VIRTIO_ID_BLOCK: + mic->mic_virtblk.virtio_block_fd = fd; + mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name); + break; + } +} + +static bool +set_backend_file(struct mic_info *mic) +{ + FILE *config; + char buff[PATH_MAX], *line, *evv, *p; + + snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id); + config = 
fopen(buff, "r"); + if (config == NULL) + return false; + do { /* look for "virtblk_backend=XXXX" */ + line = fgets(buff, PATH_MAX, config); + if (line == NULL) + break; + if (*line == '#') + continue; + p = strchr(line, '\n'); + if (p) + *p = '\0'; + } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0); + fclose(config); + if (line == NULL) + return false; + evv = strchr(line, '='); + if (evv == NULL) + return false; + mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1); + if (mic->mic_virtblk.backend_file == NULL) { + mpsslog("%s %d can't allocate memory\n", mic->name, mic->id); + return false; + } + strcpy(mic->mic_virtblk.backend_file, evv + 1); + return true; +} + +#define SECTOR_SIZE 512 +static bool +set_backend_size(struct mic_info *mic) +{ + mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0, + SEEK_END); + if (mic->mic_virtblk.backend_size < 0) { + mpsslog("%s: can't seek: %s\n", + mic->name, mic->mic_virtblk.backend_file); + return false; + } + virtblk_dev_page.blk_config.capacity = + mic->mic_virtblk.backend_size / SECTOR_SIZE; + if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0) + virtblk_dev_page.blk_config.capacity++; + + virtblk_dev_page.blk_config.capacity = + htole64(virtblk_dev_page.blk_config.capacity); + + return true; +} + +static bool +open_backend(struct mic_info *mic) +{ + if (!set_backend_file(mic)) + goto _error_exit; + mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR); + if (mic->mic_virtblk.backend < 0) { + mpsslog("%s: can't open: %s\n", mic->name, + mic->mic_virtblk.backend_file); + goto _error_free; + } + if (!set_backend_size(mic)) + goto _error_close; + mic->mic_virtblk.backend_addr = mmap(NULL, + mic->mic_virtblk.backend_size, + PROT_READ|PROT_WRITE, MAP_SHARED, + mic->mic_virtblk.backend, 0L); + if (mic->mic_virtblk.backend_addr == MAP_FAILED) { + mpsslog("%s: can't map: %s %s\n", + mic->name, mic->mic_virtblk.backend_file, + strerror(errno)); + goto _error_close; + } 
+ return true; + + _error_close: + close(mic->mic_virtblk.backend); + _error_free: + free(mic->mic_virtblk.backend_file); + _error_exit: + return false; +} + +static void +close_backend(struct mic_info *mic) +{ + munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size); + close(mic->mic_virtblk.backend); + free(mic->mic_virtblk.backend_file); +} + +static bool +start_virtblk(struct mic_info *mic, struct mic_vring *vring) +{ + if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) { + mpsslog("%s: blk_config is not 8 byte aligned.\n", + mic->name); + return false; + } + add_virtio_device(mic, &virtblk_dev_page.dd); + if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd, + VIRTIO_ID_BLOCK, vring, NULL, + virtblk_dev_page.dd.num_vq)) { + mpsslog("%s init_vr failed %s\n", + mic->name, strerror(errno)); + return false; + } + return true; +} + +static void +stop_virtblk(struct mic_info *mic) +{ + int vr_size, ret; + + vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES, + MIC_VIRTIO_RING_ALIGN) + + sizeof(struct _mic_vring_info)); + ret = munmap(mic->mic_virtblk.block_dp, + MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq); + if (ret < 0) + mpsslog("%s munmap errno %d\n", mic->name, errno); + close(mic->mic_virtblk.virtio_block_fd); +} + +static __u8 +header_error_check(struct vring_desc *desc) +{ + if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) { + mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n", + __func__, __LINE__); + return -EIO; + } + if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) { + mpsslog("%s() %d: alone\n", + __func__, __LINE__); + return -EIO; + } + if (le16toh(desc->flags) & VRING_DESC_F_WRITE) { + mpsslog("%s() %d: not read\n", + __func__, __LINE__); + return -EIO; + } + return 0; +} + +static int +read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx) +{ + struct iovec iovec; + struct mic_copy_desc copy; + + iovec.iov_len = sizeof(*hdr); + iovec.iov_base = hdr; + copy.iov = &iovec; 
+ copy.iovcnt = 1; + copy.vr_idx = 0; /* only one vring on virtio_block */ + copy.update_used = false; /* do not update used index */ + return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy); +} + +static int +transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt) +{ + struct mic_copy_desc copy; + + copy.iov = iovec; + copy.iovcnt = iovcnt; + copy.vr_idx = 0; /* only one vring on virtio_block */ + copy.update_used = false; /* do not update used index */ + return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy); +} + +static __u8 +status_error_check(struct vring_desc *desc) +{ + if (le32toh(desc->len) != sizeof(__u8)) { + mpsslog("%s() %d: length is not sizeof(status)\n", + __func__, __LINE__); + return -EIO; + } + return 0; +} + +static int +write_status(int fd, __u8 *status) +{ + struct iovec iovec; + struct mic_copy_desc copy; + + iovec.iov_base = status; + iovec.iov_len = sizeof(*status); + copy.iov = &iovec; + copy.iovcnt = 1; + copy.vr_idx = 0; /* only one vring on virtio_block */ + copy.update_used = true; /* Update used index */ + return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy); +} + +#ifndef VIRTIO_BLK_T_GET_ID +#define VIRTIO_BLK_T_GET_ID 8 +#endif + +static void * +virtio_block(void *arg) +{ + struct mic_info *mic = (struct mic_info *)arg; + int ret; + struct pollfd block_poll; + struct mic_vring vring; + __u16 avail_idx; + __u32 desc_idx; + struct vring_desc *desc; + struct iovec *iovec, *piov; + __u8 status; + __u32 buffer_desc_idx; + struct virtio_blk_outhdr hdr; + void *fos; + + for (;;) { /* forever */ + if (!open_backend(mic)) { /* No virtblk */ + for (mic->mic_virtblk.signaled = 0; + !mic->mic_virtblk.signaled;) + sleep(1); + continue; + } + + /* backend file is specified. 
*/ + if (!start_virtblk(mic, &vring)) + goto _close_backend; + iovec = malloc(sizeof(*iovec) * + le32toh(virtblk_dev_page.blk_config.seg_max)); + if (!iovec) { + mpsslog("%s: can't alloc iovec: %s\n", + mic->name, strerror(ENOMEM)); + goto _stop_virtblk; + } + + block_poll.fd = mic->mic_virtblk.virtio_block_fd; + block_poll.events = POLLIN; + for (mic->mic_virtblk.signaled = 0; + !mic->mic_virtblk.signaled;) { + block_poll.revents = 0; + /* timeout in 1 sec to see signaled */ + ret = poll(&block_poll, 1, 1000); + if (ret < 0) { + mpsslog("%s %d: poll failed: %s\n", + __func__, __LINE__, + strerror(errno)); + continue; + } + + if (!(block_poll.revents & POLLIN)) { +#ifdef DEBUG + mpsslog("%s %d: block_poll.revents=0x%x\n", + __func__, __LINE__, block_poll.revents); +#endif + continue; + } + + /* POLLIN */ + while (vring.info->avail_idx != + le16toh(vring.vr.avail->idx)) { + /* read header element */ + avail_idx = + vring.info->avail_idx & + (vring.vr.num - 1); + desc_idx = le16toh( + vring.vr.avail->ring[avail_idx]); + desc = &vring.vr.desc[desc_idx]; +#ifdef DEBUG + mpsslog("%s() %d: avail_idx=%d ", + __func__, __LINE__, + vring.info->avail_idx); + mpsslog("vring.vr.num=%d desc=%p\n", + vring.vr.num, desc); +#endif + status = header_error_check(desc); + ret = read_header( + mic->mic_virtblk.virtio_block_fd, + &hdr, desc_idx); + if (ret < 0) { + mpsslog("%s() %d %s: ret=%d %s\n", + __func__, __LINE__, + mic->name, ret, + strerror(errno)); + break; + } + /* buffer element */ + piov = iovec; + status = 0; + fos = mic->mic_virtblk.backend_addr + + (hdr.sector * SECTOR_SIZE); + buffer_desc_idx = next_desc(desc); + desc_idx = buffer_desc_idx; + for (desc = &vring.vr.desc[buffer_desc_idx]; + desc->flags & VRING_DESC_F_NEXT; + desc_idx = next_desc(desc), + desc = &vring.vr.desc[desc_idx]) { + piov->iov_len = desc->len; + piov->iov_base = fos; + piov++; + fos += desc->len; + } + /* Returning NULLs for VIRTIO_BLK_T_GET_ID. 
*/ + if (hdr.type & ~(VIRTIO_BLK_T_OUT | + VIRTIO_BLK_T_GET_ID)) { + /* + VIRTIO_BLK_T_IN - does not do + anything. Probably for documenting. + VIRTIO_BLK_T_SCSI_CMD - for + virtio_scsi. + VIRTIO_BLK_T_FLUSH - turned off in + config space. + VIRTIO_BLK_T_BARRIER - defined but not + used in anywhere. + */ + mpsslog("%s() %d: type %x ", + __func__, __LINE__, + hdr.type); + mpsslog("is not supported\n"); + status = -ENOTSUP; + + } else { + ret = transfer_blocks( + mic->mic_virtblk.virtio_block_fd, + iovec, + piov - iovec); + if (ret < 0 && + status != 0) + status = ret; + } + /* write status and update used pointer */ + if (status != 0) + status = status_error_check(desc); + ret = write_status( + mic->mic_virtblk.virtio_block_fd, + &status); +#ifdef DEBUG + mpsslog("%s() %d: write status=%d on desc=%p\n", + __func__, __LINE__, + status, desc); +#endif + } + } + free(iovec); +_stop_virtblk: + stop_virtblk(mic); +_close_backend: + close_backend(mic); + } /* forever */ + + pthread_exit(NULL); +} + +static void +reset(struct mic_info *mic) +{ +#define RESET_TIMEOUT 120 + int i = RESET_TIMEOUT; + setsysfs(mic->name, "state", "reset"); + while (i) { + char *state; + state = readsysfs(mic->name, "state"); + if (!state) + goto retry; + mpsslog("%s: %s %d state %s\n", + mic->name, __func__, __LINE__, state); + + if (!strcmp(state, "ready")) { + free(state); + break; + } + free(state); +retry: + sleep(1); + i--; + } +} + +static int +get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status) +{ + if (!strcmp(shutdown_status, "nop")) + return MIC_NOP; + if (!strcmp(shutdown_status, "crashed")) + return MIC_CRASHED; + if (!strcmp(shutdown_status, "halted")) + return MIC_HALTED; + if (!strcmp(shutdown_status, "poweroff")) + return MIC_POWER_OFF; + if (!strcmp(shutdown_status, "restart")) + return MIC_RESTART; + mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status); + /* Invalid state */ + assert(0); +}; + +static int get_mic_state(struct mic_info *mic) +{ + 
char *state = NULL; + enum mic_states mic_state; + + while (!state) { + state = readsysfs(mic->name, "state"); + sleep(1); + } + mpsslog("%s: %s %d state %s\n", + mic->name, __func__, __LINE__, state); + + if (!strcmp(state, "ready")) { + mic_state = MIC_READY; + } else if (!strcmp(state, "booting")) { + mic_state = MIC_BOOTING; + } else if (!strcmp(state, "online")) { + mic_state = MIC_ONLINE; + } else if (!strcmp(state, "shutting_down")) { + mic_state = MIC_SHUTTING_DOWN; + } else if (!strcmp(state, "reset_failed")) { + mic_state = MIC_RESET_FAILED; + } else if (!strcmp(state, "resetting")) { + mic_state = MIC_RESETTING; + } else { + mpsslog("%s: BUG invalid state %s\n", mic->name, state); + assert(0); + } + + free(state); + return mic_state; +}; + +static void mic_handle_shutdown(struct mic_info *mic) +{ +#define SHUTDOWN_TIMEOUT 60 + int i = SHUTDOWN_TIMEOUT; + char *shutdown_status; + while (i) { + shutdown_status = readsysfs(mic->name, "shutdown_status"); + if (!shutdown_status) { + sleep(1); + continue; + } + mpsslog("%s: %s %d shutdown_status %s\n", + mic->name, __func__, __LINE__, shutdown_status); + switch (get_mic_shutdown_status(mic, shutdown_status)) { + case MIC_RESTART: + mic->restart = 1; + case MIC_HALTED: + case MIC_POWER_OFF: + case MIC_CRASHED: + free(shutdown_status); + goto reset; + default: + break; + } + free(shutdown_status); + sleep(1); + i--; + } +reset: + if (!i) + mpsslog("%s: %s %d timing out waiting for shutdown_status %s\n", + mic->name, __func__, __LINE__, shutdown_status); + reset(mic); +} + +static int open_state_fd(struct mic_info *mic) +{ + char pathname[PATH_MAX]; + int fd; + + snprintf(pathname, PATH_MAX - 1, "%s/%s/%s", + MICSYSFSDIR, mic->name, "state"); + + fd = open(pathname, O_RDONLY); + if (fd < 0) + mpsslog("%s: opening file %s failed %s\n", + mic->name, pathname, strerror(errno)); + return fd; +} + +static int block_till_state_change(int fd, struct mic_info *mic) +{ + struct pollfd ufds[1]; + char value[PAGE_SIZE]; + 
int ret; + + ufds[0].fd = fd; + ufds[0].events = POLLERR | POLLPRI; + ret = poll(ufds, 1, -1); + if (ret < 0) { + mpsslog("%s: %s %d poll failed %s\n", + mic->name, __func__, __LINE__, strerror(errno)); + return ret; + } + + ret = lseek(fd, 0, SEEK_SET); + if (ret < 0) { + mpsslog("%s: %s %d Failed to seek to 0: %s\n", + mic->name, __func__, __LINE__, strerror(errno)); + return ret; + } + + ret = read(fd, value, sizeof(value)); + if (ret < 0) { + mpsslog("%s: %s %d Failed to read sysfs entry: %s\n", + mic->name, __func__, __LINE__, strerror(errno)); + return ret; + } + + return 0; +} + +static void * +mic_config(void *arg) +{ + struct mic_info *mic = (struct mic_info *)arg; + int fd, ret, stat = 0; + + fd = open_state_fd(mic); + if (fd < 0) { + mpsslog("%s: %s %d open state fd failed %s\n", + mic->name, __func__, __LINE__, strerror(errno)); + goto exit; + } + + do { + ret = block_till_state_change(fd, mic); + if (ret < 0) { + mpsslog("%s: %s %d block_till_state_change error %s\n", + mic->name, __func__, __LINE__, strerror(errno)); + goto close_exit; + } + + switch (get_mic_state(mic)) { + case MIC_SHUTTING_DOWN: + mic_handle_shutdown(mic); + break; + case MIC_READY: + case MIC_RESET_FAILED: + ret = kill(mic->pid, SIGTERM); + mpsslog("%s: %s %d kill pid %d ret %d\n", + mic->name, __func__, __LINE__, + mic->pid, ret); + if (!ret) { + ret = waitpid(mic->pid, &stat, + WIFSIGNALED(stat)); + mpsslog("%s: %s %d waitpid ret %d pid %d\n", + mic->name, __func__, __LINE__, + ret, mic->pid); + } + if (mic->boot_on_resume) { + setsysfs(mic->name, "state", "boot"); + mic->boot_on_resume = 0; + } + goto close_exit; + default: + break; + } + } while (1); + +close_exit: + close(fd); +exit: + init_mic(mic); + pthread_exit(NULL); +} + +static void +set_cmdline(struct mic_info *mic) +{ + char buffer[PATH_MAX]; + int len; + + len = snprintf(buffer, PATH_MAX, + "clocksource=tsc highres=off nohz=off "); + len += snprintf(buffer + len, PATH_MAX - len, + 
"cpufreq_on;corec6_off;pc3_off;pc6_off "); + len += snprintf(buffer + len, PATH_MAX - len, + "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0", + mic->id + 1); + + setsysfs(mic->name, "cmdline", buffer); + mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer); + snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id + 1); + mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer); +} + +static void +set_log_buf_info(struct mic_info *mic) +{ + int fd; + off_t len; + char system_map[] = "/lib/firmware/mic/System.map"; + char *map, *temp, log_buf[17] = {'\0'}; + + fd = open(system_map, O_RDONLY); + if (fd < 0) { + mpsslog("%s: Opening System.map failed: %d\n", + mic->name, errno); + return; + } + len = lseek(fd, 0, SEEK_END); + if (len < 0) { + mpsslog("%s: Reading System.map size failed: %d\n", + mic->name, errno); + close(fd); + return; + } + map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0); + if (map == MAP_FAILED) { + mpsslog("%s: mmap of System.map failed: %d\n", + mic->name, errno); + close(fd); + return; + } + temp = strstr(map, "__log_buf"); + if (!temp) { + mpsslog("%s: __log_buf not found: %d\n", mic->name, errno); + munmap(map, len); + close(fd); + return; + } + strncpy(log_buf, temp - 19, 16); + setsysfs(mic->name, "log_buf_addr", log_buf); + mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf); + temp = strstr(map, "log_buf_len"); + if (!temp) { + mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno); + munmap(map, len); + close(fd); + return; + } + strncpy(log_buf, temp - 19, 16); + setsysfs(mic->name, "log_buf_len", log_buf); + mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf); + munmap(map, len); + close(fd); +} + +static void +change_virtblk_backend(int x, siginfo_t *siginfo, void *p) +{ + struct mic_info *mic; + + for (mic = mic_list.next; mic != NULL; mic = mic->next) + mic->mic_virtblk.signaled = 1/* true */; +} + +static void +set_mic_boot_params(struct mic_info *mic) +{ + set_log_buf_info(mic); + set_cmdline(mic); +} + +static void 
* +init_mic(void *arg) +{ + struct mic_info *mic = (struct mic_info *)arg; + struct sigaction ignore = { + .sa_flags = 0, + .sa_handler = SIG_IGN + }; + struct sigaction act = { + .sa_flags = SA_SIGINFO, + .sa_sigaction = change_virtblk_backend, + }; + char buffer[PATH_MAX]; + int err, fd; + + /* + * Currently, one virtio block device is supported for each MIC card + * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon. + * The signal informs the virtio block backend about a change in the + * configuration file which specifies the virtio backend file name on + * the host. Virtio block backend then re-reads the configuration file + * and switches to the new block device. This signalling mechanism may + * not be required once multiple virtio block devices are supported by + * the MIC daemon. + */ + sigaction(SIGUSR1, &ignore, NULL); +retry: + fd = open_state_fd(mic); + if (fd < 0) { + mpsslog("%s: %s %d open state fd failed %s\n", + mic->name, __func__, __LINE__, strerror(errno)); + sleep(2); + goto retry; + } + + if (mic->restart) { + snprintf(buffer, PATH_MAX, "boot"); + setsysfs(mic->name, "state", buffer); + mpsslog("%s restarting mic %d\n", + mic->name, mic->restart); + mic->restart = 0; + } + + while (1) { + while (block_till_state_change(fd, mic)) { + mpsslog("%s: %s %d block_till_state_change error %s\n", + mic->name, __func__, __LINE__, strerror(errno)); + sleep(2); + continue; + } + + if (get_mic_state(mic) == MIC_BOOTING) + break; + } + + mic->pid = fork(); + switch (mic->pid) { + case 0: + add_virtio_device(mic, &virtcons_dev_page.dd); + add_virtio_device(mic, &virtnet_dev_page.dd); + err = pthread_create(&mic->mic_console.console_thread, NULL, + virtio_console, mic); + if (err) + mpsslog("%s virtcons pthread_create failed %s\n", + mic->name, strerror(err)); + err = pthread_create(&mic->mic_net.net_thread, NULL, + virtio_net, mic); + if (err) + mpsslog("%s virtnet pthread_create failed %s\n", + mic->name, strerror(err)); + err = 
pthread_create(&mic->mic_virtblk.block_thread, NULL, + virtio_block, mic); + if (err) + mpsslog("%s virtblk pthread_create failed %s\n", + mic->name, strerror(err)); + sigemptyset(&act.sa_mask); + err = sigaction(SIGUSR1, &act, NULL); + if (err) + mpsslog("%s sigaction SIGUSR1 failed %s\n", + mic->name, strerror(errno)); + while (1) + sleep(60); + case -1: + mpsslog("fork failed MIC name %s id %d errno %d\n", + mic->name, mic->id, errno); + break; + default: + err = pthread_create(&mic->config_thread, NULL, + mic_config, mic); + if (err) + mpsslog("%s mic_config pthread_create failed %s\n", + mic->name, strerror(err)); + } + + return NULL; +} + +static void +start_daemon(void) +{ + struct mic_info *mic; + int err; + + for (mic = mic_list.next; mic; mic = mic->next) { + set_mic_boot_params(mic); + err = pthread_create(&mic->init_thread, NULL, init_mic, mic); + if (err) + mpsslog("%s init_mic pthread_create failed %s\n", + mic->name, strerror(err)); + } + + while (1) + sleep(60); +} + +static int +init_mic_list(void) +{ + struct mic_info *mic = &mic_list; + struct dirent *file; + DIR *dp; + int cnt = 0; + + dp = opendir(MICSYSFSDIR); + if (!dp) + return 0; + + while ((file = readdir(dp)) != NULL) { + if (!strncmp(file->d_name, "mic", 3)) { + mic->next = calloc(1, sizeof(struct mic_info)); + if (mic->next) { + mic = mic->next; + mic->id = atoi(&file->d_name[3]); + mic->name = malloc(strlen(file->d_name) + 16); + if (mic->name) + strcpy(mic->name, file->d_name); + mpsslog("MIC name %s id %d\n", mic->name, + mic->id); + cnt++; + } + } + } + + closedir(dp); + return cnt; +} + +void +mpsslog(char *format, ...) 
+{ + va_list args; + char buffer[4096]; + char ts[52], *ts1; + time_t t; + + if (logfp == NULL) + return; + + va_start(args, format); + vsprintf(buffer, format, args); + va_end(args); + + time(&t); + ts1 = ctime_r(&t, ts); + ts1[strlen(ts1) - 1] = '\0'; + fprintf(logfp, "%s: %s", ts1, buffer); + + fflush(logfp); +} + +int +main(int argc, char *argv[]) +{ + int cnt; + pid_t pid; + + myname = argv[0]; + + logfp = fopen(LOGFILE_NAME, "a+"); + if (!logfp) { + fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME); + exit(1); + } + pid = fork(); + switch (pid) { + case 0: + break; + case -1: + exit(2); + default: + exit(0); + } + + mpsslog("MIC Daemon start\n"); + + cnt = init_mic_list(); + if (cnt == 0) { + mpsslog("MIC module not loaded\n"); + exit(3); + } + mpsslog("MIC found %d devices\n", cnt); + + start_daemon(); + + exit(0); +} --- /dev/null +++ b/samples/mic/mpssd/mpssd.h @@ -0,0 +1,103 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC User Space Tools. 
+ */ +#ifndef _MPSSD_H_ +#define _MPSSD_H_ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include <dirent.h> +#include <libgen.h> +#include <pthread.h> +#include <stdarg.h> +#include <time.h> +#include <errno.h> +#include <sys/dir.h> +#include <sys/ioctl.h> +#include <sys/poll.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/utsname.h> +#include <sys/wait.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <netdb.h> +#include <pthread.h> +#include <signal.h> +#include <limits.h> +#include <syslog.h> +#include <getopt.h> +#include <net/if.h> +#include <linux/if_tun.h> +#include <linux/if_tun.h> +#include <linux/virtio_ids.h> + +#define MICSYSFSDIR "/sys/class/mic" +#define LOGFILE_NAME "/var/log/mpssd" +#define PAGE_SIZE 4096 + +struct mic_console_info { + pthread_t console_thread; + int virtio_console_fd; + void *console_dp; +}; + +struct mic_net_info { + pthread_t net_thread; + int virtio_net_fd; + int tap_fd; + void *net_dp; +}; + +struct mic_virtblk_info { + pthread_t block_thread; + int virtio_block_fd; + void *block_dp; + volatile sig_atomic_t signaled; + char *backend_file; + int backend; + void *backend_addr; + long backend_size; +}; + +struct mic_info { + int id; + char *name; + pthread_t config_thread; + pthread_t init_thread; + pid_t pid; + struct mic_console_info mic_console; + struct mic_net_info mic_net; + struct mic_virtblk_info mic_virtblk; + int restart; + int boot_on_resume; + struct mic_info *next; +}; + +__attribute__((format(printf, 1, 2))) +void mpsslog(char *format, ...); +char *readsysfs(char *dir, char *entry); +int setsysfs(char *dir, char *entry, char *value); +#endif --- /dev/null +++ b/samples/mic/mpssd/sysfs.c @@ -0,0 +1,102 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC User Space Tools. + */ + +#include "mpssd.h" + +#define PAGE_SIZE 4096 + +char * +readsysfs(char *dir, char *entry) +{ + char filename[PATH_MAX]; + char value[PAGE_SIZE]; + char *string = NULL; + int fd; + int len; + + if (dir == NULL) + snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry); + else + snprintf(filename, PATH_MAX, + "%s/%s/%s", MICSYSFSDIR, dir, entry); + + fd = open(filename, O_RDONLY); + if (fd < 0) { + mpsslog("Failed to open sysfs entry '%s': %s\n", + filename, strerror(errno)); + return NULL; + } + + len = read(fd, value, sizeof(value)); + if (len < 0) { + mpsslog("Failed to read sysfs entry '%s': %s\n", + filename, strerror(errno)); + goto readsys_ret; + } + if (len == 0) + goto readsys_ret; + + value[len - 1] = '\0'; + + string = malloc(strlen(value) + 1); + if (string) + strcpy(string, value); + +readsys_ret: + close(fd); + return string; +} + +int +setsysfs(char *dir, char *entry, char *value) +{ + char filename[PATH_MAX]; + char *oldvalue; + int fd, ret = 0; + + if (dir == NULL) + snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry); + else + snprintf(filename, PATH_MAX, "%s/%s/%s", + MICSYSFSDIR, dir, entry); + + oldvalue = readsysfs(dir, entry); + + fd = open(filename, O_RDWR); + if (fd < 0) { + ret = errno; + mpsslog("Failed to open sysfs entry '%s': %s\n", + filename, strerror(errno)); + goto done; + } + + if (!oldvalue || strcmp(value, oldvalue)) { + if 
(write(fd, value, strlen(value)) < 0) { + ret = errno; + mpsslog("Failed to write new sysfs entry '%s': %s\n", + filename, strerror(errno)); + } + } + close(fd); +done: + if (oldvalue) + free(oldvalue); + return ret; +} Patches currently in stable-queue which might be from shuahkh@xxxxxxxxxxxxxxx are queue-4.4/samples-move-mic-mpssd-example-code-from-documentation.patch