diff --git a/Documentation/Makefile b/Documentation/Makefile index bc0548201755..fc759598c4c9 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -1,4 +1,4 @@ subdir-y := accounting auxdisplay blackfin connector \ - filesystems filesystems ia64 laptops mic misc-devices \ + filesystems filesystems ia64 laptops misc-devices \ networking pcmcia prctl ptp spi timers vDSO video4linux \ watchdog diff --git a/Documentation/mic/Makefile b/Documentation/mic/Makefile deleted file mode 100644 index a191d453badf..000000000000 --- a/Documentation/mic/Makefile +++ /dev/null @@ -1 +0,0 @@ -subdir-y := mpssd diff --git a/Documentation/mic/mpssd/.gitignore b/Documentation/mic/mpssd/.gitignore deleted file mode 100644 index 8b7c72f07c92..000000000000 --- a/Documentation/mic/mpssd/.gitignore +++ /dev/null @@ -1 +0,0 @@ -mpssd diff --git a/Documentation/mic/mpssd/Makefile b/Documentation/mic/mpssd/Makefile deleted file mode 100644 index 0f3156888048..000000000000 --- a/Documentation/mic/mpssd/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -# List of programs to build -hostprogs-y := mpssd - -mpssd-objs := mpssd.o sysfs.o - -# Tell kbuild to always build the programs -always := $(hostprogs-y) - -HOSTCFLAGS += -I$(objtree)/usr/include -I$(srctree)/tools/include - -ifdef DEBUG -HOSTCFLAGS += -DDEBUG=$(DEBUG) -endif - -HOSTLOADLIBES_mpssd := -lpthread - -install: - install mpssd /usr/sbin/mpssd - install micctrl /usr/sbin/micctrl diff --git a/Documentation/mic/mpssd/micctrl b/Documentation/mic/mpssd/micctrl deleted file mode 100755 index 8f2629b41c5f..000000000000 --- a/Documentation/mic/mpssd/micctrl +++ /dev/null @@ -1,173 +0,0 @@ -#!/bin/bash -# Intel MIC Platform Software Stack (MPSS) -# -# Copyright(c) 2013 Intel Corporation. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License, version 2, as -# published by the Free Software Foundation. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# The full GNU General Public License is included in this distribution in -# the file called "COPYING". -# -# Intel MIC User Space Tools. -# -# micctrl - Controls MIC boot/start/stop. -# -# chkconfig: 2345 95 05 -# description: start MPSS stack processing. -# -### BEGIN INIT INFO -# Provides: micctrl -### END INIT INFO - -# Source function library. -. /etc/init.d/functions - -sysfs="/sys/class/mic" - -_status() -{ - f=$sysfs/$1 - echo -e $1 state: "`cat $f/state`" shutdown_status: "`cat $f/shutdown_status`" -} - -status() -{ - if [ "`echo $1 | head -c3`" == "mic" ]; then - _status $1 - return $? - fi - for f in $sysfs/* - do - _status `basename $f` - RETVAL=$? - [ $RETVAL -ne 0 ] && return $RETVAL - done - return 0 -} - -_reset() -{ - f=$sysfs/$1 - echo reset > $f/state -} - -reset() -{ - if [ "`echo $1 | head -c3`" == "mic" ]; then - _reset $1 - return $? - fi - for f in $sysfs/* - do - _reset `basename $f` - RETVAL=$? - [ $RETVAL -ne 0 ] && return $RETVAL - done - return 0 -} - -_boot() -{ - f=$sysfs/$1 - echo "linux" > $f/bootmode - echo "mic/uos.img" > $f/firmware - echo "mic/$1.image" > $f/ramdisk - echo "boot" > $f/state -} - -boot() -{ - if [ "`echo $1 | head -c3`" == "mic" ]; then - _boot $1 - return $? - fi - for f in $sysfs/* - do - _boot `basename $f` - RETVAL=$? - [ $RETVAL -ne 0 ] && return $RETVAL - done - return 0 -} - -_shutdown() -{ - f=$sysfs/$1 - echo shutdown > $f/state -} - -shutdown() -{ - if [ "`echo $1 | head -c3`" == "mic" ]; then - _shutdown $1 - return $? - fi - for f in $sysfs/* - do - _shutdown `basename $f` - RETVAL=$? - [ $RETVAL -ne 0 ] && return $RETVAL - done - return 0 -} - -_wait() -{ - f=$sysfs/$1 - while [ "`cat $f/state`" != "offline" -a "`cat $f/state`" != "online" ] - do - sleep 1 - echo -e "Waiting for $1 to go offline" - done -} - -wait() -{ - if [ "`echo $1 | head -c3`" == "mic" ]; then - _wait $1 - return $? - fi - # Wait for the cards to go offline - for f in $sysfs/* - do - _wait `basename $f` - RETVAL=$? - [ $RETVAL -ne 0 ] && return $RETVAL - done - return 0 -} - -if [ ! -d "$sysfs" ]; then - echo -e $"Module unloaded " - exit 3 -fi - -case $1 in - -s) - status $2 - ;; - -r) - reset $2 - ;; - -b) - boot $2 - ;; - -S) - shutdown $2 - ;; - -w) - wait $2 - ;; - *) - echo $"Usage: $0 {-s (status) |-r (reset) |-b (boot) |-S (shutdown) |-w (wait)}" - exit 2 -esac - -exit $? diff --git a/Documentation/mic/mpssd/mpss b/Documentation/mic/mpssd/mpss deleted file mode 100755 index cacbdb0aefb9..000000000000 --- a/Documentation/mic/mpssd/mpss +++ /dev/null @@ -1,202 +0,0 @@ -#!/bin/bash -# Intel MIC Platform Software Stack (MPSS) -# -# Copyright(c) 2013 Intel Corporation. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License, version 2, as -# published by the Free Software Foundation. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# The full GNU General Public License is included in this distribution in -# the file called "COPYING". -# -# Intel MIC User Space Tools. -# -# mpss Start mpssd. -# -# chkconfig: 2345 95 05 -# description: start MPSS stack processing. -# -### BEGIN INIT INFO -# Provides: mpss -# Required-Start: -# Required-Stop: -# Short-Description: MPSS stack control -# Description: MPSS stack control -### END INIT INFO - -# Source function library. -. /etc/init.d/functions - -exec=/usr/sbin/mpssd -sysfs="/sys/class/mic" - -start() -{ - [ -x $exec ] || exit 5 - - if [ "`ps -e | awk '{print $4}' | grep mpssd | head -1`" = "mpssd" ]; then - echo -e $"MPSSD already running! " - success - echo - return 0 - fi - - echo -e $"Starting MPSS Stack" - echo -e $"Loading MIC_X100_DMA & MIC_HOST Modules" - - for f in "mic_host" "mic_x100_dma" - do - modprobe $f - RETVAL=$? - if [ $RETVAL -ne 0 ]; then - failure - echo - return $RETVAL - fi - done - - # Start the daemon - echo -n $"Starting MPSSD " - $exec - RETVAL=$? - if [ $RETVAL -ne 0 ]; then - failure - echo - return $RETVAL - fi - success - echo - - sleep 5 - - # Boot the cards - micctrl -b - - # Wait till ping works - for f in $sysfs/* - do - count=100 - ipaddr=`cat $f/cmdline` - ipaddr=${ipaddr#*address,} - ipaddr=`echo $ipaddr | cut -d, -f1 | cut -d\; -f1` - while [ $count -ge 0 ] - do - echo -e "Pinging "`basename $f`" " - ping -c 1 $ipaddr &> /dev/null - RETVAL=$? - if [ $RETVAL -eq 0 ]; then - success - break - fi - sleep 1 - count=`expr $count - 1` - done - [ $RETVAL -ne 0 ] && failure || success - echo - done - return $RETVAL -} - -stop() -{ - echo -e $"Shutting down MPSS Stack: " - - # Bail out if module is unloaded - if [ ! -d "$sysfs" ]; then - echo -n $"Module unloaded " - success - echo - return 0 - fi - - # Shut down the cards. - micctrl -S - - # Wait for the cards to go offline - for f in $sysfs/* - do - while [ "`cat $f/state`" != "offline" ] - do - sleep 1 - echo -e "Waiting for "`basename $f`" to go offline" - done - done - - # Display the status of the cards - micctrl -s - - # Kill MPSSD now - echo -n $"Killing MPSSD" - killall -9 mpssd 2>/dev/null - RETVAL=$? - [ $RETVAL -ne 0 ] && failure || success - echo - return $RETVAL -} - -restart() -{ - stop - sleep 5 - start -} - -status() -{ - micctrl -s - if [ "`ps -e | awk '{print $4}' | grep mpssd | head -n 1`" = "mpssd" ]; then - echo "mpssd is running" - else - echo "mpssd is stopped" - fi - return 0 -} - -unload() -{ - if [ ! -d "$sysfs" ]; then - echo -n $"No MIC_HOST Module: " - success - echo - return - fi - - stop - - sleep 5 - echo -n $"Removing MIC_HOST & MIC_X100_DMA Modules: " - modprobe -r mic_host mic_x100_dma - RETVAL=$? - [ $RETVAL -ne 0 ] && failure || success - echo - return $RETVAL -} - -case $1 in - start) - start - ;; - stop) - stop - ;; - restart) - restart - ;; - status) - status - ;; - unload) - unload - ;; - *) - echo $"Usage: $0 {start|stop|restart|status|unload}" - exit 2 -esac - -exit $? diff --git a/Documentation/mic/mpssd/mpssd.c b/Documentation/mic/mpssd/mpssd.c deleted file mode 100644 index 3c5c379fc29d..000000000000 --- a/Documentation/mic/mpssd/mpssd.c +++ /dev/null @@ -1,1728 +0,0 @@ -/* - * Intel MIC Platform Software Stack (MPSS) - * - * Copyright(c) 2013 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Intel MIC User Space Tools. - */ - -#define _GNU_SOURCE - -#include <stdlib.h> -#include <fcntl.h> -#include <getopt.h> -#include <assert.h> -#include <unistd.h> -#include <stdbool.h> -#include <signal.h> -#include <poll.h> -#include <features.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/mman.h> -#include <sys/socket.h> -#include <linux/virtio_ring.h> -#include <linux/virtio_net.h> -#include <linux/virtio_console.h> -#include <linux/virtio_blk.h> -#include <linux/version.h> -#include "mpssd.h" -#include <linux/mic_ioctl.h> -#include <linux/mic_common.h> -#include <tools/endian.h> - -static void init_mic(struct mic_info *mic); - -static FILE *logfp; -static struct mic_info mic_list; - -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) - -#define min_t(type, x, y) ({ \ - type __min1 = (x); \ - type __min2 = (y); \ - __min1 < __min2 ? __min1 : __min2; }) - -/* align addr on a size boundary - adjust address up/down if needed */ -#define _ALIGN_DOWN(addr, size) ((addr)&(~((size)-1))) -#define _ALIGN_UP(addr, size) _ALIGN_DOWN(addr + size - 1, size) - -/* align addr on a size boundary - adjust address up if needed */ -#define _ALIGN(addr, size) _ALIGN_UP(addr, size) - -/* to align the pointer to the (next) page boundary */ -#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) - -#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) - -#define GSO_ENABLED 1 -#define MAX_GSO_SIZE (64 * 1024) -#define ETH_H_LEN 14 -#define MAX_NET_PKT_SIZE (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64)) -#define MIC_DEVICE_PAGE_END 0x1000 - -#ifndef VIRTIO_NET_HDR_F_DATA_VALID -#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ -#endif - -static struct { - struct mic_device_desc dd; - struct mic_vqconfig vqconfig[2]; - __u32 host_features, guest_acknowledgements; - struct virtio_console_config cons_config; -} virtcons_dev_page = { - .dd = { - .type = VIRTIO_ID_CONSOLE, - .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig), - .feature_len = sizeof(virtcons_dev_page.host_features), - .config_len = sizeof(virtcons_dev_page.cons_config), - }, - .vqconfig[0] = { - .num = htole16(MIC_VRING_ENTRIES), - }, - .vqconfig[1] = { - .num = htole16(MIC_VRING_ENTRIES), - }, -}; - -static struct { - struct mic_device_desc dd; - struct mic_vqconfig vqconfig[2]; - __u32 host_features, guest_acknowledgements; - struct virtio_net_config net_config; -} virtnet_dev_page = { - .dd = { - .type = VIRTIO_ID_NET, - .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig), - .feature_len = sizeof(virtnet_dev_page.host_features), - .config_len = sizeof(virtnet_dev_page.net_config), - }, - .vqconfig[0] = { - .num = htole16(MIC_VRING_ENTRIES), - }, - .vqconfig[1] = { - .num = htole16(MIC_VRING_ENTRIES), - }, -#if GSO_ENABLED - .host_features = htole32( - 1 << VIRTIO_NET_F_CSUM | - 1 << VIRTIO_NET_F_GSO | - 1 << VIRTIO_NET_F_GUEST_TSO4 | - 1 << VIRTIO_NET_F_GUEST_TSO6 | - 1 << VIRTIO_NET_F_GUEST_ECN | - 1 << VIRTIO_NET_F_GUEST_UFO), -#else - .host_features = 0, -#endif -}; - -static const char *mic_config_dir = "/etc/sysconfig/mic"; -static const char *virtblk_backend = "VIRTBLK_BACKEND"; -static struct { - struct mic_device_desc dd; - struct mic_vqconfig vqconfig[1]; - __u32 host_features, guest_acknowledgements; - struct virtio_blk_config blk_config; -} virtblk_dev_page = { - .dd = { - .type = VIRTIO_ID_BLOCK, - .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig), - .feature_len = sizeof(virtblk_dev_page.host_features), - .config_len = sizeof(virtblk_dev_page.blk_config), - }, - .vqconfig[0] = { - .num = htole16(MIC_VRING_ENTRIES), - }, - .host_features = - htole32(1<<VIRTIO_BLK_F_SEG_MAX), - .blk_config = { - .seg_max = htole32(MIC_VRING_ENTRIES - 2), - .capacity = htole64(0), - } -}; - -static char *myname; - -static int -tap_configure(struct mic_info *mic, char *dev) -{ - pid_t pid; - char *ifargv[7]; - char ipaddr[IFNAMSIZ]; - int ret = 0; - - pid = fork(); - if (pid == 0) { - ifargv[0] = "ip"; - ifargv[1] = "link"; - ifargv[2] = "set"; - ifargv[3] = dev; - ifargv[4] = "up"; - ifargv[5] = NULL; - mpsslog("Configuring %s\n", dev); - ret = execvp("ip", ifargv); - if (ret < 0) { - mpsslog("%s execvp failed errno %s\n", - mic->name, strerror(errno)); - return ret; - } - } - if (pid < 0) { - mpsslog("%s fork failed errno %s\n", - mic->name, strerror(errno)); - return ret; - } - - ret = waitpid(pid, NULL, 0); - if (ret < 0) { - mpsslog("%s waitpid failed errno %s\n", - mic->name, strerror(errno)); - return ret; - } - - snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id); - - pid = fork(); - if (pid == 0) { - ifargv[0] = "ip"; - ifargv[1] = "addr"; - ifargv[2] = "add"; - ifargv[3] = ipaddr; - ifargv[4] = "dev"; - ifargv[5] = dev; - ifargv[6] = NULL; - mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr); - ret = execvp("ip", ifargv); - if (ret < 0) { - mpsslog("%s execvp failed errno %s\n", - mic->name, strerror(errno)); - return ret; - } - } - if (pid < 0) { - mpsslog("%s fork failed errno %s\n", - mic->name, strerror(errno)); - return ret; - } - - ret = waitpid(pid, NULL, 0); - if (ret < 0) { - mpsslog("%s waitpid failed errno %s\n", - mic->name, strerror(errno)); - return ret; - } - mpsslog("MIC name %s %s %d DONE!\n", - mic->name, __func__, __LINE__); - return 0; -} - -static int tun_alloc(struct mic_info *mic, char *dev) -{ - struct ifreq ifr; - int fd, err; -#if GSO_ENABLED - unsigned offload; -#endif - fd = open("/dev/net/tun", O_RDWR); - if (fd < 0) { - mpsslog("Could not open /dev/net/tun %s\n", strerror(errno)); - goto done; - } - - memset(&ifr, 0, sizeof(ifr)); - - ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; - if (*dev) - strncpy(ifr.ifr_name, dev, IFNAMSIZ); - - err = ioctl(fd, TUNSETIFF, (void *)&ifr); - if (err < 0) { - mpsslog("%s %s %d TUNSETIFF failed %s\n", - mic->name, __func__, __LINE__, strerror(errno)); - close(fd); - return err; - } -#if GSO_ENABLED - offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | - TUN_F_TSO_ECN | TUN_F_UFO; - - err = ioctl(fd, TUNSETOFFLOAD, offload); - if (err < 0) { - mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n", - mic->name, __func__, __LINE__, strerror(errno)); - close(fd); - return err; - } -#endif - strcpy(dev, ifr.ifr_name); - mpsslog("Created TAP %s\n", dev); -done: - return fd; -} - -#define NET_FD_VIRTIO_NET 0 -#define NET_FD_TUN 1 -#define MAX_NET_FD 2 - -static void set_dp(struct mic_info *mic, int type, void *dp) -{ - switch (type) { - case VIRTIO_ID_CONSOLE: - mic->mic_console.console_dp = dp; - return; - case VIRTIO_ID_NET: - mic->mic_net.net_dp = dp; - return; - case VIRTIO_ID_BLOCK: - mic->mic_virtblk.block_dp = dp; - return; - } - mpsslog("%s %s %d not found\n", mic->name, __func__, type); - assert(0); -} - -static void *get_dp(struct mic_info *mic, int type) -{ - switch (type) { - case VIRTIO_ID_CONSOLE: - return mic->mic_console.console_dp; - case VIRTIO_ID_NET: - return mic->mic_net.net_dp; - case VIRTIO_ID_BLOCK: - return mic->mic_virtblk.block_dp; - } - mpsslog("%s %s %d not found\n", mic->name, __func__, type); - assert(0); - return NULL; -} - -static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type) -{ - struct mic_device_desc *d; - int i; - void *dp = get_dp(mic, type); - - for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE; - i += mic_total_desc_size(d)) { - d = dp + i; - - /* End of list */ - if (d->type == 0) - break; - - if (d->type == -1) - continue; - - mpsslog("%s %s d-> type %d d %p\n", - mic->name, __func__, d->type, d); - - if (d->type == (__u8)type) - return d; - } - mpsslog("%s %s %d not found\n", mic->name, __func__, type); - assert(0); - return NULL; -} - -/* See comments in vhost.c for explanation of next_desc() */ -static unsigned next_desc(struct vring_desc *desc) -{ - unsigned int next; - - if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) - return -1U; - next = le16toh(desc->next); - return next; -} - -/* Sum up all the IOVEC length */ -static ssize_t -sum_iovec_len(struct mic_copy_desc *copy) -{ - ssize_t sum = 0; - int i; - - for (i = 0; i < copy->iovcnt; i++) - sum += copy->iov[i].iov_len; - return sum; -} - -static inline void verify_out_len(struct mic_info *mic, - struct mic_copy_desc *copy) -{ - if (copy->out_len != sum_iovec_len(copy)) { - mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n", - mic->name, __func__, __LINE__, - copy->out_len, sum_iovec_len(copy)); - assert(copy->out_len == sum_iovec_len(copy)); - } -} - -/* Display an iovec */ -static void -disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy, - const char *s, int line) -{ - int i; - - for (i = 0; i < copy->iovcnt; i++) - mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n", - mic->name, s, line, i, - copy->iov[i].iov_base, copy->iov[i].iov_len); -} - -static inline __u16 read_avail_idx(struct mic_vring *vr) -{ - return ACCESS_ONCE(vr->info->avail_idx); -} - -static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr, - struct mic_copy_desc *copy, ssize_t len) -{ - copy->vr_idx = tx ? 0 : 1; - copy->update_used = true; - if (type == VIRTIO_ID_NET) - copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr); - else - copy->iov[0].iov_len = len; -} - -/* Central API which triggers the copies */ -static int -mic_virtio_copy(struct mic_info *mic, int fd, - struct mic_vring *vr, struct mic_copy_desc *copy) -{ - int ret; - - ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy); - if (ret) { - mpsslog("%s %s %d errno %s ret %d\n", - mic->name, __func__, __LINE__, - strerror(errno), ret); - } - return ret; -} - -/* - * This initialization routine requires at least one - * vring i.e. vr0. vr1 is optional. - */ -static void * -init_vr(struct mic_info *mic, int fd, int type, - struct mic_vring *vr0, struct mic_vring *vr1, int num_vq) -{ - int vr_size; - char *va; - - vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES, - MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info)); - va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq, - PROT_READ, MAP_SHARED, fd, 0); - if (MAP_FAILED == va) { - mpsslog("%s %s %d mmap failed errno %s\n", - mic->name, __func__, __LINE__, - strerror(errno)); - goto done; - } - set_dp(mic, type, va); - vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END]; - vr0->info = vr0->va + - vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN); - vring_init(&vr0->vr, - MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN); - mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ", - __func__, mic->name, vr0->va, vr0->info, vr_size, - vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN)); - mpsslog("magic 0x%x expected 0x%x\n", - le32toh(vr0->info->magic), MIC_MAGIC + type); - assert(le32toh(vr0->info->magic) == MIC_MAGIC + type); - if (vr1) { - vr1->va = (struct mic_vring *) - &va[MIC_DEVICE_PAGE_END + vr_size]; - vr1->info = vr1->va + vring_size(MIC_VRING_ENTRIES, - MIC_VIRTIO_RING_ALIGN); - vring_init(&vr1->vr, - MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN); - mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ", - __func__, mic->name, vr1->va, vr1->info, vr_size, - vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN)); - mpsslog("magic 0x%x expected 0x%x\n", - le32toh(vr1->info->magic), MIC_MAGIC + type + 1); - assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1); - } -done: - return va; -} - -static void -wait_for_card_driver(struct mic_info *mic, int fd, int type) -{ - struct pollfd pollfd; - int err; - struct mic_device_desc *desc = get_device_desc(mic, type); - - pollfd.fd = fd; - mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n", - mic->name, __func__, type, desc->status); - while (1) { - pollfd.events = POLLIN; - pollfd.revents = 0; - err = poll(&pollfd, 1, -1); - if (err < 0) { - mpsslog("%s %s poll failed %s\n", - mic->name, __func__, strerror(errno)); - continue; - } - - if (pollfd.revents) { - mpsslog("%s %s Waiting... desc-> type %d status 0x%x\n", - mic->name, __func__, type, desc->status); - if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) { - mpsslog("%s %s poll.revents %d\n", - mic->name, __func__, pollfd.revents); - mpsslog("%s %s desc-> type %d status 0x%x\n", - mic->name, __func__, type, - desc->status); - break; - } - } - } -} - -/* Spin till we have some descriptors */ -static void -spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr) -{ - __u16 avail_idx = read_avail_idx(vr); - - while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) { -#ifdef DEBUG - mpsslog("%s %s waiting for desc avail %d info_avail %d\n", - mic->name, __func__, - le16toh(vr->vr.avail->idx), vr->info->avail_idx); -#endif - sched_yield(); - } -} - -static void * -virtio_net(void *arg) -{ - static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)]; - static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64))); - struct iovec vnet_iov[2][2] = { - { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) }, - { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } }, - { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) }, - { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } }, - }; - struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1]; - struct mic_info *mic = (struct mic_info *)arg; - char if_name[IFNAMSIZ]; - struct pollfd net_poll[MAX_NET_FD]; - struct mic_vring tx_vr, rx_vr; - struct mic_copy_desc copy; - struct mic_device_desc *desc; - int err; - - snprintf(if_name, IFNAMSIZ, "mic%d", mic->id); - mic->mic_net.tap_fd = tun_alloc(mic, if_name); - if (mic->mic_net.tap_fd < 0) - goto done; - - if (tap_configure(mic, if_name)) - goto done; - mpsslog("MIC name %s id %d\n", mic->name, mic->id); - - net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd; - net_poll[NET_FD_VIRTIO_NET].events = POLLIN; - net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd; - net_poll[NET_FD_TUN].events = POLLIN; - - if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd, - VIRTIO_ID_NET, &tx_vr, &rx_vr, - virtnet_dev_page.dd.num_vq)) { - mpsslog("%s init_vr failed %s\n", - mic->name, strerror(errno)); - goto done; - } - - copy.iovcnt = 2; - desc = get_device_desc(mic, VIRTIO_ID_NET); - - while (1) { - ssize_t len; - - net_poll[NET_FD_VIRTIO_NET].revents = 0; - net_poll[NET_FD_TUN].revents = 0; - - /* Start polling for data from tap and virtio net */ - err = poll(net_poll, 2, -1); - if (err < 0) { - mpsslog("%s poll failed %s\n", - __func__, strerror(errno)); - continue; - } - if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) - wait_for_card_driver(mic, mic->mic_net.virtio_net_fd, - VIRTIO_ID_NET); - /* - * Check if there is data to be read from TUN and write to - * virtio net fd if there is. - */ - if (net_poll[NET_FD_TUN].revents & POLLIN) { - copy.iov = iov0; - len = readv(net_poll[NET_FD_TUN].fd, - copy.iov, copy.iovcnt); - if (len > 0) { - struct virtio_net_hdr *hdr - = (struct virtio_net_hdr *)vnet_hdr[0]; - - /* Disable checksums on the card since we are on - a reliable PCIe link */ - hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID; -#ifdef DEBUG - mpsslog("%s %s %d hdr->flags 0x%x ", mic->name, - __func__, __LINE__, hdr->flags); - mpsslog("copy.out_len %d hdr->gso_type 0x%x\n", - copy.out_len, hdr->gso_type); -#endif -#ifdef DEBUG - disp_iovec(mic, copy, __func__, __LINE__); - mpsslog("%s %s %d read from tap 0x%lx\n", - mic->name, __func__, __LINE__, - len); -#endif - spin_for_descriptors(mic, &tx_vr); - txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, ©, - len); - - err = mic_virtio_copy(mic, - mic->mic_net.virtio_net_fd, &tx_vr, - ©); - if (err < 0) { - mpsslog("%s %s %d mic_virtio_copy %s\n", - mic->name, __func__, __LINE__, - strerror(errno)); - } - if (!err) - verify_out_len(mic, ©); -#ifdef DEBUG - disp_iovec(mic, copy, __func__, __LINE__); - mpsslog("%s %s %d wrote to net 0x%lx\n", - mic->name, __func__, __LINE__, - sum_iovec_len(©)); -#endif - /* Reinitialize IOV for next run */ - iov0[1].iov_len = MAX_NET_PKT_SIZE; - } else if (len < 0) { - disp_iovec(mic, ©, __func__, __LINE__); - mpsslog("%s %s %d read failed %s ", mic->name, - __func__, __LINE__, strerror(errno)); - mpsslog("cnt %d sum %zd\n", - copy.iovcnt, sum_iovec_len(©)); - } - } - - /* - * Check if there is data to be read from virtio net and - * write to TUN if there is. - */ - if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) { - while (rx_vr.info->avail_idx != - le16toh(rx_vr.vr.avail->idx)) { - copy.iov = iov1; - txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, ©, - MAX_NET_PKT_SIZE - + sizeof(struct virtio_net_hdr)); - - err = mic_virtio_copy(mic, - mic->mic_net.virtio_net_fd, &rx_vr, - ©); - if (!err) { -#ifdef DEBUG - struct virtio_net_hdr *hdr - = (struct virtio_net_hdr *) - vnet_hdr[1]; - - mpsslog("%s %s %d hdr->flags 0x%x, ", - mic->name, __func__, __LINE__, - hdr->flags); - mpsslog("out_len %d gso_type 0x%x\n", - copy.out_len, - hdr->gso_type); -#endif - /* Set the correct output iov_len */ - iov1[1].iov_len = copy.out_len - - sizeof(struct virtio_net_hdr); - verify_out_len(mic, ©); -#ifdef DEBUG - disp_iovec(mic, copy, __func__, - __LINE__); - mpsslog("%s %s %d ", - mic->name, __func__, __LINE__); - mpsslog("read from net 0x%lx\n", - sum_iovec_len(copy)); -#endif - len = writev(net_poll[NET_FD_TUN].fd, - copy.iov, copy.iovcnt); - if (len != sum_iovec_len(©)) { - mpsslog("Tun write failed %s ", - strerror(errno)); - mpsslog("len 0x%zx ", len); - mpsslog("read_len 0x%zx\n", - sum_iovec_len(©)); - } else { -#ifdef DEBUG - disp_iovec(mic, ©, __func__, - __LINE__); - mpsslog("%s %s %d ", - mic->name, __func__, - __LINE__); - mpsslog("wrote to tap 0x%lx\n", - len); -#endif - } - } else { - mpsslog("%s %s %d mic_virtio_copy %s\n", - mic->name, __func__, __LINE__, - strerror(errno)); - break; - } - } - } - if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR) - mpsslog("%s: %s: POLLERR\n", __func__, mic->name); - } -done: - pthread_exit(NULL); -} - -/* virtio_console */ -#define VIRTIO_CONSOLE_FD 0 -#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1) -#define MAX_CONSOLE_FD (MONITOR_FD + 1) /* must be the last one + 1 */ -#define MAX_BUFFER_SIZE PAGE_SIZE - -static void * -virtio_console(void *arg) -{ - static __u8 vcons_buf[2][PAGE_SIZE]; - struct iovec vcons_iov[2] = { - { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) }, - { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) }, - }; - struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1]; - struct mic_info *mic = (struct mic_info *)arg; - int err; - struct pollfd console_poll[MAX_CONSOLE_FD]; - int pty_fd; - char *pts_name; - ssize_t len; - struct mic_vring tx_vr, rx_vr; - struct mic_copy_desc copy; - struct mic_device_desc *desc; - - pty_fd = posix_openpt(O_RDWR); - if (pty_fd < 0) { - mpsslog("can't open a pseudoterminal master device: %s\n", - strerror(errno)); - goto _return; - } - pts_name = ptsname(pty_fd); - if (pts_name == NULL) { - mpsslog("can't get pts name\n"); - goto _close_pty; - } - printf("%s console message goes to %s\n", mic->name, pts_name); - mpsslog("%s console message goes to %s\n", mic->name, pts_name); - err = grantpt(pty_fd); - if (err < 0) { - mpsslog("can't grant access: %s %s\n", - pts_name, strerror(errno)); - goto _close_pty; - } - err = unlockpt(pty_fd); - if (err < 0) { - mpsslog("can't unlock a pseudoterminal: %s %s\n", - pts_name, strerror(errno)); - goto _close_pty; - } - console_poll[MONITOR_FD].fd = pty_fd; - console_poll[MONITOR_FD].events = POLLIN; - - console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd; - console_poll[VIRTIO_CONSOLE_FD].events = POLLIN; - - if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd, - VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr, - virtcons_dev_page.dd.num_vq)) { - mpsslog("%s init_vr failed %s\n", - mic->name, strerror(errno)); - goto _close_pty; - } - - copy.iovcnt = 1; - desc = get_device_desc(mic, VIRTIO_ID_CONSOLE); - - for (;;) { - console_poll[MONITOR_FD].revents = 0; - console_poll[VIRTIO_CONSOLE_FD].revents = 0; - err = poll(console_poll, MAX_CONSOLE_FD, -1); - if (err < 0) { - mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__, - strerror(errno)); - continue; - } - if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) - wait_for_card_driver(mic, - mic->mic_console.virtio_console_fd, - VIRTIO_ID_CONSOLE); - - if (console_poll[MONITOR_FD].revents & POLLIN) { - copy.iov = iov0; - len = readv(pty_fd, copy.iov, copy.iovcnt); - if (len > 0) { -#ifdef DEBUG - disp_iovec(mic, copy, __func__, __LINE__); - mpsslog("%s %s %d read from tap 0x%lx\n", - mic->name, __func__, __LINE__, - len); -#endif - spin_for_descriptors(mic, &tx_vr); - txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr, - ©, len); - - err = mic_virtio_copy(mic, - mic->mic_console.virtio_console_fd, - &tx_vr, ©); - if (err < 0) { - mpsslog("%s %s %d mic_virtio_copy %s\n", - mic->name, __func__, __LINE__, - strerror(errno)); - } - if (!err) - verify_out_len(mic, ©); -#ifdef DEBUG - disp_iovec(mic, copy, __func__, __LINE__); - mpsslog("%s %s %d wrote to net 0x%lx\n", - mic->name, __func__, __LINE__, - sum_iovec_len(copy)); -#endif - /* Reinitialize IOV for next run */ - iov0->iov_len = PAGE_SIZE; - } else if (len < 0) { - disp_iovec(mic, ©, __func__, __LINE__); - mpsslog("%s %s %d read failed %s ", - mic->name, __func__, __LINE__, - strerror(errno)); - mpsslog("cnt %d sum %zd\n", - copy.iovcnt, sum_iovec_len(©)); - } - } - - if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) { - while (rx_vr.info->avail_idx != - le16toh(rx_vr.vr.avail->idx)) { - copy.iov = iov1; - txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr, - ©, PAGE_SIZE); - - err = mic_virtio_copy(mic, - mic->mic_console.virtio_console_fd, - &rx_vr, ©); - if (!err) { - /* Set the correct output iov_len */ - iov1->iov_len = copy.out_len; - verify_out_len(mic, ©); -#ifdef DEBUG - disp_iovec(mic, copy, __func__, - __LINE__); - mpsslog("%s %s %d ", - mic->name, __func__, __LINE__); - mpsslog("read from net 0x%lx\n", - sum_iovec_len(copy)); -#endif - len = writev(pty_fd, - copy.iov, copy.iovcnt); - if (len != sum_iovec_len(©)) { - mpsslog("Tun write failed %s ", - strerror(errno)); - mpsslog("len 0x%zx ", len); - mpsslog("read_len 0x%zx\n", - sum_iovec_len(©)); - } else { -#ifdef DEBUG - disp_iovec(mic, copy, __func__, - __LINE__); - mpsslog("%s %s %d ", - mic->name, __func__, - __LINE__); - mpsslog("wrote to tap 0x%lx\n", - len); -#endif - } - } else { - mpsslog("%s %s %d mic_virtio_copy %s\n", - mic->name, __func__, __LINE__, - strerror(errno)); - break; - } - } - } - if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR) - mpsslog("%s: %s: POLLERR\n", __func__, mic->name); - } -_close_pty: - close(pty_fd); -_return: - pthread_exit(NULL); -} - -static void -add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd) -{ - char path[PATH_MAX]; - int fd, err; - - snprintf(path, PATH_MAX, "/dev/mic%d", mic->id); - fd = open(path, O_RDWR); - if (fd < 0) { - mpsslog("Could not open %s %s\n", path, strerror(errno)); - return; - } - - err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd); - if (err < 0) { - mpsslog("Could not add %d %s\n", dd->type, strerror(errno)); - close(fd); - return; - } - switch (dd->type) { - case VIRTIO_ID_NET: - mic->mic_net.virtio_net_fd = fd; - mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name); - break; - case VIRTIO_ID_CONSOLE: - mic->mic_console.virtio_console_fd = fd; - mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name); - break; - case VIRTIO_ID_BLOCK: - mic->mic_virtblk.virtio_block_fd = fd; - mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name); - break; - } -} - -static bool -set_backend_file(struct mic_info *mic) -{ - FILE *config; - char buff[PATH_MAX], *line, *evv, *p; - - snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id); - config = fopen(buff, "r"); - if (config == NULL) - return false; - do { /* look for "virtblk_backend=XXXX" */ - line = fgets(buff, PATH_MAX, config); - if (line == NULL) - break; - if (*line == '#') - continue; - p = strchr(line, '\n'); - if (p) - *p = '\0'; - } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0); - fclose(config); - if (line == NULL) - return false; - evv = strchr(line, '='); - if (evv == NULL) - return false; - mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1); - if (mic->mic_virtblk.backend_file == NULL) { - mpsslog("%s %d can't allocate memory\n", mic->name, mic->id); - return false; - } - strcpy(mic->mic_virtblk.backend_file, evv + 1); - return true; -} - -#define SECTOR_SIZE 512 -static bool -set_backend_size(struct mic_info *mic) -{ - mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0, - SEEK_END); - if (mic->mic_virtblk.backend_size < 0) { - mpsslog("%s: can't seek: %s\n", - mic->name, mic->mic_virtblk.backend_file); - return false; - } - virtblk_dev_page.blk_config.capacity = - mic->mic_virtblk.backend_size / SECTOR_SIZE; - if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0) - virtblk_dev_page.blk_config.capacity++; - - virtblk_dev_page.blk_config.capacity = - htole64(virtblk_dev_page.blk_config.capacity); - - return true; -} - -static bool -open_backend(struct mic_info *mic) -{ - if (!set_backend_file(mic)) - goto _error_exit; - mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR); - if (mic->mic_virtblk.backend < 0) { - mpsslog("%s: can't open: %s\n", mic->name, - mic->mic_virtblk.backend_file); - goto _error_free; - } - if (!set_backend_size(mic)) - goto _error_close; - mic->mic_virtblk.backend_addr = mmap(NULL, - mic->mic_virtblk.backend_size, - PROT_READ|PROT_WRITE, MAP_SHARED, - mic->mic_virtblk.backend, 0L); - if (mic->mic_virtblk.backend_addr == MAP_FAILED) { - mpsslog("%s: can't map: %s %s\n", - mic->name, mic->mic_virtblk.backend_file, - strerror(errno)); - goto _error_close; - } - return true; - - _error_close: - close(mic->mic_virtblk.backend); - _error_free: - free(mic->mic_virtblk.backend_file); - _error_exit: - return false; -} - -static void -close_backend(struct mic_info *mic) -{ - munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size); - close(mic->mic_virtblk.backend); - free(mic->mic_virtblk.backend_file); -} - -static bool -start_virtblk(struct mic_info *mic, struct mic_vring *vring) -{ - if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) { - mpsslog("%s: blk_config is not 8 byte aligned.\n", - mic->name); - return false; - } - add_virtio_device(mic, &virtblk_dev_page.dd); - if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd, - VIRTIO_ID_BLOCK, vring, NULL, - virtblk_dev_page.dd.num_vq)) { - mpsslog("%s init_vr failed %s\n", - mic->name, strerror(errno)); - return false; - } - return true; -} - -static void -stop_virtblk(struct mic_info *mic) -{ - int vr_size, ret; - - vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES, - MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info)); - ret = munmap(mic->mic_virtblk.block_dp, - MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq); - if (ret < 0) - mpsslog("%s munmap errno %d\n", mic->name, errno); - close(mic->mic_virtblk.virtio_block_fd); -} - -static __u8 -header_error_check(struct vring_desc *desc) -{ - if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) { - mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n", - __func__, __LINE__); - return -EIO; - } - if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) { - mpsslog("%s() %d: alone\n", - __func__, __LINE__); - return -EIO; - } - if (le16toh(desc->flags) & VRING_DESC_F_WRITE) { - mpsslog("%s() %d: not read\n", - __func__, __LINE__); - return -EIO; - } - return 0; -} - -static int -read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx) -{ - struct iovec iovec; - struct mic_copy_desc copy; - - iovec.iov_len = sizeof(*hdr); - iovec.iov_base = hdr; - copy.iov = &iovec; - copy.iovcnt = 1; - copy.vr_idx = 0; /* only one vring on virtio_block */ - copy.update_used = false; /* do not update used index */ - return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©); -} - -static int -transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt) -{ - struct mic_copy_desc copy; - - copy.iov = iovec; - copy.iovcnt = iovcnt; - copy.vr_idx = 0; /* only one vring on virtio_block */ - copy.update_used = false; /* do not update used index */ - return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©); -} - -static __u8 -status_error_check(struct vring_desc *desc) -{ - if (le32toh(desc->len) != sizeof(__u8)) { - mpsslog("%s() %d: length is not sizeof(status)\n", - __func__, __LINE__); - return -EIO; - } - return 0; -} - -static int -write_status(int fd, __u8 *status) -{ - struct iovec iovec; - struct mic_copy_desc copy; - - iovec.iov_base = status; - iovec.iov_len = sizeof(*status); - copy.iov = &iovec; - copy.iovcnt = 1; - copy.vr_idx = 0; /* only one vring on virtio_block */ - copy.update_used = true; /* Update used index */ - return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©); -} - -static void * -virtio_block(void *arg) -{ - struct mic_info *mic = (struct mic_info *)arg; - int ret; - struct pollfd block_poll; - struct mic_vring vring; - __u16 avail_idx; - __u32 desc_idx; - struct vring_desc *desc; - struct iovec *iovec, *piov; - __u8 status; - __u32 buffer_desc_idx; - struct virtio_blk_outhdr hdr; - void *fos; - - for (;;) { /* forever */ - if (!open_backend(mic)) { /* No virtblk */ - for (mic->mic_virtblk.signaled = 0; - !mic->mic_virtblk.signaled;) - sleep(1); - continue; - } - - /* backend file is specified. */ - if (!start_virtblk(mic, &vring)) - goto _close_backend; - iovec = malloc(sizeof(*iovec) * - le32toh(virtblk_dev_page.blk_config.seg_max)); - if (!iovec) { - mpsslog("%s: can't alloc iovec: %s\n", - mic->name, strerror(ENOMEM)); - goto _stop_virtblk; - } - - block_poll.fd = mic->mic_virtblk.virtio_block_fd; - block_poll.events = POLLIN; - for (mic->mic_virtblk.signaled = 0; - !mic->mic_virtblk.signaled;) { - block_poll.revents = 0; - /* timeout in 1 sec to see signaled */ - ret = poll(&block_poll, 1, 1000); - if (ret < 0) { - mpsslog("%s %d: poll failed: %s\n", - __func__, __LINE__, - strerror(errno)); - continue; - } - - if (!(block_poll.revents & POLLIN)) { -#ifdef DEBUG - mpsslog("%s %d: block_poll.revents=0x%x\n", - __func__, __LINE__, block_poll.revents); -#endif - continue; - } - - /* POLLIN */ - while (vring.info->avail_idx != - le16toh(vring.vr.avail->idx)) { - /* read header element */ - avail_idx = - vring.info->avail_idx & - (vring.vr.num - 1); - desc_idx = le16toh( - vring.vr.avail->ring[avail_idx]); - desc = &vring.vr.desc[desc_idx]; -#ifdef DEBUG - mpsslog("%s() %d: avail_idx=%d ", - __func__, __LINE__, - vring.info->avail_idx); - mpsslog("vring.vr.num=%d desc=%p\n", - vring.vr.num, desc); -#endif - status = header_error_check(desc); - ret = read_header( - mic->mic_virtblk.virtio_block_fd, - &hdr, desc_idx); - if (ret < 0) { - mpsslog("%s() %d %s: ret=%d %s\n", - __func__, __LINE__, - mic->name, ret, - strerror(errno)); - break; - } - /* buffer element */ - piov = iovec; - status = 0; - fos = mic->mic_virtblk.backend_addr + - (hdr.sector * SECTOR_SIZE); - buffer_desc_idx = next_desc(desc); - desc_idx = buffer_desc_idx; - for (desc = &vring.vr.desc[buffer_desc_idx]; - desc->flags & VRING_DESC_F_NEXT; - desc_idx = next_desc(desc), - desc = &vring.vr.desc[desc_idx]) { - piov->iov_len = desc->len; - piov->iov_base = fos; - piov++; - fos += desc->len; - } - /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */ - if (hdr.type & ~(VIRTIO_BLK_T_OUT | - VIRTIO_BLK_T_GET_ID)) { - /* - VIRTIO_BLK_T_IN - does not do - anything. Probably for documenting. - VIRTIO_BLK_T_SCSI_CMD - for - virtio_scsi. - VIRTIO_BLK_T_FLUSH - turned off in - config space. - VIRTIO_BLK_T_BARRIER - defined but not - used in anywhere. - */ - mpsslog("%s() %d: type %x ", - __func__, __LINE__, - hdr.type); - mpsslog("is not supported\n"); - status = -ENOTSUP; - - } else { - ret = transfer_blocks( - mic->mic_virtblk.virtio_block_fd, - iovec, - piov - iovec); - if (ret < 0 && - status != 0) - status = ret; - } - /* write status and update used pointer */ - if (status != 0) - status = status_error_check(desc); - ret = write_status( - mic->mic_virtblk.virtio_block_fd, - &status); -#ifdef DEBUG - mpsslog("%s() %d: write status=%d on desc=%p\n", - __func__, __LINE__, - status, desc); -#endif - } - } - free(iovec); -_stop_virtblk: - stop_virtblk(mic); -_close_backend: - close_backend(mic); - } /* forever */ - - pthread_exit(NULL); -} - -static void -reset(struct mic_info *mic) -{ -#define RESET_TIMEOUT 120 - int i = RESET_TIMEOUT; - setsysfs(mic->name, "state", "reset"); - while (i) { - char *state; - state = readsysfs(mic->name, "state"); - if (!state) - goto retry; - mpsslog("%s: %s %d state %s\n", - mic->name, __func__, __LINE__, state); - - /* - * If the shutdown was initiated by OSPM, the state stays - * in "suspended" which is also a valid condition for reset. - */ - if ((!strcmp(state, "offline")) || - (!strcmp(state, "suspended"))) { - free(state); - break; - } - free(state); -retry: - sleep(1); - i--; - } -} - -static int -get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status) -{ - if (!strcmp(shutdown_status, "nop")) - return MIC_NOP; - if (!strcmp(shutdown_status, "crashed")) - return MIC_CRASHED; - if (!strcmp(shutdown_status, "halted")) - return MIC_HALTED; - if (!strcmp(shutdown_status, "poweroff")) - return MIC_POWER_OFF; - if (!strcmp(shutdown_status, "restart")) - return MIC_RESTART; - mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status); - /* Invalid state */ - assert(0); -}; - -static int get_mic_state(struct mic_info *mic, char *state) -{ - if (!strcmp(state, "offline")) - return MIC_OFFLINE; - if (!strcmp(state, "online")) - return MIC_ONLINE; - if (!strcmp(state, "shutting_down")) - return MIC_SHUTTING_DOWN; - if (!strcmp(state, "reset_failed")) - return MIC_RESET_FAILED; - if (!strcmp(state, "suspending")) - return MIC_SUSPENDING; - if (!strcmp(state, "suspended")) - return MIC_SUSPENDED; - mpsslog("%s: BUG invalid state %s\n", mic->name, state); - /* Invalid state */ - assert(0); -}; - -static void mic_handle_shutdown(struct mic_info *mic) -{ -#define SHUTDOWN_TIMEOUT 60 - int i = SHUTDOWN_TIMEOUT, ret, stat = 0; - char *shutdown_status; - while (i) { - shutdown_status = readsysfs(mic->name, "shutdown_status"); - if (!shutdown_status) - continue; - mpsslog("%s: %s %d shutdown_status %s\n", - mic->name, __func__, __LINE__, shutdown_status); - switch (get_mic_shutdown_status(mic, shutdown_status)) { - case MIC_RESTART: - mic->restart = 1; - case MIC_HALTED: - case MIC_POWER_OFF: - case MIC_CRASHED: - free(shutdown_status); - goto reset; - default: - break; - } - free(shutdown_status); - sleep(1); - i--; - } -reset: - ret = kill(mic->pid, SIGTERM); - mpsslog("%s: %s %d kill pid %d ret %d\n", - mic->name, __func__, __LINE__, - mic->pid, ret); - if (!ret) { - ret = waitpid(mic->pid, &stat, - WIFSIGNALED(stat)); - mpsslog("%s: %s %d waitpid ret %d pid %d\n", - mic->name, __func__, __LINE__, - ret, mic->pid); - } - if (ret == mic->pid) - reset(mic); -} - -static void * -mic_config(void *arg) -{ - struct mic_info *mic = (struct mic_info *)arg; - char *state = NULL; - char pathname[PATH_MAX]; - int fd, ret; - struct pollfd ufds[1]; - char value[4096]; - - snprintf(pathname, PATH_MAX - 1, "%s/%s/%s", - MICSYSFSDIR, mic->name, "state"); - - fd = open(pathname, O_RDONLY); - if (fd < 0) { - mpsslog("%s: opening file %s failed %s\n", - mic->name, pathname, strerror(errno)); - goto error; - } - - do { - ret = lseek(fd, 0, SEEK_SET); - if (ret < 0) { - mpsslog("%s: Failed to seek to file start '%s': %s\n", - mic->name, pathname, strerror(errno)); - goto close_error1; - } - ret = read(fd, value, sizeof(value)); - if (ret < 0) { - mpsslog("%s: Failed to read sysfs entry '%s': %s\n", - mic->name, pathname, strerror(errno)); - goto close_error1; - } -retry: - state = readsysfs(mic->name, "state"); - if (!state) - goto retry; - mpsslog("%s: %s %d state %s\n", - mic->name, __func__, __LINE__, state); - switch (get_mic_state(mic, state)) { - case MIC_SHUTTING_DOWN: - mic_handle_shutdown(mic); - goto close_error; - case MIC_SUSPENDING: - mic->boot_on_resume = 1; - setsysfs(mic->name, "state", "suspend"); - mic_handle_shutdown(mic); - goto close_error; - case MIC_OFFLINE: - if (mic->boot_on_resume) { - setsysfs(mic->name, "state", "boot"); - mic->boot_on_resume = 0; - } - break; - default: - break; - } - free(state); - - ufds[0].fd = fd; - ufds[0].events = POLLERR | POLLPRI; - ret = poll(ufds, 1, -1); - if (ret < 0) { - mpsslog("%s: poll failed %s\n", - mic->name, strerror(errno)); - goto close_error1; - } - } while (1); -close_error: - free(state); -close_error1: - close(fd); -error: - init_mic(mic); - pthread_exit(NULL); -} - -static void -set_cmdline(struct mic_info *mic) -{ - char buffer[PATH_MAX]; - int len; - - len = snprintf(buffer, PATH_MAX, - "clocksource=tsc highres=off nohz=off "); - len += snprintf(buffer + len, PATH_MAX - len, - "cpufreq_on;corec6_off;pc3_off;pc6_off "); - len += snprintf(buffer + len, PATH_MAX - len, - "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0", - mic->id); - - setsysfs(mic->name, "cmdline", buffer); - mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer); - snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id); - mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer); -} - -static void -set_log_buf_info(struct mic_info *mic) -{ - int fd; - off_t len; - char system_map[] = "/lib/firmware/mic/System.map"; - char *map, *temp, log_buf[17] = {'\0'}; - - fd = open(system_map, O_RDONLY); - if (fd < 0) { - mpsslog("%s: Opening System.map failed: %d\n", - mic->name, errno); - return; - } - len = lseek(fd, 0, SEEK_END); - if (len < 0) { - mpsslog("%s: Reading System.map size failed: %d\n", - mic->name, errno); - close(fd); - return; - } - map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0); - if (map == MAP_FAILED) { - mpsslog("%s: mmap of System.map failed: %d\n", - mic->name, errno); - close(fd); - return; - } - temp = strstr(map, "__log_buf"); - if (!temp) { - mpsslog("%s: __log_buf not found: %d\n", mic->name, errno); - munmap(map, len); - close(fd); - return; - } - strncpy(log_buf, temp - 19, 16); - setsysfs(mic->name, "log_buf_addr", log_buf); - mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf); - temp = strstr(map, "log_buf_len"); - if (!temp) { - mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno); - munmap(map, len); - close(fd); - return; - } - strncpy(log_buf, temp - 19, 16); - setsysfs(mic->name, "log_buf_len", log_buf); - mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf); - munmap(map, len); - close(fd); -} - -static void init_mic(struct mic_info *mic); - -static void -change_virtblk_backend(int x, siginfo_t *siginfo, void *p) -{ - struct mic_info *mic; - - for (mic = mic_list.next; mic != NULL; mic = mic->next) - mic->mic_virtblk.signaled = 1/* true */; -} - -static void -init_mic(struct mic_info *mic) -{ - struct sigaction ignore = { - .sa_flags = 0, - .sa_handler = SIG_IGN - }; - struct sigaction act = { - .sa_flags = SA_SIGINFO, - .sa_sigaction = change_virtblk_backend, - }; - char buffer[PATH_MAX]; - int err; - - /* - * Currently, one virtio block device is supported for each MIC card - * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon. - * The signal informs the virtio block backend about a change in the - * configuration file which specifies the virtio backend file name on - * the host. Virtio block backend then re-reads the configuration file - * and switches to the new block device. This signalling mechanism may - * not be required once multiple virtio block devices are supported by - * the MIC daemon. - */ - sigaction(SIGUSR1, &ignore, NULL); - - mic->pid = fork(); - switch (mic->pid) { - case 0: - set_log_buf_info(mic); - set_cmdline(mic); - add_virtio_device(mic, &virtcons_dev_page.dd); - add_virtio_device(mic, &virtnet_dev_page.dd); - err = pthread_create(&mic->mic_console.console_thread, NULL, - virtio_console, mic); - if (err) - mpsslog("%s virtcons pthread_create failed %s\n", - mic->name, strerror(err)); - err = pthread_create(&mic->mic_net.net_thread, NULL, - virtio_net, mic); - if (err) - mpsslog("%s virtnet pthread_create failed %s\n", - mic->name, strerror(err)); - err = pthread_create(&mic->mic_virtblk.block_thread, NULL, - virtio_block, mic); - if (err) - mpsslog("%s virtblk pthread_create failed %s\n", - mic->name, strerror(err)); - sigemptyset(&act.sa_mask); - err = sigaction(SIGUSR1, &act, NULL); - if (err) - mpsslog("%s sigaction SIGUSR1 failed %s\n", - mic->name, strerror(errno)); - while (1) - sleep(60); - case -1: - mpsslog("fork failed MIC name %s id %d errno %d\n", - mic->name, mic->id, errno); - break; - default: - if (mic->restart) { - snprintf(buffer, PATH_MAX, "boot"); - setsysfs(mic->name, "state", buffer); - mpsslog("%s restarting mic %d\n", - mic->name, mic->restart); - mic->restart = 0; - } - pthread_create(&mic->config_thread, NULL, mic_config, mic); - } -} - -static void -start_daemon(void) -{ - struct mic_info *mic; - - for (mic = mic_list.next; mic != NULL; mic = mic->next) - init_mic(mic); - - while (1) - sleep(60); -} - -static int -init_mic_list(void) -{ - struct mic_info *mic = &mic_list; - struct dirent *file; - DIR *dp; - int cnt = 0; - - dp = opendir(MICSYSFSDIR); - if (!dp) - return 0; - - while ((file = readdir(dp)) != NULL) { - if (!strncmp(file->d_name, "mic", 3)) { - mic->next = calloc(1, sizeof(struct mic_info)); - if (mic->next) { - mic = mic->next; - mic->id = atoi(&file->d_name[3]); - mic->name = malloc(strlen(file->d_name) + 16); - if (mic->name) - strcpy(mic->name, file->d_name); - mpsslog("MIC name %s id %d\n", mic->name, - mic->id); - cnt++; - } - } - } - - closedir(dp); - return cnt; -} - -void -mpsslog(char *format, ...) -{ - va_list args; - char buffer[4096]; - char ts[52], *ts1; - time_t t; - - if (logfp == NULL) - return; - - va_start(args, format); - vsprintf(buffer, format, args); - va_end(args); - - time(&t); - ts1 = ctime_r(&t, ts); - ts1[strlen(ts1) - 1] = '\0'; - fprintf(logfp, "%s: %s", ts1, buffer); - - fflush(logfp); -} - -int -main(int argc, char *argv[]) -{ - int cnt; - pid_t pid; - - myname = argv[0]; - - logfp = fopen(LOGFILE_NAME, "a+"); - if (!logfp) { - fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME); - exit(1); - } - pid = fork(); - switch (pid) { - case 0: - break; - case -1: - exit(2); - default: - exit(0); - } - - mpsslog("MIC Daemon start\n"); - - cnt = init_mic_list(); - if (cnt == 0) { - mpsslog("MIC module not loaded\n"); - exit(3); - } - mpsslog("MIC found %d devices\n", cnt); - - start_daemon(); - - exit(0); -} diff --git a/Documentation/mic/mpssd/mpssd.h b/Documentation/mic/mpssd/mpssd.h deleted file mode 100644 index f5f18b15d9a0..000000000000 --- a/Documentation/mic/mpssd/mpssd.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Intel MIC Platform Software Stack (MPSS) - * - * Copyright(c) 2013 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Intel MIC User Space Tools. - */ -#ifndef _MPSSD_H_ -#define _MPSSD_H_ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <fcntl.h> -#include <unistd.h> -#include <dirent.h> -#include <libgen.h> -#include <pthread.h> -#include <stdarg.h> -#include <time.h> -#include <errno.h> -#include <sys/dir.h> -#include <sys/ioctl.h> -#include <sys/poll.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <sys/mman.h> -#include <sys/utsname.h> -#include <sys/wait.h> -#include <netinet/in.h> -#include <arpa/inet.h> -#include <netdb.h> -#include <pthread.h> -#include <signal.h> -#include <limits.h> -#include <syslog.h> -#include <getopt.h> -#include <net/if.h> -#include <linux/if_tun.h> -#include <linux/if_tun.h> -#include <linux/virtio_ids.h> - -#define MICSYSFSDIR "/sys/class/mic" -#define LOGFILE_NAME "/var/log/mpssd" -#define PAGE_SIZE 4096 - -struct mic_console_info { - pthread_t console_thread; - int virtio_console_fd; - void *console_dp; -}; - -struct mic_net_info { - pthread_t net_thread; - int virtio_net_fd; - int tap_fd; - void *net_dp; -}; - -struct mic_virtblk_info { - pthread_t block_thread; - int virtio_block_fd; - void *block_dp; - volatile sig_atomic_t signaled; - char *backend_file; - int backend; - void *backend_addr; - long backend_size; -}; - -struct mic_info { - int id; - char *name; - pthread_t config_thread; - pid_t pid; - struct mic_console_info mic_console; - struct mic_net_info mic_net; - struct mic_virtblk_info mic_virtblk; - int restart; - int boot_on_resume; - struct mic_info *next; -}; - -__attribute__((format(printf, 1, 2))) -void mpsslog(char *format, ...); -char *readsysfs(char *dir, char *entry); -int setsysfs(char *dir, char *entry, char *value); -#endif diff --git a/Documentation/mic/mpssd/sysfs.c b/Documentation/mic/mpssd/sysfs.c deleted file mode 100644 index 8dd326936083..000000000000 --- a/Documentation/mic/mpssd/sysfs.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Intel MIC Platform Software Stack (MPSS) - * - * Copyright(c) 2013 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Intel MIC User Space Tools. - */ - -#include "mpssd.h" - -#define PAGE_SIZE 4096 - -char * -readsysfs(char *dir, char *entry) -{ - char filename[PATH_MAX]; - char value[PAGE_SIZE]; - char *string = NULL; - int fd; - int len; - - if (dir == NULL) - snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry); - else - snprintf(filename, PATH_MAX, - "%s/%s/%s", MICSYSFSDIR, dir, entry); - - fd = open(filename, O_RDONLY); - if (fd < 0) { - mpsslog("Failed to open sysfs entry '%s': %s\n", - filename, strerror(errno)); - return NULL; - } - - len = read(fd, value, sizeof(value)); - if (len < 0) { - mpsslog("Failed to read sysfs entry '%s': %s\n", - filename, strerror(errno)); - goto readsys_ret; - } - if (len == 0) - goto readsys_ret; - - value[len - 1] = '\0'; - - string = malloc(strlen(value) + 1); - if (string) - strcpy(string, value); - -readsys_ret: - close(fd); - return string; -} - -int -setsysfs(char *dir, char *entry, char *value) -{ - char filename[PATH_MAX]; - char *oldvalue; - int fd, ret = 0; - - if (dir == NULL) - snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry); - else - snprintf(filename, PATH_MAX, "%s/%s/%s", - MICSYSFSDIR, dir, entry); - - oldvalue = readsysfs(dir, entry); - - fd = open(filename, O_RDWR); - if (fd < 0) { - ret = errno; - mpsslog("Failed to open sysfs entry '%s': %s\n", - filename, strerror(errno)); - goto done; - } - - if (!oldvalue || strcmp(value, oldvalue)) { - if (write(fd, value, strlen(value)) < 0) { - ret = errno; - mpsslog("Failed to write new sysfs entry '%s': %s\n", - filename, strerror(errno)); - } - } - close(fd); -done: - if (oldvalue) - free(oldvalue); - return ret; -} diff --git a/Documentation/networking/netlink_mmap.txt b/Documentation/networking/netlink_mmap.txt deleted file mode 100644 index c6af4bac5aa8..000000000000 --- a/Documentation/networking/netlink_mmap.txt +++ /dev/null @@ -1,339 +0,0 @@ -This file documents how to use memory mapped I/O with netlink. - -Author: Patrick McHardy <kaber@xxxxxxxxx> - -Overview --------- - -Memory mapped netlink I/O can be used to increase throughput and decrease -overhead of unicast receive and transmit operations. Some netlink subsystems -require high throughput, these are mainly the netfilter subsystems -nfnetlink_queue and nfnetlink_log, but it can also help speed up large -dump operations of f.i. the routing database. - -Memory mapped netlink I/O used two circular ring buffers for RX and TX which -are mapped into the processes address space. - -The RX ring is used by the kernel to directly construct netlink messages into -user-space memory without copying them as done with regular socket I/O, -additionally as long as the ring contains messages no recvmsg() or poll() -syscalls have to be issued by user-space to get more message. - -The TX ring is used to process messages directly from user-space memory, the -kernel processes all messages contained in the ring using a single sendmsg() -call. - -Usage overview --------------- - -In order to use memory mapped netlink I/O, user-space needs three main changes: - -- ring setup -- conversion of the RX path to get messages from the ring instead of recvmsg() -- conversion of the TX path to construct messages into the ring - -Ring setup is done using setsockopt() to provide the ring parameters to the -kernel, then a call to mmap() to map the ring into the processes address space: - -- setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, ¶ms, sizeof(params)); -- setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, ¶ms, sizeof(params)); -- ring = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0) - -Usage of either ring is optional, but even if only the RX ring is used the -mapping still needs to be writable in order to update the frame status after -processing. - -Conversion of the reception path involves calling poll() on the file -descriptor, once the socket is readable the frames from the ring are -processed in order until no more messages are available, as indicated by -a status word in the frame header. - -On kernel side, in order to make use of memory mapped I/O on receive, the -originating netlink subsystem needs to support memory mapped I/O, otherwise -it will use an allocated socket buffer as usual and the contents will be - copied to the ring on transmission, nullifying most of the performance gains. -Dumps of kernel databases automatically support memory mapped I/O. - -Conversion of the transmit path involves changing message construction to -use memory from the TX ring instead of (usually) a buffer declared on the -stack and setting up the frame header appropriately. Optionally poll() can -be used to wait for free frames in the TX ring. - -Structured and definitions for using memory mapped I/O are contained in -<linux/netlink.h>. - -RX and TX rings ----------------- - -Each ring contains a number of continuous memory blocks, containing frames of -fixed size dependent on the parameters used for ring setup. - -Ring: [ block 0 ] - [ frame 0 ] - [ frame 1 ] - [ block 1 ] - [ frame 2 ] - [ frame 3 ] - ... - [ block n ] - [ frame 2 * n ] - [ frame 2 * n + 1 ] - -The blocks are only visible to the kernel, from the point of view of user-space -the ring just contains the frames in a continuous memory zone. - -The ring parameters used for setting up the ring are defined as follows: - -struct nl_mmap_req { - unsigned int nm_block_size; - unsigned int nm_block_nr; - unsigned int nm_frame_size; - unsigned int nm_frame_nr; -}; - -Frames are grouped into blocks, where each block is a continuous region of memory -and holds nm_block_size / nm_frame_size frames. The total number of frames in -the ring is nm_frame_nr. The following invariants hold: - -- frames_per_block = nm_block_size / nm_frame_size - -- nm_frame_nr = frames_per_block * nm_block_nr - -Some parameters are constrained, specifically: - -- nm_block_size must be a multiple of the architectures memory page size. - The getpagesize() function can be used to get the page size. - -- nm_frame_size must be equal or larger to NL_MMAP_HDRLEN, IOW a frame must be - able to hold at least the frame header - -- nm_frame_size must be smaller or equal to nm_block_size - -- nm_frame_size must be a multiple of NL_MMAP_MSG_ALIGNMENT - -- nm_frame_nr must equal the actual number of frames as specified above. - -When the kernel can't allocate physically continuous memory for a ring block, -it will fall back to use physically discontinuous memory. This might affect -performance negatively, in order to avoid this the nm_frame_size parameter -should be chosen to be as small as possible for the required frame size and -the number of blocks should be increased instead. - -Ring frames ------------- - -Each frames contain a frame header, consisting of a synchronization word and some -meta-data, and the message itself. - -Frame: [ header message ] - -The frame header is defined as follows: - -struct nl_mmap_hdr { - unsigned int nm_status; - unsigned int nm_len; - __u32 nm_group; - /* credentials */ - __u32 nm_pid; - __u32 nm_uid; - __u32 nm_gid; -}; - -- nm_status is used for synchronizing processing between the kernel and user- - space and specifies ownership of the frame as well as the operation to perform - -- nm_len contains the length of the message contained in the data area - -- nm_group specified the destination multicast group of message - -- nm_pid, nm_uid and nm_gid contain the netlink pid, UID and GID of the sending - process. These values correspond to the data available using SOCK_PASSCRED in - the SCM_CREDENTIALS cmsg. - -The possible values in the status word are: - -- NL_MMAP_STATUS_UNUSED: - RX ring: frame belongs to the kernel and contains no message - for user-space. Approriate action is to invoke poll() - to wait for new messages. - - TX ring: frame belongs to user-space and can be used for - message construction. - -- NL_MMAP_STATUS_RESERVED: - RX ring only: frame is currently used by the kernel for message - construction and contains no valid message yet. - Appropriate action is to invoke poll() to wait for - new messages. - -- NL_MMAP_STATUS_VALID: - RX ring: frame contains a valid message. Approriate action is - to process the message and release the frame back to - the kernel by setting the status to - NL_MMAP_STATUS_UNUSED or queue the frame by setting the - status to NL_MMAP_STATUS_SKIP. - - TX ring: the frame contains a valid message from user-space to - be processed by the kernel. After completing processing - the kernel will release the frame back to user-space by - setting the status to NL_MMAP_STATUS_UNUSED. - -- NL_MMAP_STATUS_COPY: - RX ring only: a message is ready to be processed but could not be - stored in the ring, either because it exceeded the - frame size or because the originating subsystem does - not support memory mapped I/O. Appropriate action is - to invoke recvmsg() to receive the message and release - the frame back to the kernel by setting the status to - NL_MMAP_STATUS_UNUSED. - -- NL_MMAP_STATUS_SKIP: - RX ring only: user-space queued the message for later processing, but - processed some messages following it in the ring. The - kernel should skip this frame when looking for unused - frames. - -The data area of a frame begins at a offset of NL_MMAP_HDRLEN relative to the -frame header. - -TX limitations --------------- - -Kernel processing usually involves validation of the message received by -user-space, then processing its contents. The kernel must assure that -userspace is not able to modify the message contents after they have been -validated. In order to do so, the message is copied from the ring frame -to an allocated buffer if either of these conditions is false: - -- only a single mapping of the ring exists -- the file descriptor is not shared between processes - -This means that for threaded programs, the kernel will fall back to copying. - -Example -------- - -Ring setup: - - unsigned int block_size = 16 * getpagesize(); - struct nl_mmap_req req = { - .nm_block_size = block_size, - .nm_block_nr = 64, - .nm_frame_size = 16384, - .nm_frame_nr = 64 * block_size / 16384, - }; - unsigned int ring_size; - void *rx_ring, *tx_ring; - - /* Configure ring parameters */ - if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0) - exit(1); - if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0) - exit(1) - - /* Calculate size of each individual ring */ - ring_size = req.nm_block_nr * req.nm_block_size; - - /* Map RX/TX rings. The TX ring is located after the RX ring */ - rx_ring = mmap(NULL, 2 * ring_size, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, 0); - if ((long)rx_ring == -1L) - exit(1); - tx_ring = rx_ring + ring_size: - -Message reception: - -This example assumes some ring parameters of the ring setup are available. - - unsigned int frame_offset = 0; - struct nl_mmap_hdr *hdr; - struct nlmsghdr *nlh; - unsigned char buf[16384]; - ssize_t len; - - while (1) { - struct pollfd pfds[1]; - - pfds[0].fd = fd; - pfds[0].events = POLLIN | POLLERR; - pfds[0].revents = 0; - - if (poll(pfds, 1, -1) < 0 && errno != -EINTR) - exit(1); - - /* Check for errors. Error handling omitted */ - if (pfds[0].revents & POLLERR) - <handle error> - - /* If no new messages, poll again */ - if (!(pfds[0].revents & POLLIN)) - continue; - - /* Process all frames */ - while (1) { - /* Get next frame header */ - hdr = rx_ring + frame_offset; - - if (hdr->nm_status == NL_MMAP_STATUS_VALID) { - /* Regular memory mapped frame */ - nlh = (void *)hdr + NL_MMAP_HDRLEN; - len = hdr->nm_len; - - /* Release empty message immediately. May happen - * on error during message construction. - */ - if (len == 0) - goto release; - } else if (hdr->nm_status == NL_MMAP_STATUS_COPY) { - /* Frame queued to socket receive queue */ - len = recv(fd, buf, sizeof(buf), MSG_DONTWAIT); - if (len <= 0) - break; - nlh = buf; - } else - /* No more messages to process, continue polling */ - break; - - process_msg(nlh); -release: - /* Release frame back to the kernel */ - hdr->nm_status = NL_MMAP_STATUS_UNUSED; - - /* Advance frame offset to next frame */ - frame_offset = (frame_offset + frame_size) % ring_size; - } - } - -Message transmission: - -This example assumes some ring parameters of the ring setup are available. -A single message is constructed and transmitted, to send multiple messages -at once they would be constructed in consecutive frames before a final call -to sendto(). - - unsigned int frame_offset = 0; - struct nl_mmap_hdr *hdr; - struct nlmsghdr *nlh; - struct sockaddr_nl addr = { - .nl_family = AF_NETLINK, - }; - - hdr = tx_ring + frame_offset; - if (hdr->nm_status != NL_MMAP_STATUS_UNUSED) - /* No frame available. Use poll() to avoid. */ - exit(1); - - nlh = (void *)hdr + NL_MMAP_HDRLEN; - - /* Build message */ - build_message(nlh); - - /* Fill frame header: length and status need to be set */ - hdr->nm_len = nlh->nlmsg_len; - hdr->nm_status = NL_MMAP_STATUS_VALID; - - if (sendto(fd, NULL, 0, 0, &addr, sizeof(addr)) < 0) - exit(1); - - /* Advance frame offset to next frame */ - frame_offset = (frame_offset + frame_size) % ring_size; diff --git a/Makefile b/Makefile index 52d32f6bd7c5..252070fdf91c 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 18 -SUBLEVEL = 48 +SUBLEVEL = 49 EXTRAVERSION = NAME = Diseased Newt diff --git a/arch/arm/include/asm/floppy.h b/arch/arm/include/asm/floppy.h index f4882553fbb0..85a34cc8316a 100644 --- a/arch/arm/include/asm/floppy.h +++ b/arch/arm/include/asm/floppy.h @@ -17,7 +17,7 @@ #define fd_outb(val,port) \ do { \ - if ((port) == FD_DOR) \ + if ((port) == (u32)FD_DOR) \ fd_setdor((val)); \ else \ outb((val),(port)); \ diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig index 48e16d98b2cc..6ecc67f3736b 100644 --- a/arch/mips/configs/ip27_defconfig +++ b/arch/mips/configs/ip27_defconfig @@ -206,7 +206,6 @@ CONFIG_MLX4_EN=m # CONFIG_MLX4_DEBUG is not set CONFIG_TEHUTI=m CONFIG_BNX2X=m -CONFIG_QLGE=m CONFIG_SFC=m CONFIG_BE2NET=m CONFIG_LIBERTAS_THINFIRM=m diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S index 41a2fa1fa12e..c7953f2aca4f 100644 --- a/arch/mips/dec/int-handler.S +++ b/arch/mips/dec/int-handler.S @@ -146,7 +146,25 @@ /* * Find irq with highest priority */ - PTR_LA t1,cpu_mask_nr_tbl + # open coded PTR_LA t1, cpu_mask_nr_tbl +#if (_MIPS_SZPTR == 32) + # open coded la t1, cpu_mask_nr_tbl + lui t1, %hi(cpu_mask_nr_tbl) + addiu t1, %lo(cpu_mask_nr_tbl) + +#endif +#if (_MIPS_SZPTR == 64) + # open coded dla t1, cpu_mask_nr_tbl + .set push + .set noat + lui t1, %highest(cpu_mask_nr_tbl) + lui AT, %hi(cpu_mask_nr_tbl) + daddiu t1, t1, %higher(cpu_mask_nr_tbl) + daddiu AT, AT, %lo(cpu_mask_nr_tbl) + dsll t1, 32 + daddu t1, t1, AT + .set pop +#endif 1: lw t2,(t1) nop and t2,t0 @@ -195,7 +213,25 @@ /* * Find irq with highest priority */ - PTR_LA t1,asic_mask_nr_tbl + # open coded PTR_LA t1,asic_mask_nr_tbl +#if (_MIPS_SZPTR == 32) + # open coded la t1, asic_mask_nr_tbl + lui t1, %hi(asic_mask_nr_tbl) + addiu t1, %lo(asic_mask_nr_tbl) + +#endif +#if (_MIPS_SZPTR == 64) + # open coded dla t1, asic_mask_nr_tbl + .set push + .set noat + lui t1, %highest(asic_mask_nr_tbl) + lui AT, %hi(asic_mask_nr_tbl) + daddiu t1, t1, %higher(asic_mask_nr_tbl) + daddiu AT, AT, %lo(asic_mask_nr_tbl) + dsll t1, 32 + daddu t1, t1, AT + .set pop +#endif 2: lw t2,(t1) nop and t2,t0 diff --git a/arch/mips/sgi-ip22/Platform b/arch/mips/sgi-ip22/Platform index b7a4b7e04c38..e8f6b3a42a48 100644 --- a/arch/mips/sgi-ip22/Platform +++ b/arch/mips/sgi-ip22/Platform @@ -25,7 +25,7 @@ endif # Simplified: what IP22 does at 128MB+ in ksegN, IP28 does at 512MB+ in xkphys # ifdef CONFIG_SGI_IP28 - ifeq ($(call cc-option-yn,-mr10k-cache-barrier=store), n) + ifeq ($(call cc-option-yn,-march=r10000 -mr10k-cache-barrier=store), n) $(error gcc doesn't support needed option -mr10k-cache-barrier=store) endif endif diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 54651fc2d412..51b667316605 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1806,8 +1806,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case LARX: - if (regs->msr & MSR_LE) - return 0; if (op.ea & (size - 1)) break; /* can't handle misaligned */ err = -EFAULT; @@ -1829,8 +1827,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case STCX: - if (regs->msr & MSR_LE) - return 0; if (op.ea & (size - 1)) break; /* can't handle misaligned */ err = -EFAULT; @@ -1854,8 +1850,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case LOAD: - if (regs->msr & MSR_LE) - return 0; err = read_mem(®s->gpr[op.reg], op.ea, size, regs); if (!err) { if (op.type & SIGNEXT) @@ -1866,8 +1860,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case LOAD_FP: - if (regs->msr & MSR_LE) - return 0; if (size == 4) err = do_fp_load(op.reg, do_lfs, op.ea, size, regs); else @@ -1876,15 +1868,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef CONFIG_ALTIVEC case LOAD_VMX: - if (regs->msr & MSR_LE) - return 0; err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs); goto ldst_done; #endif #ifdef CONFIG_VSX case LOAD_VSX: - if (regs->msr & MSR_LE) - return 0; err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs); goto ldst_done; #endif @@ -1907,8 +1895,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case STORE: - if (regs->msr & MSR_LE) - return 0; if ((op.type & UPDATE) && size == sizeof(long) && op.reg == 1 && op.update_reg == 1 && !(regs->msr & MSR_PR) && @@ -1920,8 +1906,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case STORE_FP: - if (regs->msr & MSR_LE) - return 0; if (size == 4) err = do_fp_store(op.reg, do_stfs, op.ea, size, regs); else @@ -1930,15 +1914,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef CONFIG_ALTIVEC case STORE_VMX: - if (regs->msr & MSR_LE) - return 0; err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs); goto ldst_done; #endif #ifdef CONFIG_VSX case STORE_VSX: - if (regs->msr & MSR_LE) - return 0; err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs); goto ldst_done; #endif diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index d559bdb03d18..2006b955ea91 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -74,7 +74,8 @@ extern void execve_tail(void); #else /* CONFIG_64BIT */ -#define TASK_SIZE_OF(tsk) ((tsk)->mm->context.asce_limit) +#define TASK_SIZE_OF(tsk) ((tsk)->mm ? \ + (tsk)->mm->context.asce_limit : TASK_MAX_SIZE) #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \ (1UL << 30) : (1UL << 41)) #define TASK_SIZE TASK_SIZE_OF(current) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index e80d9ff9a56d..ec3b505238cb 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -822,10 +822,10 @@ static void __init setup_randomness(void) { struct sysinfo_3_2_2 *vmms; - vmms = (struct sysinfo_3_2_2 *) alloc_page(GFP_KERNEL); - if (vmms && stsi(vmms, 3, 2, 2) == 0 && vmms->count) - add_device_randomness(&vmms, vmms->count); - free_page((unsigned long) vmms); + vmms = (struct sysinfo_3_2_2 *) memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (stsi(vmms, 3, 2, 2) == 0 && vmms->count) + add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count); + memblock_free((unsigned long) vmms, PAGE_SIZE); } /* diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 49e4d64ff74d..0ce029c73ca3 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -207,6 +207,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot; int is_dirty = 0; + if (kvm_is_ucontrol(kvm)) + return -EINVAL; + mutex_lock(&kvm->slots_lock); r = -EINVAL; diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 35fde2a24751..d0f59879cdca 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1358,11 +1358,28 @@ EXPORT_SYMBOL_GPL(s390_enable_skey); */ bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap) { + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; pte_t *pte; spinlock_t *ptl; bool dirty = false; - pte = get_locked_pte(gmap->mm, address, &ptl); + pgd = pgd_offset(gmap->mm, address); + pud = pud_alloc(gmap->mm, pgd, address); + if (!pud) + return false; + pmd = pmd_alloc(gmap->mm, pud, address); + if (!pmd) + return false; + /* We can't run guests backed by huge pages, but userspace can + * still set them up and then try to migrate them without any + * migration support. + */ + if (pmd_large(*pmd)) + return true; + + pte = pte_alloc_map_lock(gmap->mm, pmd, address, &ptl); if (unlikely(!pte)) return false; diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index de1d72e3ec59..2f5cd2f62a2b 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -218,6 +218,29 @@ static int ghash_async_final(struct ahash_request *req) } } +static int ghash_async_import(struct ahash_request *req, const void *in) +{ + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + ghash_async_init(req); + memcpy(dctx, in, sizeof(*dctx)); + return 0; + +} + +static int ghash_async_export(struct ahash_request *req, void *out) +{ + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + memcpy(out, dctx, sizeof(*dctx)); + return 0; + +} + static int ghash_async_digest(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); @@ -285,8 +308,11 @@ static struct ahash_alg ghash_async_alg = { .final = ghash_async_final, .setkey = ghash_async_setkey, .digest = ghash_async_digest, + .export = ghash_async_export, + .import = ghash_async_import, .halg = { .digestsize = GHASH_DIGEST_SIZE, + .statesize = sizeof(struct ghash_desc_ctx), .base = { .cra_name = "ghash", .cra_driver_name = "ghash-clmulni", diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 8e046ade1c88..504be087d682 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -329,7 +329,7 @@ do { \ #define __get_user_asm_u64(x, ptr, retval, errret) \ __get_user_asm(x, ptr, retval, "q", "", "=r", errret) #define __get_user_asm_ex_u64(x, ptr) \ - __get_user_asm_ex(x, ptr, "q", "", "=r") + __get_user_asm_ex(x, ptr, "q", "", "=&r") #endif #define __get_user_size(x, ptr, size, retval, errret) \ @@ -372,13 +372,13 @@ do { \ __chk_user_ptr(ptr); \ switch (size) { \ case 1: \ - __get_user_asm_ex(x, ptr, "b", "b", "=q"); \ + __get_user_asm_ex(x, ptr, "b", "b", "=&q"); \ break; \ case 2: \ - __get_user_asm_ex(x, ptr, "w", "w", "=r"); \ + __get_user_asm_ex(x, ptr, "w", "w", "=&r"); \ break; \ case 4: \ - __get_user_asm_ex(x, ptr, "l", "k", "=r"); \ + __get_user_asm_ex(x, ptr, "l", "k", "=&r"); \ break; \ case 8: \ __get_user_asm_ex_u64(x, ptr); \ @@ -396,7 +396,7 @@ do { \ " jmp 2b\n" \ ".previous\n" \ _ASM_EXTABLE_EX(1b, 3b) \ - : ltype(x) : "m" (__m(addr))) + : ltype(x) : "m" (__m(addr)), "0" (0)) #define __put_user_nocheck(x, ptr, size) \ ({ \ diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 06370ccea9e9..b70097e7f37a 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -133,6 +133,8 @@ static int __init parse_tag_initrd(const bp_tag_t* tag) __tagtable(BP_TAG_INITRD, parse_tag_initrd); +#endif /* CONFIG_BLK_DEV_INITRD */ + #ifdef CONFIG_OF static int __init parse_tag_fdt(const bp_tag_t *tag) @@ -145,8 +147,6 @@ __tagtable(BP_TAG_FDT, parse_tag_fdt); #endif /* CONFIG_OF */ -#endif /* CONFIG_BLK_DEV_INITRD */ - static int __init parse_tag_cmdline(const bp_tag_t* tag) { strlcpy(command_line, (char *)(tag->data), COMMAND_LINE_SIZE); diff --git a/crypto/Makefile b/crypto/Makefile index 1445b9100c05..ea11cf871ebc 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -47,6 +47,7 @@ obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o obj-$(CONFIG_CRYPTO_WP512) += wp512.o +CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149 obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o obj-$(CONFIG_CRYPTO_ECB) += ecb.o @@ -68,6 +69,7 @@ obj-$(CONFIG_CRYPTO_BLOWFISH_COMMON) += blowfish_common.o obj-$(CONFIG_CRYPTO_TWOFISH) += twofish_generic.o obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o obj-$(CONFIG_CRYPTO_SERPENT) += serpent_generic.o +CFLAGS_serpent_generic.o := $(call cc-option,-fsched-pressure) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149 obj-$(CONFIG_CRYPTO_AES) += aes_generic.o obj-$(CONFIG_CRYPTO_CAMELLIA) += camellia_generic.o obj-$(CONFIG_CRYPTO_CAST_COMMON) += cast_common.o diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index df99445a899a..9eb8174aa58c 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -195,7 +195,7 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags) struct alg_sock *ask = alg_sk(sk); struct hash_ctx *ctx = ask->private; struct ahash_request *req = &ctx->req; - char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req))]; + char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req)) ? : 1]; struct sock *sk2; struct alg_sock *ask2; struct hash_ctx *ctx2; diff --git a/crypto/cryptd.c b/crypto/cryptd.c index be367e43ffe8..fb0d140065e2 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -606,6 +606,7 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC; inst->alg.halg.digestsize = salg->digestsize; + inst->alg.halg.statesize = salg->statesize; inst->alg.halg.base.cra_ctxsize = sizeof(struct cryptd_hash_ctx); inst->alg.halg.base.cra_init = cryptd_hash_init_tfm; diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c index a8e870444ea9..02a1c10fa909 100644 --- a/crypto/mcryptd.c +++ b/crypto/mcryptd.c @@ -505,6 +505,7 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC; inst->alg.halg.digestsize = salg->digestsize; + inst->alg.halg.statesize = salg->statesize; inst->alg.halg.base.cra_ctxsize = sizeof(struct mcryptd_hash_ctx); inst->alg.halg.base.cra_init = mcryptd_hash_init_tfm; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 973c185c9cfe..a31b282de8e0 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1802,7 +1802,7 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock, * do we need to block DRBD_SIG if sock == &meta.socket ?? * otherwise wake_asender() might interrupt some send_*Ack ! */ - rv = kernel_sendmsg(sock, &msg, &iov, 1, size); + rv = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); if (rv == -EAGAIN) { if (we_should_drop_the_connection(connection, sock)) break; diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index 036969223c16..0b4b67b859e2 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -94,6 +94,7 @@ static const struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x04CA, 0x300f) }, { USB_DEVICE(0x04CA, 0x3010) }, { USB_DEVICE(0x04CA, 0x3014) }, + { USB_DEVICE(0x04CA, 0x3018) }, { USB_DEVICE(0x0930, 0x0219) }, { USB_DEVICE(0x0930, 0x0220) }, { USB_DEVICE(0x0930, 0x0227) }, @@ -160,6 +161,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3014), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x3018), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 2dd73a3e95a2..ce9a00a5c5ca 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -186,6 +186,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3014), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x3018), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c index 810c51d92b99..30672a3df8a9 100644 --- a/drivers/gpu/drm/ast/ast_post.c +++ b/drivers/gpu/drm/ast/ast_post.c @@ -58,13 +58,9 @@ bool ast_is_vga_enabled(struct drm_device *dev) /* TODO 1180 */ } else { ch = ast_io_read8(ast, AST_IO_VGA_ENABLE_PORT); - if (ch) { - ast_open_key(ast); - ch = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb6, 0xff); - return ch & 0x04; - } + return !!(ch & 0x01); } - return 0; + return false; } static const u8 extreginfo[] = { 0x0f, 0x04, 0x1c, 0xff }; @@ -375,8 +371,8 @@ void ast_post_gpu(struct drm_device *dev) pci_write_config_dword(ast->dev->pdev, 0x04, reg); ast_enable_vga(dev); - ast_enable_mmio(dev); ast_open_key(ast); + ast_enable_mmio(dev); ast_set_def_ext_reg(dev); if (ast->chip == AST2300 || ast->chip == AST2400) @@ -1630,12 +1626,44 @@ static void ast_init_dram_2300(struct drm_device *dev) temp |= 0x73; ast_write32(ast, 0x12008, temp); + param.dram_freq = 396; param.dram_type = AST_DDR3; + temp = ast_mindwm(ast, 0x1e6e2070); if (temp & 0x01000000) param.dram_type = AST_DDR2; - param.dram_chipid = ast->dram_type; - param.dram_freq = ast->mclk; - param.vram_size = ast->vram_size; + switch (temp & 0x18000000) { + case 0: + param.dram_chipid = AST_DRAM_512Mx16; + break; + default: + case 0x08000000: + param.dram_chipid = AST_DRAM_1Gx16; + break; + case 0x10000000: + param.dram_chipid = AST_DRAM_2Gx16; + break; + case 0x18000000: + param.dram_chipid = AST_DRAM_4Gx16; + break; + } + switch (temp & 0x0c) { + default: + case 0x00: + param.vram_size = AST_VIDMEM_SIZE_8M; + break; + + case 0x04: + param.vram_size = AST_VIDMEM_SIZE_16M; + break; + + case 0x08: + param.vram_size = AST_VIDMEM_SIZE_32M; + break; + + case 0x0c: + param.vram_size = AST_VIDMEM_SIZE_64M; + break; + } if (param.dram_type == AST_DDR3) { get_ddr3_info(ast, ¶m); diff --git a/drivers/gpu/drm/exynos/exynos_drm_core.c b/drivers/gpu/drm/exynos/exynos_drm_core.c index 4c9f972eaa07..ba64d23b90a7 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_core.c +++ b/drivers/gpu/drm/exynos/exynos_drm_core.c @@ -141,7 +141,7 @@ int exynos_drm_subdrv_open(struct drm_device *dev, struct drm_file *file) return 0; err: - list_for_each_entry_reverse(subdrv, &subdrv->list, list) { + list_for_each_entry_continue_reverse(subdrv, &exynos_drm_subdrv_list, list) { if (subdrv->close) subdrv->close(dev, subdrv->dev, file); } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 7370202c2022..4ca35798b0b2 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1617,7 +1617,6 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) struct ttm_buffer_object *bo; int ret = -EBUSY; int put_count; - uint32_t swap_placement = (TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM); spin_lock(&glob->lru_lock); list_for_each_entry(bo, &glob->swap_lru, swap) { @@ -1653,7 +1652,8 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) if (unlikely(ret != 0)) goto out; - if ((bo->mem.placement & swap_placement) != swap_placement) { + if (bo->mem.mem_type != TTM_PL_SYSTEM || + bo->ttm->caching_state != tt_cached) { struct ttm_mem_reg evict_mem; evict_mem = bo->mem; diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 984e43cb83f0..213f616e7a1f 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -168,6 +168,7 @@ #define USB_DEVICE_ID_ATEN_4PORTKVM 0x2205 #define USB_DEVICE_ID_ATEN_4PORTKVMC 0x2208 #define USB_DEVICE_ID_ATEN_CS682 0x2213 +#define USB_DEVICE_ID_ATEN_CS692 0x8021 #define USB_VENDOR_ID_ATMEL 0x03eb #define USB_DEVICE_ID_ATMEL_MULTITOUCH 0x211c diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index f7dd36e4fcb3..999dff00b214 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -61,6 +61,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_4PORTKVM, HID_QUIRK_NOGET }, { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_4PORTKVMC, HID_QUIRK_NOGET }, { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_CS682, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_CS692, HID_QUIRK_NOGET }, { USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_FIGHTERSTICK, HID_QUIRK_NOGET }, { USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_COMBATSTICK, HID_QUIRK_NOGET }, { USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_FLIGHT_SIM_ECLIPSE_YOKE, HID_QUIRK_NOGET }, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 933efcea0d03..cdf0a78e0c99 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1478,12 +1478,14 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, ret = ipoib_set_mode(dev, buf); - rtnl_unlock(); - - if (!ret) - return count; + /* The assumption is that the function ipoib_set_mode returned + * with the rtnl held by it, if not the value -EBUSY returned, + * then no need to rtnl_unlock + */ + if (ret != -EBUSY) + rtnl_unlock(); - return ret; + return (!ret || ret == -EBUSY) ? count : ret; } static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 58b5aa3b6f2d..483ddbd1bdc8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -236,8 +236,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; ipoib_flush_paths(dev); - rtnl_lock(); - return 0; + return (!rtnl_trylock()) ? -EBUSY : 0; } if (!strcmp(buf, "datagram\n")) { @@ -246,8 +245,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu)); rtnl_unlock(); ipoib_flush_paths(dev); - rtnl_lock(); - return 0; + return (!rtnl_trylock()) ? -EBUSY : 0; } return -EINVAL; diff --git a/drivers/input/serio/gscps2.c b/drivers/input/serio/gscps2.c index 8d9ba0c3827c..94ab494a6ade 100644 --- a/drivers/input/serio/gscps2.c +++ b/drivers/input/serio/gscps2.c @@ -40,7 +40,6 @@ MODULE_AUTHOR("Laurent Canet <canetl@xxxxxxxx>, Thibaut Varene <varenet@xxxxxxxxxxxxxxxx>, Helge Deller <deller@xxxxxx>"); MODULE_DESCRIPTION("HP GSC PS2 port driver"); MODULE_LICENSE("GPL"); -MODULE_DEVICE_TABLE(parisc, gscps2_device_tbl); #define PFX "gscps2.c: " @@ -439,6 +438,7 @@ static struct parisc_device_id gscps2_device_tbl[] = { #endif { 0, } /* 0 terminated list */ }; +MODULE_DEVICE_TABLE(parisc, gscps2_device_tbl); static struct parisc_driver parisc_ps2_driver = { .name = "gsc_ps2", diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 8a0643ae1fd9..6b78e131f6dd 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -317,7 +317,9 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb, struct pci_dev *pdev = to_pci_dev(data); struct dmar_pci_notify_info *info; - /* Only care about add/remove events for physical functions */ + /* Only care about add/remove events for physical functions. + * For VFs we actually do the lookup based on the corresponding + * PF in device_to_iommu() anyway. */ if (pdev->is_virtfn) return NOTIFY_DONE; if (action != BUS_NOTIFY_ADD_DEVICE && action != BUS_NOTIFY_DEL_DEVICE) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 3d1fc736a420..1689632e4e92 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -696,7 +696,13 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf int i; if (dev_is_pci(dev)) { + struct pci_dev *pf_pdev; + pdev = to_pci_dev(dev); + /* VFs aren't listed in scope tables; we need to look up + * the PF instead to find the IOMMU. */ + pf_pdev = pci_physfn(pdev); + dev = &pf_pdev->dev; segment = pci_domain_nr(pdev->bus); } else if (ACPI_COMPANION(dev)) dev = &ACPI_COMPANION(dev)->dev; @@ -709,6 +715,13 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, tmp) { if (tmp == dev) { + /* For a VF use its original BDF# not that of the PF + * which we used for the IOMMU lookup. Strictly speaking + * we could do this for all PCI devices; we only need to + * get the BDF# from the scope table for ACPI matches. */ + if (pdev->is_virtfn) + goto got_pdev; + *bus = drhd->devices[i].bus; *devfn = drhd->devices[i].devfn; goto out; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 042114fbd200..fb07be386287 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1300,11 +1300,62 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) } EXPORT_SYMBOL_GPL(dm_accept_partial_bio); +/* + * Flush current->bio_list when the target map method blocks. + * This fixes deadlocks in snapshot and possibly in other targets. + */ +struct dm_offload { + struct blk_plug plug; + struct blk_plug_cb cb; +}; + +static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule) +{ + struct dm_offload *o = container_of(cb, struct dm_offload, cb); + struct bio_list list; + struct bio *bio; + + INIT_LIST_HEAD(&o->cb.list); + + if (unlikely(!current->bio_list)) + return; + + list = *current->bio_list; + bio_list_init(current->bio_list); + + while ((bio = bio_list_pop(&list))) { + struct bio_set *bs = bio->bi_pool; + if (unlikely(!bs) || bs == fs_bio_set) { + bio_list_add(current->bio_list, bio); + continue; + } + + spin_lock(&bs->rescue_lock); + bio_list_add(&bs->rescue_list, bio); + queue_work(bs->rescue_workqueue, &bs->rescue_work); + spin_unlock(&bs->rescue_lock); + } +} + +static void dm_offload_start(struct dm_offload *o) +{ + blk_start_plug(&o->plug); + o->cb.callback = flush_current_bio_list; + list_add(&o->cb.list, ¤t->plug->cb_list); +} + +static void dm_offload_end(struct dm_offload *o) +{ + list_del(&o->cb.list); + blk_finish_plug(&o->plug); +} + static void __map_bio(struct dm_target_io *tio) { int r; sector_t sector; struct mapped_device *md; + struct dm_offload o; struct bio *clone = &tio->clone; struct dm_target *ti = tio->ti; @@ -1317,7 +1368,11 @@ static void __map_bio(struct dm_target_io *tio) */ atomic_inc(&tio->io->io_count); sector = clone->bi_iter.bi_sector; + + dm_offload_start(&o); r = ti->type->map(ti, clone); + dm_offload_end(&o); + if (r == DM_MAPIO_REMAPPED) { /* the bio has been remapped so dispatch it */ diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 644f9e576736..3e4def3c2b9f 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1171,6 +1171,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio) int max_sectors; int sectors; + md_write_start(mddev, bio); + /* * Register the new request and wait if the reconstruction * thread has put up a bar for new requests. @@ -1556,8 +1558,6 @@ static void make_request(struct mddev *mddev, struct bio *bio) return; } - md_write_start(mddev, bio); - do { /* diff --git a/drivers/media/usb/dvb-usb/dib0700_core.c b/drivers/media/usb/dvb-usb/dib0700_core.c index 605b090c618b..b128624c56b8 100644 --- a/drivers/media/usb/dvb-usb/dib0700_core.c +++ b/drivers/media/usb/dvb-usb/dib0700_core.c @@ -680,7 +680,7 @@ static void dib0700_rc_urb_completion(struct urb *purb) struct dvb_usb_device *d = purb->context; struct dib0700_rc_response *poll_reply; enum rc_type protocol; - u32 uninitialized_var(keycode); + u32 keycode; u8 toggle; deb_info("%s()\n", __func__); @@ -722,7 +722,8 @@ static void dib0700_rc_urb_completion(struct urb *purb) poll_reply->nec.data == 0x00 && poll_reply->nec.not_data == 0xff) { poll_reply->data_state = 2; - break; + rc_repeat(d->rc_dev); + goto resubmit; } if ((poll_reply->nec.data ^ poll_reply->nec.not_data) != 0xff) { diff --git a/drivers/mtd/maps/pmcmsp-flash.c b/drivers/mtd/maps/pmcmsp-flash.c index 744ca5cacc9b..cf54420f943d 100644 --- a/drivers/mtd/maps/pmcmsp-flash.c +++ b/drivers/mtd/maps/pmcmsp-flash.c @@ -139,15 +139,13 @@ static int __init init_msp_flash(void) } msp_maps[i].bankwidth = 1; - msp_maps[i].name = kmalloc(7, GFP_KERNEL); + msp_maps[i].name = kstrndup(flash_name, 7, GFP_KERNEL); if (!msp_maps[i].name) { iounmap(msp_maps[i].virt); kfree(msp_parts[i]); goto cleanup_loop; } - msp_maps[i].name = strncpy(msp_maps[i].name, flash_name, 7); - for (j = 0; j < pcnt; j++) { part_name[5] = '0' + i; part_name[7] = '0' + j; diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 9d38605717a0..303f69149439 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -446,10 +446,11 @@ static int scan_pool(struct ubi_device *ubi, struct ubi_attach_info *ai, unsigned long long ec = be64_to_cpu(ech->ec); unmap_peb(ai, pnum); dbg_bld("Adding PEB to free: %i", pnum); + if (err == UBI_IO_FF_BITFLIPS) - add_aeb(ai, free, pnum, ec, 1); - else - add_aeb(ai, free, pnum, ec, 0); + scrub = 1; + + add_aeb(ai, free, pnum, ec, scrub); continue; } else if (err == 0 || err == UBI_IO_BITFLIPS) { dbg_bld("Found non empty PEB:%i in pool", pnum); diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index 56022d647837..f0cab2d3532f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -56,13 +56,19 @@ void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn) { struct mlx4_cq *cq; + rcu_read_lock(); cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree, cqn & (dev->caps.num_cqs - 1)); + rcu_read_unlock(); + if (!cq) { mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn); return; } + /* Acessing the CQ outside of rcu_read_lock is safe, because + * the CQ is freed only after interrupt handling is completed. + */ ++cq->arm_sn; cq->comp(cq); @@ -73,23 +79,19 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type) struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table; struct mlx4_cq *cq; - spin_lock(&cq_table->lock); - + rcu_read_lock(); cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1)); - if (cq) - atomic_inc(&cq->refcount); - - spin_unlock(&cq_table->lock); + rcu_read_unlock(); if (!cq) { - mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn); + mlx4_dbg(dev, "Async event for bogus CQ %08x\n", cqn); return; } + /* Acessing the CQ outside of rcu_read_lock is safe, because + * the CQ is freed only after interrupt handling is completed. + */ cq->event(cq, event_type); - - if (atomic_dec_and_test(&cq->refcount)) - complete(&cq->free); } static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, @@ -256,9 +258,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, if (err) return err; - spin_lock_irq(&cq_table->lock); + spin_lock(&cq_table->lock); err = radix_tree_insert(&cq_table->tree, cq->cqn, cq); - spin_unlock_irq(&cq_table->lock); + spin_unlock(&cq_table->lock); if (err) goto err_icm; @@ -297,9 +299,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, return 0; err_radix: - spin_lock_irq(&cq_table->lock); + spin_lock(&cq_table->lock); radix_tree_delete(&cq_table->tree, cq->cqn); - spin_unlock_irq(&cq_table->lock); + spin_unlock(&cq_table->lock); err_icm: mlx4_cq_free_icm(dev, cq->cqn); @@ -314,16 +316,16 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq) struct mlx4_cq_table *cq_table = &priv->cq_table; int err; + spin_lock(&cq_table->lock); + radix_tree_delete(&cq_table->tree, cq->cqn); + spin_unlock(&cq_table->lock); + err = mlx4_HW2SW_CQ(dev, NULL, cq->cqn); if (err) mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn); synchronize_irq(priv->eq_table.eq[cq->vector].irq); - spin_lock_irq(&cq_table->lock); - radix_tree_delete(&cq_table->tree, cq->cqn); - spin_unlock_irq(&cq_table->lock); - if (atomic_dec_and_test(&cq->refcount)) complete(&cq->free); wait_for_completion(&cq->free); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 5bbb59dce4d5..c175938d9717 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -438,8 +438,14 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) ring->cqn = priv->rx_cq[ring_ind]->mcq.cqn; ring->stride = stride; - if (ring->stride <= TXBB_SIZE) + if (ring->stride <= TXBB_SIZE) { + /* Stamp first unused send wqe */ + __be32 *ptr = (__be32 *)ring->buf; + __be32 stamp = cpu_to_be32(1 << STAMP_SHIFT); + *ptr = stamp; + /* Move pointer to start of rx section */ ring->buf += TXBB_SIZE; + } ring->log_stride = ffs(ring->stride) - 1; ring->buf_size = ring->size * ring->stride; @@ -501,8 +507,11 @@ void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv) return; for (ring = 0; ring < priv->rx_ring_num; ring++) { - if (mlx4_en_is_ring_empty(priv->rx_ring[ring])) + if (mlx4_en_is_ring_empty(priv->rx_ring[ring])) { + local_bh_disable(); napi_reschedule(&priv->rx_cq[ring]->napi); + local_bh_enable(); + } } } diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index e57df91bad06..fbb0c02276f9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2769,6 +2769,9 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, put_res(dev, slave, srqn, RES_SRQ); qp->srq = srq; } + + /* Save param3 for dynamic changes from VST back to VGT */ + qp->param3 = qpc->param3; put_res(dev, slave, rcqn, RES_CQ); put_res(dev, slave, mtt_base, RES_MTT); res_end_move(dev, slave, RES_QP, qpn); @@ -3531,7 +3534,6 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, int qpn = vhcr->in_modifier & 0x7fffff; struct res_qp *qp; u8 orig_sched_queue; - __be32 orig_param3 = qpc->param3; u8 orig_vlan_control = qpc->pri_path.vlan_control; u8 orig_fvl_rx = qpc->pri_path.fvl_rx; u8 orig_pri_path_fl = qpc->pri_path.fl; @@ -3572,7 +3574,6 @@ out: */ if (!err) { qp->sched_queue = orig_sched_queue; - qp->param3 = orig_param3; qp->vlan_control = orig_vlan_control; qp->fvl_rx = orig_fvl_rx; qp->pri_path_fl = orig_pri_path_fl; diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 6cc3cf6f17c8..9a289e79e8a7 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -535,7 +535,7 @@ static inline void smc_rcv(struct net_device *dev) #define smc_special_lock(lock, flags) spin_lock_irqsave(lock, flags) #define smc_special_unlock(lock, flags) spin_unlock_irqrestore(lock, flags) #else -#define smc_special_trylock(lock, flags) (flags == flags) +#define smc_special_trylock(lock, flags) ((void)flags, true) #define smc_special_lock(lock, flags) do { flags = 0; } while (0) #define smc_special_unlock(lock, flags) do { flags = 0; } while (0) #endif diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c index dd9430043536..cf5ce371ec21 100644 --- a/drivers/net/ethernet/ti/cpmac.c +++ b/drivers/net/ethernet/ti/cpmac.c @@ -1235,7 +1235,7 @@ int cpmac_init(void) goto fail_alloc; } -#warning FIXME: unhardcode gpio&reset bits + /* FIXME: unhardcode gpio&reset bits */ ar7_gpio_disable(26); ar7_gpio_disable(27); ar7_device_reset(AR7_RESET_BIT_CPMAC_LO); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 59282dd2d9e0..2dc89582575a 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2260,7 +2260,7 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) if (data[IFLA_VXLAN_ID]) { __u32 id = nla_get_u32(data[IFLA_VXLAN_ID]); - if (id >= VXLAN_VID_MASK) + if (id >= VXLAN_N_VID) return -ERANGE; } diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 966497d10c6e..f3c64b38f1d3 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -293,6 +293,8 @@ int pwmchip_remove(struct pwm_chip *chip) unsigned int i; int ret = 0; + pwmchip_sysfs_unexport_children(chip); + mutex_lock(&pwm_lock); for (i = 0; i < chip->npwm; i++) { diff --git a/drivers/pwm/sysfs.c b/drivers/pwm/sysfs.c index 4bd0c639e16d..6c88e1adf637 100644 --- a/drivers/pwm/sysfs.c +++ b/drivers/pwm/sysfs.c @@ -340,6 +340,24 @@ void pwmchip_sysfs_unexport(struct pwm_chip *chip) } } +void pwmchip_sysfs_unexport_children(struct pwm_chip *chip) +{ + struct device *parent; + unsigned int i; + + parent = class_find_device(&pwm_class, NULL, chip, + pwmchip_sysfs_match); + if (!parent) + return; + + for (i = 0; i < chip->npwm; i++) { + struct pwm_device *pwm = &chip->pwms[i]; + + if (test_bit(PWMF_EXPORTED, &pwm->flags)) + pwm_unexport_child(parent, pwm); + } +} + static int __init pwm_sysfs_init(void) { return class_register(&pwm_class); diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c index 5d06253c2a7a..30e9fbbff051 100644 --- a/drivers/s390/cio/qdio_thinint.c +++ b/drivers/s390/cio/qdio_thinint.c @@ -147,11 +147,11 @@ static inline void tiqdio_call_inq_handlers(struct qdio_irq *irq) struct qdio_q *q; int i; - for_each_input_queue(irq, q, i) { - if (!references_shared_dsci(irq) && - has_multiple_inq_on_dsci(irq)) - xchg(q->irq_ptr->dsci, 0); + if (!references_shared_dsci(irq) && + has_multiple_inq_on_dsci(irq)) + xchg(irq->dsci, 0); + for_each_input_queue(irq, q, i) { if (q->u.in.queue_start_poll) { /* skip if polling is enabled or already in work */ if (test_and_set_bit(QDIO_QUEUE_IRQS_DISABLED, diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c index 89215d44d83f..f054d8dee013 100644 --- a/drivers/scsi/mvsas/mv_sas.c +++ b/drivers/scsi/mvsas/mv_sas.c @@ -737,8 +737,8 @@ static int mvs_task_prep(struct sas_task *task, struct mvs_info *mvi, int is_tmf mv_dprintk("device %016llx not ready.\n", SAS_ADDR(dev->sas_addr)); - rc = SAS_PHY_DOWN; - return rc; + rc = SAS_PHY_DOWN; + return rc; } tei.port = dev->port->lldd_port; if (tei.port && !tei.port->port_attached && !tmf) { diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index ad4f5790a76f..658d640022be 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -392,6 +392,7 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) ret = PTR_ERR(vmfile); goto out; } + vmfile->f_mode |= FMODE_LSEEK; asma->file = vmfile; } get_file(asma->file); diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index c69c40d69d5c..2186380a6f51 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -998,7 +998,7 @@ static int binder_dec_node(struct binder_node *node, int strong, int internal) static struct binder_ref *binder_get_ref(struct binder_proc *proc, - uint32_t desc) + u32 desc, bool need_strong_ref) { struct rb_node *n = proc->refs_by_desc.rb_node; struct binder_ref *ref; @@ -1006,12 +1006,16 @@ static struct binder_ref *binder_get_ref(struct binder_proc *proc, while (n) { ref = rb_entry(n, struct binder_ref, rb_node_desc); - if (desc < ref->desc) + if (desc < ref->desc) { n = n->rb_left; - else if (desc > ref->desc) + } else if (desc > ref->desc) { n = n->rb_right; - else + } else if (need_strong_ref && !ref->strong) { + binder_user_error("tried to use weak ref as strong ref\n"); + return NULL; + } else { return ref; + } } return NULL; } @@ -1281,7 +1285,10 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, } break; case BINDER_TYPE_HANDLE: case BINDER_TYPE_WEAK_HANDLE: { - struct binder_ref *ref = binder_get_ref(proc, fp->handle); + struct binder_ref *ref; + + ref = binder_get_ref(proc, fp->handle, + fp->type == BINDER_TYPE_HANDLE); if (ref == NULL) { pr_err("transaction release %d bad handle %d\n", @@ -1375,7 +1382,7 @@ static void binder_transaction(struct binder_proc *proc, if (tr->target.handle) { struct binder_ref *ref; - ref = binder_get_ref(proc, tr->target.handle); + ref = binder_get_ref(proc, tr->target.handle, true); if (ref == NULL) { binder_user_error("%d:%d got transaction to invalid handle\n", proc->pid, thread->pid); @@ -1556,7 +1563,9 @@ static void binder_transaction(struct binder_proc *proc, fp->type = BINDER_TYPE_HANDLE; else fp->type = BINDER_TYPE_WEAK_HANDLE; + fp->binder = 0; fp->handle = ref->desc; + fp->cookie = 0; binder_inc_ref(ref, fp->type == BINDER_TYPE_HANDLE, &thread->todo); @@ -1568,7 +1577,10 @@ static void binder_transaction(struct binder_proc *proc, } break; case BINDER_TYPE_HANDLE: case BINDER_TYPE_WEAK_HANDLE: { - struct binder_ref *ref = binder_get_ref(proc, fp->handle); + struct binder_ref *ref; + + ref = binder_get_ref(proc, fp->handle, + fp->type == BINDER_TYPE_HANDLE); if (ref == NULL) { binder_user_error("%d:%d got transaction with invalid handle, %d\n", @@ -1598,7 +1610,9 @@ static void binder_transaction(struct binder_proc *proc, return_error = BR_FAILED_REPLY; goto err_binder_get_ref_for_node_failed; } + fp->binder = 0; fp->handle = new_ref->desc; + fp->cookie = 0; binder_inc_ref(new_ref, fp->type == BINDER_TYPE_HANDLE, NULL); trace_binder_transaction_ref_to_ref(t, ref, new_ref); @@ -1645,6 +1659,7 @@ static void binder_transaction(struct binder_proc *proc, binder_debug(BINDER_DEBUG_TRANSACTION, " fd %d -> %d\n", fp->handle, target_fd); /* TODO: fput? */ + fp->binder = 0; fp->handle = target_fd; } break; @@ -1767,7 +1782,9 @@ static int binder_thread_write(struct binder_proc *proc, ref->desc); } } else - ref = binder_get_ref(proc, target); + ref = binder_get_ref(proc, target, + cmd == BC_ACQUIRE || + cmd == BC_RELEASE); if (ref == NULL) { binder_user_error("%d:%d refcount change on invalid ref %d\n", proc->pid, thread->pid, target); @@ -1963,7 +1980,7 @@ static int binder_thread_write(struct binder_proc *proc, if (get_user(cookie, (binder_uintptr_t __user *)ptr)) return -EFAULT; ptr += sizeof(binder_uintptr_t); - ref = binder_get_ref(proc, target); + ref = binder_get_ref(proc, target, false); if (ref == NULL) { binder_user_error("%d:%d %s invalid ref %d\n", proc->pid, thread->pid, diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c index b6bd609c3655..3559e1d19c3a 100644 --- a/drivers/staging/iio/impedance-analyzer/ad5933.c +++ b/drivers/staging/iio/impedance-analyzer/ad5933.c @@ -647,6 +647,7 @@ static void ad5933_work(struct work_struct *work) struct iio_dev *indio_dev = i2c_get_clientdata(st->client); signed short buf[2]; unsigned char status; + int ret; mutex_lock(&indio_dev->mlock); if (st->state == AD5933_CTRL_INIT_START_FREQ) { @@ -654,19 +655,22 @@ static void ad5933_work(struct work_struct *work) ad5933_cmd(st, AD5933_CTRL_START_SWEEP); st->state = AD5933_CTRL_START_SWEEP; schedule_delayed_work(&st->work, st->poll_time_jiffies); - mutex_unlock(&indio_dev->mlock); - return; + goto out; } - ad5933_i2c_read(st->client, AD5933_REG_STATUS, 1, &status); + ret = ad5933_i2c_read(st->client, AD5933_REG_STATUS, 1, &status); + if (ret) + goto out; if (status & AD5933_STAT_DATA_VALID) { int scan_count = bitmap_weight(indio_dev->active_scan_mask, indio_dev->masklength); - ad5933_i2c_read(st->client, + ret = ad5933_i2c_read(st->client, test_bit(1, indio_dev->active_scan_mask) ? AD5933_REG_REAL_DATA : AD5933_REG_IMAG_DATA, scan_count * 2, (u8 *)buf); + if (ret) + goto out; if (scan_count == 2) { buf[0] = be16_to_cpu(buf[0]); @@ -678,8 +682,7 @@ static void ad5933_work(struct work_struct *work) } else { /* no data available - try again later */ schedule_delayed_work(&st->work, st->poll_time_jiffies); - mutex_unlock(&indio_dev->mlock); - return; + goto out; } if (status & AD5933_STAT_SWEEP_DONE) { @@ -691,7 +694,7 @@ static void ad5933_work(struct work_struct *work) ad5933_cmd(st, AD5933_CTRL_INC_FREQ); schedule_delayed_work(&st->work, st->poll_time_jiffies); } - +out: mutex_unlock(&indio_dev->mlock); } diff --git a/drivers/staging/nvec/nvec_ps2.c b/drivers/staging/nvec/nvec_ps2.c index f56f1db15bad..3f631c067f54 100644 --- a/drivers/staging/nvec/nvec_ps2.c +++ b/drivers/staging/nvec/nvec_ps2.c @@ -106,13 +106,12 @@ static int nvec_mouse_probe(struct platform_device *pdev) { struct nvec_chip *nvec = dev_get_drvdata(pdev->dev.parent); struct serio *ser_dev; - char mouse_reset[] = { NVEC_PS2, SEND_COMMAND, PSMOUSE_RST, 3 }; ser_dev = devm_kzalloc(&pdev->dev, sizeof(struct serio), GFP_KERNEL); if (ser_dev == NULL) return -ENOMEM; - ser_dev->id.type = SERIO_PS_PSTHRU; + ser_dev->id.type = SERIO_8042; ser_dev->write = ps2_sendcommand; ser_dev->start = ps2_startstreaming; ser_dev->stop = ps2_stopstreaming; @@ -127,9 +126,6 @@ static int nvec_mouse_probe(struct platform_device *pdev) serio_register_port(ser_dev); - /* mouse reset */ - nvec_write_async(nvec, mouse_reset, sizeof(mouse_reset)); - return 0; } diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index 644ddb841d9f..6d1e2f746ab4 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -114,7 +114,7 @@ #define DEFAULT_TX_BUF_COUNT 3 struct n_hdlc_buf { - struct n_hdlc_buf *link; + struct list_head list_item; int count; char buf[1]; }; @@ -122,8 +122,7 @@ struct n_hdlc_buf { #define N_HDLC_BUF_SIZE (sizeof(struct n_hdlc_buf) + maxframe) struct n_hdlc_buf_list { - struct n_hdlc_buf *head; - struct n_hdlc_buf *tail; + struct list_head list; int count; spinlock_t spinlock; }; @@ -136,7 +135,6 @@ struct n_hdlc_buf_list { * @backup_tty - TTY to use if tty gets closed * @tbusy - reentrancy flag for tx wakeup code * @woke_up - FIXME: describe this field - * @tbuf - currently transmitting tx buffer * @tx_buf_list - list of pending transmit frame buffers * @rx_buf_list - list of received frame buffers * @tx_free_buf_list - list unused transmit frame buffers @@ -149,7 +147,6 @@ struct n_hdlc { struct tty_struct *backup_tty; int tbusy; int woke_up; - struct n_hdlc_buf *tbuf; struct n_hdlc_buf_list tx_buf_list; struct n_hdlc_buf_list rx_buf_list; struct n_hdlc_buf_list tx_free_buf_list; @@ -159,7 +156,8 @@ struct n_hdlc { /* * HDLC buffer list manipulation functions */ -static void n_hdlc_buf_list_init(struct n_hdlc_buf_list *list); +static void n_hdlc_buf_return(struct n_hdlc_buf_list *buf_list, + struct n_hdlc_buf *buf); static void n_hdlc_buf_put(struct n_hdlc_buf_list *list, struct n_hdlc_buf *buf); static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *list); @@ -209,16 +207,9 @@ static void flush_tx_queue(struct tty_struct *tty) { struct n_hdlc *n_hdlc = tty2n_hdlc(tty); struct n_hdlc_buf *buf; - unsigned long flags; while ((buf = n_hdlc_buf_get(&n_hdlc->tx_buf_list))) n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, buf); - spin_lock_irqsave(&n_hdlc->tx_buf_list.spinlock, flags); - if (n_hdlc->tbuf) { - n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, n_hdlc->tbuf); - n_hdlc->tbuf = NULL; - } - spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock, flags); } static struct tty_ldisc_ops n_hdlc_ldisc = { @@ -284,7 +275,6 @@ static void n_hdlc_release(struct n_hdlc *n_hdlc) } else break; } - kfree(n_hdlc->tbuf); kfree(n_hdlc); } /* end of n_hdlc_release() */ @@ -403,13 +393,7 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty) n_hdlc->woke_up = 0; spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock, flags); - /* get current transmit buffer or get new transmit */ - /* buffer from list of pending transmit buffers */ - - tbuf = n_hdlc->tbuf; - if (!tbuf) - tbuf = n_hdlc_buf_get(&n_hdlc->tx_buf_list); - + tbuf = n_hdlc_buf_get(&n_hdlc->tx_buf_list); while (tbuf) { if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d)sending frame %p, count=%d\n", @@ -421,7 +405,7 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty) /* rollback was possible and has been done */ if (actual == -ERESTARTSYS) { - n_hdlc->tbuf = tbuf; + n_hdlc_buf_return(&n_hdlc->tx_buf_list, tbuf); break; } /* if transmit error, throw frame away by */ @@ -436,10 +420,7 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty) /* free current transmit buffer */ n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, tbuf); - - /* this tx buffer is done */ - n_hdlc->tbuf = NULL; - + /* wait up sleeping writers */ wake_up_interruptible(&tty->write_wait); @@ -449,10 +430,12 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty) if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d)frame %p pending\n", __FILE__,__LINE__,tbuf); - - /* buffer not accepted by driver */ - /* set this buffer as pending buffer */ - n_hdlc->tbuf = tbuf; + + /* + * the buffer was not accepted by driver, + * return it back into tx queue + */ + n_hdlc_buf_return(&n_hdlc->tx_buf_list, tbuf); break; } } @@ -750,7 +733,8 @@ static int n_hdlc_tty_ioctl(struct tty_struct *tty, struct file *file, int error = 0; int count; unsigned long flags; - + struct n_hdlc_buf *buf = NULL; + if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d)n_hdlc_tty_ioctl() called %d\n", __FILE__,__LINE__,cmd); @@ -764,8 +748,10 @@ static int n_hdlc_tty_ioctl(struct tty_struct *tty, struct file *file, /* report count of read data available */ /* in next available frame (if any) */ spin_lock_irqsave(&n_hdlc->rx_buf_list.spinlock,flags); - if (n_hdlc->rx_buf_list.head) - count = n_hdlc->rx_buf_list.head->count; + buf = list_first_entry_or_null(&n_hdlc->rx_buf_list.list, + struct n_hdlc_buf, list_item); + if (buf) + count = buf->count; else count = 0; spin_unlock_irqrestore(&n_hdlc->rx_buf_list.spinlock,flags); @@ -777,8 +763,10 @@ static int n_hdlc_tty_ioctl(struct tty_struct *tty, struct file *file, count = tty_chars_in_buffer(tty); /* add size of next output frame in queue */ spin_lock_irqsave(&n_hdlc->tx_buf_list.spinlock,flags); - if (n_hdlc->tx_buf_list.head) - count += n_hdlc->tx_buf_list.head->count; + buf = list_first_entry_or_null(&n_hdlc->tx_buf_list.list, + struct n_hdlc_buf, list_item); + if (buf) + count += buf->count; spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock,flags); error = put_user(count, (int __user *)arg); break; @@ -826,14 +814,14 @@ static unsigned int n_hdlc_tty_poll(struct tty_struct *tty, struct file *filp, poll_wait(filp, &tty->write_wait, wait); /* set bits for operations that won't block */ - if (n_hdlc->rx_buf_list.head) + if (!list_empty(&n_hdlc->rx_buf_list.list)) mask |= POLLIN | POLLRDNORM; /* readable */ if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) mask |= POLLHUP; if (tty_hung_up_p(filp)) mask |= POLLHUP; if (!tty_is_writelocked(tty) && - n_hdlc->tx_free_buf_list.head) + !list_empty(&n_hdlc->tx_free_buf_list.list)) mask |= POLLOUT | POLLWRNORM; /* writable */ } return mask; @@ -853,11 +841,16 @@ static struct n_hdlc *n_hdlc_alloc(void) if (!n_hdlc) return NULL; - n_hdlc_buf_list_init(&n_hdlc->rx_free_buf_list); - n_hdlc_buf_list_init(&n_hdlc->tx_free_buf_list); - n_hdlc_buf_list_init(&n_hdlc->rx_buf_list); - n_hdlc_buf_list_init(&n_hdlc->tx_buf_list); - + spin_lock_init(&n_hdlc->rx_free_buf_list.spinlock); + spin_lock_init(&n_hdlc->tx_free_buf_list.spinlock); + spin_lock_init(&n_hdlc->rx_buf_list.spinlock); + spin_lock_init(&n_hdlc->tx_buf_list.spinlock); + + INIT_LIST_HEAD(&n_hdlc->rx_free_buf_list.list); + INIT_LIST_HEAD(&n_hdlc->tx_free_buf_list.list); + INIT_LIST_HEAD(&n_hdlc->rx_buf_list.list); + INIT_LIST_HEAD(&n_hdlc->tx_buf_list.list); + /* allocate free rx buffer list */ for(i=0;i<DEFAULT_RX_BUF_COUNT;i++) { buf = kmalloc(N_HDLC_BUF_SIZE, GFP_KERNEL); @@ -885,63 +878,65 @@ static struct n_hdlc *n_hdlc_alloc(void) } /* end of n_hdlc_alloc() */ /** - * n_hdlc_buf_list_init - initialize specified HDLC buffer list - * @list - pointer to buffer list + * n_hdlc_buf_return - put the HDLC buffer after the head of the specified list + * @buf_list - pointer to the buffer list + * @buf - pointer to the buffer */ -static void n_hdlc_buf_list_init(struct n_hdlc_buf_list *list) +static void n_hdlc_buf_return(struct n_hdlc_buf_list *buf_list, + struct n_hdlc_buf *buf) { - memset(list, 0, sizeof(*list)); - spin_lock_init(&list->spinlock); -} /* end of n_hdlc_buf_list_init() */ + unsigned long flags; + + spin_lock_irqsave(&buf_list->spinlock, flags); + + list_add(&buf->list_item, &buf_list->list); + buf_list->count++; + + spin_unlock_irqrestore(&buf_list->spinlock, flags); +} /** * n_hdlc_buf_put - add specified HDLC buffer to tail of specified list - * @list - pointer to buffer list + * @buf_list - pointer to buffer list * @buf - pointer to buffer */ -static void n_hdlc_buf_put(struct n_hdlc_buf_list *list, +static void n_hdlc_buf_put(struct n_hdlc_buf_list *buf_list, struct n_hdlc_buf *buf) { unsigned long flags; - spin_lock_irqsave(&list->spinlock,flags); - - buf->link=NULL; - if (list->tail) - list->tail->link = buf; - else - list->head = buf; - list->tail = buf; - (list->count)++; - - spin_unlock_irqrestore(&list->spinlock,flags); - + + spin_lock_irqsave(&buf_list->spinlock, flags); + + list_add_tail(&buf->list_item, &buf_list->list); + buf_list->count++; + + spin_unlock_irqrestore(&buf_list->spinlock, flags); } /* end of n_hdlc_buf_put() */ /** * n_hdlc_buf_get - remove and return an HDLC buffer from list - * @list - pointer to HDLC buffer list + * @buf_list - pointer to HDLC buffer list * * Remove and return an HDLC buffer from the head of the specified HDLC buffer * list. * Returns a pointer to HDLC buffer if available, otherwise %NULL. */ -static struct n_hdlc_buf* n_hdlc_buf_get(struct n_hdlc_buf_list *list) +static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *buf_list) { unsigned long flags; struct n_hdlc_buf *buf; - spin_lock_irqsave(&list->spinlock,flags); - - buf = list->head; + + spin_lock_irqsave(&buf_list->spinlock, flags); + + buf = list_first_entry_or_null(&buf_list->list, + struct n_hdlc_buf, list_item); if (buf) { - list->head = buf->link; - (list->count)--; + list_del(&buf->list_item); + buf_list->count--; } - if (!list->head) - list->tail = NULL; - - spin_unlock_irqrestore(&list->spinlock,flags); + + spin_unlock_irqrestore(&buf_list->spinlock, flags); return buf; - } /* end of n_hdlc_buf_get() */ static char hdlc_banner[] __initdata = diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index f6e5ef5bac5e..786bc9e6bed9 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -2743,6 +2743,8 @@ enum pci_board_num_t { pbn_b0_4_1152000_200, pbn_b0_8_1152000_200, + pbn_b0_4_1250000, + pbn_b0_2_1843200, pbn_b0_4_1843200, @@ -2971,6 +2973,13 @@ static struct pciserial_board pci_boards[] = { .uart_offset = 0x200, }, + [pbn_b0_4_1250000] = { + .flags = FL_BASE0, + .num_ports = 4, + .base_baud = 1250000, + .uart_offset = 8, + }, + [pbn_b0_2_1843200] = { .flags = FL_BASE0, .num_ports = 2, @@ -5464,6 +5473,10 @@ static struct pci_device_id serial_pci_tbl[] = { { PCI_DEVICE(0x1c29, 0x1108), .driver_data = pbn_fintek_8 }, { PCI_DEVICE(0x1c29, 0x1112), .driver_data = pbn_fintek_12 }, + /* MKS Tenta SCOM-080x serial cards */ + { PCI_DEVICE(0x1601, 0x0800), .driver_data = pbn_b0_4_1250000 }, + { PCI_DEVICE(0x1601, 0xa801), .driver_data = pbn_b0_4_1250000 }, + /* * These entries match devices with class COMMUNICATION_SERIAL, * COMMUNICATION_MODEM or COMMUNICATION_MULTISERIAL diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 1a049e4de937..a57f3761ab47 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1169,7 +1169,7 @@ static void csi_J(struct vc_data *vc, int vpar) break; case 3: /* erase scroll-back buffer (and whole display) */ scr_memsetw(vc->vc_screenbuf, vc->vc_video_erase_char, - vc->vc_screenbuf_size >> 1); + vc->vc_screenbuf_size); set_origin(vc); if (CON_IS_VISIBLE(vc)) update_screen(vc); diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 9bdc6bd73432..78f4d70db917 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -594,6 +594,7 @@ static int ci_hdrc_probe(struct platform_device *pdev) return -ENOMEM; } + spin_lock_init(&ci->lock); ci->dev = dev; ci->platdata = dev_get_platdata(dev); ci->imx28_write_fix = !!(ci->platdata->flags & diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 7b362870277e..98d5882aa91b 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -1798,8 +1798,6 @@ static int udc_start(struct ci_hdrc *ci) struct device *dev = ci->dev; int retval = 0; - spin_lock_init(&ci->lock); - ci->gadget.ops = &usb_gadget_ops; ci->gadget.speed = USB_SPEED_UNKNOWN; ci->gadget.max_speed = USB_SPEED_HIGH; diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index e2f63f5d30cb..201e9229d323 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -872,8 +872,6 @@ static int wait_serial_change(struct acm *acm, unsigned long arg) DECLARE_WAITQUEUE(wait, current); struct async_icount old, new; - if (arg & (TIOCM_DSR | TIOCM_RI | TIOCM_CD )) - return -EINVAL; do { spin_lock_irq(&acm->read_lock); old = acm->oldcount; diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 81336acc7040..65e2cec1ca2a 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -208,6 +208,16 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, /* * Adjust bInterval for quirked devices. + */ + /* + * This quirk fixes bIntervals reported in ms. + */ + if (to_usb_device(ddev)->quirks & + USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL) { + n = clamp(fls(d->bInterval) + 3, i, j); + i = j = n; + } + /* * This quirk fixes bIntervals reported in * linear microframes. */ diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 096bb82c69c4..652e3e558257 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -498,8 +498,10 @@ static int rh_call_control (struct usb_hcd *hcd, struct urb *urb) */ tbuf_size = max_t(u16, sizeof(struct usb_hub_descriptor), wLength); tbuf = kzalloc(tbuf_size, GFP_KERNEL); - if (!tbuf) - return -ENOMEM; + if (!tbuf) { + status = -ENOMEM; + goto err_alloc; + } bufp = tbuf; @@ -702,6 +704,7 @@ error: } kfree(tbuf); + err_alloc: /* any errors get returned through the urb completion */ spin_lock_irq(&hcd_root_hub_lock); diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 1a3df0f8b891..a3c136b03810 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2616,8 +2616,15 @@ static int hub_port_wait_reset(struct usb_hub *hub, int port1, if (ret < 0) return ret; - /* The port state is unknown until the reset completes. */ - if (!(portstatus & USB_PORT_STAT_RESET)) + /* + * The port state is unknown until the reset completes. + * + * On top of that, some chips may require additional time + * to re-establish a connection after the reset is complete, + * so also wait for the connection to be re-established. + */ + if (!(portstatus & USB_PORT_STAT_RESET) && + (portstatus & USB_PORT_STAT_CONNECTION)) break; /* switch to the long delay after two short delay failures */ @@ -4168,7 +4175,7 @@ static void hub_set_initial_usb2_lpm_policy(struct usb_device *udev) struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent); int connect_type = USB_PORT_CONNECT_TYPE_UNKNOWN; - if (!udev->usb2_hw_lpm_capable) + if (!udev->usb2_hw_lpm_capable || !udev->bos) return; if (hub) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 6c7e47f67dd4..ab12e018de86 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -163,6 +163,14 @@ static const struct usb_device_id usb_quirk_list[] = { /* M-Systems Flash Disk Pioneers */ { USB_DEVICE(0x08ec, 0x1000), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Baum Vario Ultra */ + { USB_DEVICE(0x0904, 0x6101), .driver_info = + USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, + { USB_DEVICE(0x0904, 0x6102), .driver_info = + USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, + { USB_DEVICE(0x0904, 0x6103), .driver_info = + USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, + /* Keytouch QWERTY Panel keyboard */ { USB_DEVICE(0x0926, 0x3333), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, diff --git a/drivers/usb/dwc3/gadget.h b/drivers/usb/dwc3/gadget.h index 18ae3eaa8b6f..ccd9694f8e36 100644 --- a/drivers/usb/dwc3/gadget.h +++ b/drivers/usb/dwc3/gadget.h @@ -28,23 +28,23 @@ struct dwc3; #define gadget_to_dwc(g) (container_of(g, struct dwc3, gadget)) /* DEPCFG parameter 1 */ -#define DWC3_DEPCFG_INT_NUM(n) ((n) << 0) +#define DWC3_DEPCFG_INT_NUM(n) (((n) & 0x1f) << 0) #define DWC3_DEPCFG_XFER_COMPLETE_EN (1 << 8) #define DWC3_DEPCFG_XFER_IN_PROGRESS_EN (1 << 9) #define DWC3_DEPCFG_XFER_NOT_READY_EN (1 << 10) #define DWC3_DEPCFG_FIFO_ERROR_EN (1 << 11) #define DWC3_DEPCFG_STREAM_EVENT_EN (1 << 13) -#define DWC3_DEPCFG_BINTERVAL_M1(n) ((n) << 16) +#define DWC3_DEPCFG_BINTERVAL_M1(n) (((n) & 0xff) << 16) #define DWC3_DEPCFG_STREAM_CAPABLE (1 << 24) -#define DWC3_DEPCFG_EP_NUMBER(n) ((n) << 25) +#define DWC3_DEPCFG_EP_NUMBER(n) (((n) & 0x1f) << 25) #define DWC3_DEPCFG_BULK_BASED (1 << 30) #define DWC3_DEPCFG_FIFO_BASED (1 << 31) /* DEPCFG parameter 0 */ -#define DWC3_DEPCFG_EP_TYPE(n) ((n) << 1) -#define DWC3_DEPCFG_MAX_PACKET_SIZE(n) ((n) << 3) -#define DWC3_DEPCFG_FIFO_NUMBER(n) ((n) << 17) -#define DWC3_DEPCFG_BURST_SIZE(n) ((n) << 22) +#define DWC3_DEPCFG_EP_TYPE(n) (((n) & 0x3) << 1) +#define DWC3_DEPCFG_MAX_PACKET_SIZE(n) (((n) & 0x7ff) << 3) +#define DWC3_DEPCFG_FIFO_NUMBER(n) (((n) & 0x1f) << 17) +#define DWC3_DEPCFG_BURST_SIZE(n) (((n) & 0xf) << 22) #define DWC3_DEPCFG_DATA_SEQ_NUM(n) ((n) << 26) /* This applies for core versions earlier than 1.94a */ #define DWC3_DEPCFG_IGN_SEQ_NUM (1 << 31) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index ab9b7ac63407..58888b8bd389 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1649,11 +1649,14 @@ static int ffs_func_eps_enable(struct ffs_function *func) spin_lock_irqsave(&func->ffs->eps_lock, flags); do { struct usb_endpoint_descriptor *ds; + struct usb_ss_ep_comp_descriptor *comp_desc = NULL; + int needs_comp_desc = false; int desc_idx; - if (ffs->gadget->speed == USB_SPEED_SUPER) + if (ffs->gadget->speed == USB_SPEED_SUPER) { desc_idx = 2; - else if (ffs->gadget->speed == USB_SPEED_HIGH) + needs_comp_desc = true; + } else if (ffs->gadget->speed == USB_SPEED_HIGH) desc_idx = 1; else desc_idx = 0; @@ -1670,6 +1673,14 @@ static int ffs_func_eps_enable(struct ffs_function *func) ep->ep->driver_data = ep; ep->ep->desc = ds; + + comp_desc = (struct usb_ss_ep_comp_descriptor *)(ds + + USB_DT_ENDPOINT_SIZE); + ep->ep->maxburst = comp_desc->bMaxBurst + 1; + + if (needs_comp_desc) + ep->ep->comp_desc = comp_desc; + ret = usb_ep_enable(ep->ep); if (likely(!ret)) { epfile->ep = ep; diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c index 945b3bd2ca98..f4a0b25d1810 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c @@ -643,7 +643,7 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f) uvc_ss_streaming_comp.bMaxBurst = opts->streaming_maxburst; uvc_ss_streaming_comp.wBytesPerInterval = cpu_to_le16(max_packet_size * max_packet_mult * - opts->streaming_maxburst); + (opts->streaming_maxburst + 1)); /* Allocate endpoints. */ ep = usb_ep_autoconfig(cdev->gadget, &uvc_control_ep); diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index 2e7d27b4cf89..33011f8ee865 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -589,14 +589,6 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb, req->length = length; - /* throttle high/super speed IRQ rate back slightly */ - if (gadget_is_dualspeed(dev->gadget)) - req->no_interrupt = (((dev->gadget->speed == USB_SPEED_HIGH || - dev->gadget->speed == USB_SPEED_SUPER)) && - !list_empty(&dev->tx_reqs)) - ? ((atomic_read(&dev->tx_qlen) % dev->qmult) != 0) - : 0; - retval = usb_ep_queue(in, req, GFP_ATOMIC); switch (retval) { default: diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index 81dc5959e36b..53c747fed9d7 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -972,6 +972,8 @@ static int dummy_udc_probe(struct platform_device *pdev) int rc; dum = *((void **)dev_get_platdata(&pdev->dev)); + /* Clear usb_gadget region for new registration to udc-core */ + memzero_explicit(&dum->gadget, sizeof(struct usb_gadget)); dum->gadget.name = gadget_name; dum->gadget.ops = &dummy_ops; dum->gadget.max_speed = USB_SPEED_SUPER; diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 3dd487872bf1..8788799538eb 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -189,6 +189,8 @@ static int xhci_plat_remove(struct platform_device *dev) struct xhci_hcd *xhci = hcd_to_xhci(hcd); struct clk *clk = xhci->clk; + xhci->xhc_state |= XHCI_STATE_REMOVING; + usb_remove_hcd(xhci->shared_hcd); usb_put_hcd(xhci->shared_hcd); diff --git a/drivers/usb/misc/idmouse.c b/drivers/usb/misc/idmouse.c index 4e38683c653c..6d4e75785710 100644 --- a/drivers/usb/misc/idmouse.c +++ b/drivers/usb/misc/idmouse.c @@ -346,6 +346,9 @@ static int idmouse_probe(struct usb_interface *interface, if (iface_desc->desc.bInterfaceClass != 0x0A) return -ENODEV; + if (iface_desc->desc.bNumEndpoints < 1) + return -ENODEV; + /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (dev == NULL) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 1950e87b4219..775690bed4c0 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -787,12 +787,6 @@ static int iowarrior_probe(struct usb_interface *interface, iface_desc = interface->cur_altsetting; dev->product_id = le16_to_cpu(udev->descriptor.idProduct); - if (iface_desc->desc.bNumEndpoints < 1) { - dev_err(&interface->dev, "Invalid number of endpoints\n"); - retval = -EINVAL; - goto error; - } - /* set up the endpoint information */ for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { endpoint = &iface_desc->endpoint[i].desc; @@ -803,6 +797,21 @@ static int iowarrior_probe(struct usb_interface *interface, /* this one will match for the IOWarrior56 only */ dev->int_out_endpoint = endpoint; } + + if (!dev->int_in_endpoint) { + dev_err(&interface->dev, "no interrupt-in endpoint found\n"); + retval = -ENODEV; + goto error; + } + + if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) { + if (!dev->int_out_endpoint) { + dev_err(&interface->dev, "no interrupt-out endpoint found\n"); + retval = -ENODEV; + goto error; + } + } + /* we have to check the report_size often, so remember it in the endianness suitable for our machine */ dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint); if ((dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) && diff --git a/drivers/usb/misc/lvstest.c b/drivers/usb/misc/lvstest.c index 62cb8cd08403..66e2d036bb37 100644 --- a/drivers/usb/misc/lvstest.c +++ b/drivers/usb/misc/lvstest.c @@ -370,6 +370,10 @@ static int lvs_rh_probe(struct usb_interface *intf, hdev = interface_to_usbdev(intf); desc = intf->cur_altsetting; + + if (desc->desc.bNumEndpoints < 1) + return -ENODEV; + endpoint = &desc->endpoint[0].desc; /* valid only for SS root hub */ diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c index 40ef40affe83..3cb05eb5f1df 100644 --- a/drivers/usb/misc/uss720.c +++ b/drivers/usb/misc/uss720.c @@ -715,6 +715,11 @@ static int uss720_probe(struct usb_interface *intf, interface = intf->cur_altsetting; + if (interface->desc.bNumEndpoints < 3) { + usb_put_dev(usbdev); + return -ENODEV; + } + /* * Allocate parport interface */ diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index 3df7b7ec178e..e0b1fe2f60e1 100644 --- a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -1483,16 +1483,20 @@ static int digi_read_oob_callback(struct urb *urb) struct usb_serial *serial = port->serial; struct tty_struct *tty; struct digi_port *priv = usb_get_serial_port_data(port); + unsigned char *buf = urb->transfer_buffer; int opcode, line, status, val; int i; unsigned int rts; + if (urb->actual_length < 4) + return -1; + /* handle each oob command */ - for (i = 0; i < urb->actual_length - 3;) { - opcode = ((unsigned char *)urb->transfer_buffer)[i++]; - line = ((unsigned char *)urb->transfer_buffer)[i++]; - status = ((unsigned char *)urb->transfer_buffer)[i++]; - val = ((unsigned char *)urb->transfer_buffer)[i++]; + for (i = 0; i < urb->actual_length - 3; i += 4) { + opcode = buf[i]; + line = buf[i + 1]; + status = buf[i + 2]; + val = buf[i + 3]; dev_dbg(&port->dev, "digi_read_oob_callback: opcode=%d, line=%d, status=%d, val=%d\n", opcode, line, status, val); diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index ddbb8fe1046d..f6f6960218a7 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -1546,6 +1546,12 @@ static void edge_interrupt_callback(struct urb *urb) function = TIUMP_GET_FUNC_FROM_CODE(data[0]); dev_dbg(dev, "%s - port_number %d, function %d, info 0x%x\n", __func__, port_number, function, data[1]); + + if (port_number >= edge_serial->serial->num_ports) { + dev_err(dev, "bad port number %d\n", port_number); + goto exit; + } + port = edge_serial->serial->port[port_number]; edge_port = usb_get_serial_port_data(port); if (!edge_port) { @@ -1626,7 +1632,7 @@ static void edge_bulk_in_callback(struct urb *urb) port_number = edge_port->port->port_number; - if (edge_port->lsr_event) { + if (urb->actual_length > 0 && edge_port->lsr_event) { edge_port->lsr_event = 0; dev_dbg(dev, "%s ===== Port %u LSR Status = %02x, Data = %02x ======\n", __func__, port_number, edge_port->lsr_mask, *data); diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c index f6c6900bccf0..880bbdba19d2 100644 --- a/drivers/usb/serial/omninet.c +++ b/drivers/usb/serial/omninet.c @@ -129,12 +129,6 @@ static int omninet_port_remove(struct usb_serial_port *port) static int omninet_open(struct tty_struct *tty, struct usb_serial_port *port) { - struct usb_serial *serial = port->serial; - struct usb_serial_port *wport; - - wport = serial->port[1]; - tty_port_tty_set(&wport->port, tty); - return usb_serial_generic_open(tty, port); } diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 9dfbea6c1d2c..ae652223fabc 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -163,6 +163,8 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x413c, 0x81a9)}, /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81b1)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81b3)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */ + {DEVICE_SWI(0x413c, 0x81b5)}, /* Dell Wireless 5811e QDL */ + {DEVICE_SWI(0x413c, 0x81b6)}, /* Dell Wireless 5811e QDL */ /* Huawei devices */ {DEVICE_HWI(0x03f0, 0x581d)}, /* HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) */ diff --git a/drivers/usb/serial/safe_serial.c b/drivers/usb/serial/safe_serial.c index b2dff0f14743..236ea43f7815 100644 --- a/drivers/usb/serial/safe_serial.c +++ b/drivers/usb/serial/safe_serial.c @@ -205,6 +205,11 @@ static void safe_process_read_urb(struct urb *urb) if (!safe) goto out; + if (length < 2) { + dev_err(&port->dev, "malformed packet\n"); + return; + } + fcs = fcs_compute10(data, length, CRC10_INITFCS); if (fcs) { dev_err(&port->dev, "%s - bad CRC %x\n", __func__, fcs); diff --git a/drivers/usb/wusbcore/wa-hc.c b/drivers/usb/wusbcore/wa-hc.c index 252c7bd9218a..d01496fd27fe 100644 --- a/drivers/usb/wusbcore/wa-hc.c +++ b/drivers/usb/wusbcore/wa-hc.c @@ -39,6 +39,9 @@ int wa_create(struct wahc *wa, struct usb_interface *iface, int result; struct device *dev = &iface->dev; + if (iface->cur_altsetting->desc.bNumEndpoints < 3) + return -ENODEV; + result = wa_rpipes_create(wa); if (result < 0) goto error_rpipes_create; diff --git a/drivers/video/fbdev/core/fbcmap.c b/drivers/video/fbdev/core/fbcmap.c index f89245b8ba8e..68a113594808 100644 --- a/drivers/video/fbdev/core/fbcmap.c +++ b/drivers/video/fbdev/core/fbcmap.c @@ -163,17 +163,18 @@ void fb_dealloc_cmap(struct fb_cmap *cmap) int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to) { - int tooff = 0, fromoff = 0; - int size; + unsigned int tooff = 0, fromoff = 0; + size_t size; if (to->start > from->start) fromoff = to->start - from->start; else tooff = from->start - to->start; - size = to->len - tooff; - if (size > (int) (from->len - fromoff)) - size = from->len - fromoff; - if (size <= 0) + if (fromoff >= from->len || tooff >= to->len) + return -EINVAL; + + size = min_t(size_t, to->len - tooff, from->len - fromoff); + if (size == 0) return -EINVAL; size *= sizeof(u16); @@ -187,17 +188,18 @@ int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to) int fb_cmap_to_user(const struct fb_cmap *from, struct fb_cmap_user *to) { - int tooff = 0, fromoff = 0; - int size; + unsigned int tooff = 0, fromoff = 0; + size_t size; if (to->start > from->start) fromoff = to->start - from->start; else tooff = from->start - to->start; - size = to->len - tooff; - if (size > (int) (from->len - fromoff)) - size = from->len - fromoff; - if (size <= 0) + if (fromoff >= from->len || tooff >= to->len) + return -EINVAL; + + size = min_t(size_t, to->len - tooff, from->len - fromoff); + if (size == 0) return -EINVAL; size *= sizeof(u16); diff --git a/fs/coredump.c b/fs/coredump.c index 7eb6181184ea..0ad7a30e3107 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1,6 +1,7 @@ #include <linux/slab.h> #include <linux/file.h> #include <linux/fdtable.h> +#include <linux/freezer.h> #include <linux/mm.h> #include <linux/stat.h> #include <linux/fcntl.h> @@ -393,7 +394,9 @@ static int coredump_wait(int exit_code, struct core_state *core_state) if (core_waiters > 0) { struct core_thread *ptr; + freezer_do_not_count(); wait_for_completion(&core_state->startup); + freezer_count(); /* * Wait for all the threads to become inactive, so that * all the thread context (extended register state, like diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 756aead10d96..75b5a159d607 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1266,6 +1266,16 @@ out: return 0; } +static void fat_dummy_inode_init(struct inode *inode) +{ + /* Initialize this dummy inode to work as no-op. */ + MSDOS_I(inode)->mmu_private = 0; + MSDOS_I(inode)->i_start = 0; + MSDOS_I(inode)->i_logstart = 0; + MSDOS_I(inode)->i_attrs = 0; + MSDOS_I(inode)->i_pos = 0; +} + static int fat_read_root(struct inode *inode) { struct super_block *sb = inode->i_sb; @@ -1711,12 +1721,13 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, fat_inode = new_inode(sb); if (!fat_inode) goto out_fail; - MSDOS_I(fat_inode)->i_pos = 0; + fat_dummy_inode_init(fat_inode); sbi->fat_inode = fat_inode; fsinfo_inode = new_inode(sb); if (!fsinfo_inode) goto out_fail; + fat_dummy_inode_init(fsinfo_inode); fsinfo_inode->i_ino = MSDOS_FSINFO_INO; sbi->fsinfo_inode = fsinfo_inode; insert_inode_hash(fsinfo_inode); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index f5b2453a43b2..5dcd602b2996 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -153,6 +153,12 @@ xfs_setfilesize( rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], 0, 1, _THIS_IP_); + /* we abort the update if there was an IO error */ + if (ioend->io_error) { + xfs_trans_cancel(tp, 0); + return ioend->io_error; + } + xfs_ilock(ip, XFS_ILOCK_EXCL); isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size); if (!isize) { @@ -208,14 +214,17 @@ xfs_end_io( ioend->io_error = -EIO; goto done; } - if (ioend->io_error) - goto done; /* * For unwritten extents we need to issue transactions to convert a * range to normal written extens after the data I/O has finished. + * Detecting and handling completion IO errors is done individually + * for each case as different cleanup operations need to be performed + * on error. */ if (ioend->io_type == XFS_IO_UNWRITTEN) { + if (ioend->io_error) + goto done; error = xfs_iomap_write_unwritten(ip, ioend->io_offset, ioend->io_size); } else if (ioend->io_isdirect && xfs_ioend_is_append(ioend)) { diff --git a/include/linux/can/core.h b/include/linux/can/core.h index a0875001b13c..df08a41d5be5 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -45,10 +45,9 @@ struct can_proto { extern int can_proto_register(const struct can_proto *cp); extern void can_proto_unregister(const struct can_proto *cp); -extern int can_rx_register(struct net_device *dev, canid_t can_id, - canid_t mask, - void (*func)(struct sk_buff *, void *), - void *data, char *ident); +int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, + void (*func)(struct sk_buff *, void *), + void *data, char *ident, struct sock *sk); extern void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 561ea896c657..e5ea74c354a0 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -49,7 +49,7 @@ static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool) case CEPH_POOL_TYPE_EC: return false; default: - BUG_ON(1); + BUG(); } } diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index ff82a32871b5..439cde68dda9 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -351,7 +351,8 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp) static inline int nlm_compare_locks(const struct file_lock *fl1, const struct file_lock *fl2) { - return fl1->fl_pid == fl2->fl_pid + return file_inode(fl1->fl_file) == file_inode(fl2->fl_file) + && fl1->fl_pid == fl2->fl_pid && fl1->fl_owner == fl2->fl_owner && fl1->fl_start == fl2->fl_start && fl1->fl_end == fl2->fl_end diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 79aaa9fc1a15..d5277fc3ce2e 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -103,5 +103,5 @@ struct mfc_cache { struct rtmsg; extern int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, - struct rtmsg *rtm, int nowait); + struct rtmsg *rtm, int nowait, u32 portid); #endif diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 66982e764051..f831155dc7d1 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -115,7 +115,7 @@ struct mfc6_cache { struct rtmsg; extern int ip6mr_get_route(struct net *net, struct sk_buff *skb, - struct rtmsg *rtm, int nowait); + struct rtmsg *rtm, int nowait, u32 portid); #ifdef CONFIG_IPV6_MROUTE extern struct sock *mroute6_socket(struct net *net, struct sk_buff *skb); diff --git a/include/linux/pwm.h b/include/linux/pwm.h index e90628cac8fa..84e526a12def 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -299,6 +299,7 @@ static inline void pwm_add_table(struct pwm_lookup *table, size_t num) #ifdef CONFIG_PWM_SYSFS void pwmchip_sysfs_export(struct pwm_chip *chip); void pwmchip_sysfs_unexport(struct pwm_chip *chip); +void pwmchip_sysfs_unexport_children(struct pwm_chip *chip); #else static inline void pwmchip_sysfs_export(struct pwm_chip *chip) { @@ -307,6 +308,10 @@ static inline void pwmchip_sysfs_export(struct pwm_chip *chip) static inline void pwmchip_sysfs_unexport(struct pwm_chip *chip) { } + +static inline void pwmchip_sysfs_unexport_children(struct pwm_chip *chip) +{ +} #endif /* CONFIG_PWM_SYSFS */ #endif /* __LINUX_PWM_H */ diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index 1d0043dc34e4..de2a722fe3cf 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -50,4 +50,10 @@ /* device can't handle Link Power Management */ #define USB_QUIRK_NO_LPM BIT(10) +/* + * Device reports its bInterval as linear frames instead of the + * USB 2.0 calculation. + */ +#define USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL BIT(11) + #endif /* __LINUX_USB_QUIRKS_H */ diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index 3ebb168b9afc..a34b141f125f 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -309,6 +309,10 @@ static inline int cipso_v4_validate(const struct sk_buff *skb, } for (opt_iter = 6; opt_iter < opt_len;) { + if (opt_iter + 1 == opt_len) { + err_offset = opt_iter; + goto out; + } tag_len = opt[opt_iter + 1]; if ((tag_len == 0) || (tag_len > (opt_len - opt_iter))) { err_offset = opt_iter + 1; diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h index 14e49c798135..b35533b94277 100644 --- a/include/trace/events/syscalls.h +++ b/include/trace/events/syscalls.h @@ -1,5 +1,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM raw_syscalls +#undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE syscalls #if !defined(_TRACE_EVENTS_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ) diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 1a85940f8ab7..8a8135c4e99a 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -106,8 +106,10 @@ struct nlmsgerr { #define NETLINK_PKTINFO 3 #define NETLINK_BROADCAST_ERROR 4 #define NETLINK_NO_ENOBUFS 5 +#ifndef __KERNEL__ #define NETLINK_RX_RING 6 #define NETLINK_TX_RING 7 +#endif struct nl_pktinfo { __u32 group; @@ -130,6 +132,7 @@ struct nl_mmap_hdr { __u32 nm_gid; }; +#ifndef __KERNEL__ enum nl_mmap_status { NL_MMAP_STATUS_UNUSED, NL_MMAP_STATUS_RESERVED, @@ -141,6 +144,7 @@ enum nl_mmap_status { #define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO #define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT) #define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr)) +#endif #define NET_MAJOR 36 /* Major 36 is reserved for networking */ diff --git a/include/uapi/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h index f2159d30d1f5..d79399394b46 100644 --- a/include/uapi/linux/netlink_diag.h +++ b/include/uapi/linux/netlink_diag.h @@ -48,6 +48,8 @@ enum { #define NDIAG_SHOW_MEMINFO 0x00000001 /* show memory info of a socket */ #define NDIAG_SHOW_GROUPS 0x00000002 /* show groups of a netlink socket */ +#ifndef __KERNEL__ #define NDIAG_SHOW_RING_CFG 0x00000004 /* show ring configuration */ +#endif #endif diff --git a/include/uapi/linux/packet_diag.h b/include/uapi/linux/packet_diag.h index d08c63f3dd6f..0c5d5dd61b6a 100644 --- a/include/uapi/linux/packet_diag.h +++ b/include/uapi/linux/packet_diag.h @@ -64,7 +64,7 @@ struct packet_diag_mclist { __u32 pdmc_count; __u16 pdmc_type; __u16 pdmc_alen; - __u8 pdmc_addr[MAX_ADDR_LEN]; + __u8 pdmc_addr[32]; /* MAX_ADDR_LEN */ }; struct packet_diag_ring { diff --git a/kernel/futex.c b/kernel/futex.c index 8874a7b431e4..54ebb63711f4 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2567,7 +2567,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, { struct hrtimer_sleeper timeout, *to = NULL; struct rt_mutex_waiter rt_waiter; - struct rt_mutex *pi_mutex = NULL; struct futex_hash_bucket *hb; union futex_key key2 = FUTEX_KEY_INIT; struct futex_q q = futex_q_init; @@ -2651,6 +2650,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (q.pi_state && (q.pi_state->owner != current)) { spin_lock(q.lock_ptr); ret = fixup_pi_state_owner(uaddr2, &q, current); + if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) + rt_mutex_unlock(&q.pi_state->pi_mutex); /* * Drop the reference to the pi state which * the requeue_pi() code acquired for us. @@ -2659,6 +2660,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, spin_unlock(q.lock_ptr); } } else { + struct rt_mutex *pi_mutex; + /* * We have been woken up by futex_unlock_pi(), a timeout, or a * signal. futex_unlock_pi() will not destroy the lock_ptr nor @@ -2682,18 +2685,19 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (res) ret = (res < 0) ? res : 0; + /* + * If fixup_pi_state_owner() faulted and was unable to handle + * the fault, unlock the rt_mutex and return the fault to + * userspace. + */ + if (ret && rt_mutex_owner(pi_mutex) == current) + rt_mutex_unlock(pi_mutex); + /* Unqueue and drop the lock. */ unqueue_me_pi(&q); } - /* - * If fixup_pi_state_owner() faulted and was unable to handle the - * fault, unlock the rt_mutex and return the fault to userspace. - */ - if (ret == -EFAULT) { - if (pi_mutex && rt_mutex_owner(pi_mutex) == current) - rt_mutex_unlock(pi_mutex); - } else if (ret == -EINTR) { + if (ret == -EINTR) { /* * We've already been requeued, but cannot restart by calling * futex_lock_pi() directly. We could restart this syscall, but diff --git a/kernel/padata.c b/kernel/padata.c index 161402f0b517..1e3f9772dbd4 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -189,19 +189,20 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd) reorder = &next_queue->reorder; + spin_lock(&reorder->lock); if (!list_empty(&reorder->list)) { padata = list_entry(reorder->list.next, struct padata_priv, list); - spin_lock(&reorder->lock); list_del_init(&padata->list); atomic_dec(&pd->reorder_objects); - spin_unlock(&reorder->lock); pd->processed++; + spin_unlock(&reorder->lock); goto out; } + spin_unlock(&reorder->lock); if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) { padata = ERR_PTR(-ENODATA); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index ec337f64f52d..3146abf4efc3 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1728,7 +1728,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) /* Reject out-of-range values early. Large positive sizes are used for unknown buffer sizes. */ - if (WARN_ON_ONCE((int) size < 0)) + if (WARN_ON_ONCE(size > INT_MAX)) return 0; str = buf; diff --git a/mm/swapfile.c b/mm/swapfile.c index 8798b2e0ac59..14ae9e3ec728 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2184,6 +2184,8 @@ static unsigned long read_swap_header(struct swap_info_struct *p, swab32s(&swap_header->info.version); swab32s(&swap_header->info.last_page); swab32s(&swap_header->info.nr_badpages); + if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) + return 0; for (i = 0; i < swap_header->info.nr_badpages; i++) swab32s(&swap_header->info.badpages[i]); } diff --git a/net/can/af_can.c b/net/can/af_can.c index d6030d6949df..ee6eee7a8b42 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -446,6 +446,7 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, * @func: callback function on filter match * @data: returned parameter for callback function * @ident: string for calling module identification + * @sk: socket pointer (might be NULL) * * Description: * Invokes the callback function with the received sk_buff and the given @@ -469,7 +470,7 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, */ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, void (*func)(struct sk_buff *, void *), void *data, - char *ident) + char *ident, struct sock *sk) { struct receiver *r; struct hlist_head *rl; @@ -497,6 +498,7 @@ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, r->func = func; r->data = data; r->ident = ident; + r->sk = sk; hlist_add_head_rcu(&r->list, rl); d->entries++; @@ -521,8 +523,11 @@ EXPORT_SYMBOL(can_rx_register); static void can_rx_delete_receiver(struct rcu_head *rp) { struct receiver *r = container_of(rp, struct receiver, rcu); + struct sock *sk = r->sk; kmem_cache_free(rcv_cache, r); + if (sk) + sock_put(sk); } /** @@ -597,8 +602,11 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, spin_unlock(&can_rcvlists_lock); /* schedule the receiver item for deletion */ - if (r) + if (r) { + if (r->sk) + sock_hold(r->sk); call_rcu(&r->rcu, can_rx_delete_receiver); + } } EXPORT_SYMBOL(can_rx_unregister); diff --git a/net/can/af_can.h b/net/can/af_can.h index fca0fe9fc45a..b86f5129e838 100644 --- a/net/can/af_can.h +++ b/net/can/af_can.h @@ -50,13 +50,14 @@ struct receiver { struct hlist_node list; - struct rcu_head rcu; canid_t can_id; canid_t mask; unsigned long matches; void (*func)(struct sk_buff *, void *); void *data; char *ident; + struct sock *sk; + struct rcu_head rcu; }; #define CAN_SFF_RCV_ARRAY_SZ (1 << CAN_SFF_ID_BITS) diff --git a/net/can/bcm.c b/net/can/bcm.c index dcb75c0e66c1..51c208f0d3d1 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1169,7 +1169,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, err = can_rx_register(dev, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op, - "bcm"); + "bcm", sk); op->rx_reg_dev = dev; dev_put(dev); @@ -1178,7 +1178,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, } else err = can_rx_register(NULL, op->can_id, REGMASK(op->can_id), - bcm_rx_handler, op, "bcm"); + bcm_rx_handler, op, "bcm", sk); if (err) { /* this bcm rx op is broken -> remove it */ list_del(&op->list); diff --git a/net/can/gw.c b/net/can/gw.c index 050a2110d43f..d4921582d3bd 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -441,7 +441,7 @@ static inline int cgw_register_filter(struct cgw_job *gwj) { return can_rx_register(gwj->src.dev, gwj->ccgw.filter.can_id, gwj->ccgw.filter.can_mask, can_can_gw_rcv, - gwj, "gw"); + gwj, "gw", NULL); } static inline void cgw_unregister_filter(struct cgw_job *gwj) diff --git a/net/can/raw.c b/net/can/raw.c index 081e81fd017f..bdca782df701 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -164,7 +164,7 @@ static int raw_enable_filters(struct net_device *dev, struct sock *sk, for (i = 0; i < count; i++) { err = can_rx_register(dev, filter[i].can_id, filter[i].can_mask, - raw_rcv, sk, "raw"); + raw_rcv, sk, "raw", sk); if (err) { /* clean up successfully registered filters */ while (--i >= 0) @@ -185,7 +185,7 @@ static int raw_enable_errfilter(struct net_device *dev, struct sock *sk, if (err_mask) err = can_rx_register(dev, 0, err_mask | CAN_ERR_FLAG, - raw_rcv, sk, "raw"); + raw_rcv, sk, "raw", sk); return err; } diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 60441b6a9546..8bb2e0c1cb50 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1240,7 +1240,6 @@ static int decode_new_up_state_weight(void **p, void *end, if ((map->osd_state[osd] & CEPH_OSD_EXISTS) && (xorstate & CEPH_OSD_EXISTS)) { pr_info("osd%d does not exist\n", osd); - map->osd_weight[osd] = CEPH_OSD_IN; ret = set_primary_affinity(map, osd, CEPH_OSD_DEFAULT_PRIMARY_AFFINITY); if (ret) diff --git a/net/core/dev.c b/net/core/dev.c index 2a24197f3a45..86bdf6d813f6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1611,24 +1611,19 @@ EXPORT_SYMBOL(call_netdevice_notifiers); static struct static_key netstamp_needed __read_mostly; #ifdef HAVE_JUMP_LABEL -/* We are not allowed to call static_key_slow_dec() from irq context - * If net_disable_timestamp() is called from irq context, defer the - * static_key_slow_dec() calls. - */ static atomic_t netstamp_needed_deferred; -#endif - -void net_enable_timestamp(void) +static void netstamp_clear(struct work_struct *work) { -#ifdef HAVE_JUMP_LABEL int deferred = atomic_xchg(&netstamp_needed_deferred, 0); - if (deferred) { - while (--deferred) - static_key_slow_dec(&netstamp_needed); - return; - } + while (deferred--) + static_key_slow_dec(&netstamp_needed); +} +static DECLARE_WORK(netstamp_work, netstamp_clear); #endif + +void net_enable_timestamp(void) +{ static_key_slow_inc(&netstamp_needed); } EXPORT_SYMBOL(net_enable_timestamp); @@ -1636,12 +1631,12 @@ EXPORT_SYMBOL(net_enable_timestamp); void net_disable_timestamp(void) { #ifdef HAVE_JUMP_LABEL - if (in_interrupt()) { - atomic_inc(&netstamp_needed_deferred); - return; - } -#endif + /* net_disable_timestamp() can be called from non process context */ + atomic_inc(&netstamp_needed_deferred); + schedule_work(&netstamp_work); +#else static_key_slow_dec(&netstamp_needed); +#endif } EXPORT_SYMBOL(net_disable_timestamp); @@ -2746,6 +2741,7 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d } return head; } +EXPORT_SYMBOL_GPL(validate_xmit_skb_list); static void qdisc_pkt_len_init(struct sk_buff *skb) { @@ -4898,6 +4894,7 @@ static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev, static int __netdev_adjacent_dev_insert(struct net_device *dev, struct net_device *adj_dev, + u16 ref_nr, struct list_head *dev_list, void *private, bool master) { @@ -4907,7 +4904,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj = __netdev_find_adj(dev, adj_dev, dev_list); if (adj) { - adj->ref_nr++; + adj->ref_nr += ref_nr; return 0; } @@ -4917,7 +4914,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj->dev = adj_dev; adj->master = master; - adj->ref_nr = 1; + adj->ref_nr = ref_nr; adj->private = private; dev_hold(adj_dev); @@ -4956,6 +4953,7 @@ free_adj: static void __netdev_adjacent_dev_remove(struct net_device *dev, struct net_device *adj_dev, + u16 ref_nr, struct list_head *dev_list) { struct netdev_adjacent *adj; @@ -4968,10 +4966,10 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, BUG(); } - if (adj->ref_nr > 1) { - pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name, - adj->ref_nr-1); - adj->ref_nr--; + if (adj->ref_nr > ref_nr) { + pr_debug("%s to %s ref_nr-%d = %d\n", dev->name, adj_dev->name, + ref_nr, adj->ref_nr-ref_nr); + adj->ref_nr -= ref_nr; return; } @@ -4990,21 +4988,22 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, static int __netdev_adjacent_dev_link_lists(struct net_device *dev, struct net_device *upper_dev, + u16 ref_nr, struct list_head *up_list, struct list_head *down_list, void *private, bool master) { int ret; - ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private, - master); + ret = __netdev_adjacent_dev_insert(dev, upper_dev, ref_nr, up_list, + private, master); if (ret) return ret; - ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private, - false); + ret = __netdev_adjacent_dev_insert(upper_dev, dev, ref_nr, down_list, + private, false); if (ret) { - __netdev_adjacent_dev_remove(dev, upper_dev, up_list); + __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list); return ret; } @@ -5012,9 +5011,10 @@ static int __netdev_adjacent_dev_link_lists(struct net_device *dev, } static int __netdev_adjacent_dev_link(struct net_device *dev, - struct net_device *upper_dev) + struct net_device *upper_dev, + u16 ref_nr) { - return __netdev_adjacent_dev_link_lists(dev, upper_dev, + return __netdev_adjacent_dev_link_lists(dev, upper_dev, ref_nr, &dev->all_adj_list.upper, &upper_dev->all_adj_list.lower, NULL, false); @@ -5022,17 +5022,19 @@ static int __netdev_adjacent_dev_link(struct net_device *dev, static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, struct net_device *upper_dev, + u16 ref_nr, struct list_head *up_list, struct list_head *down_list) { - __netdev_adjacent_dev_remove(dev, upper_dev, up_list); - __netdev_adjacent_dev_remove(upper_dev, dev, down_list); + __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list); + __netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list); } static void __netdev_adjacent_dev_unlink(struct net_device *dev, - struct net_device *upper_dev) + struct net_device *upper_dev, + u16 ref_nr) { - __netdev_adjacent_dev_unlink_lists(dev, upper_dev, + __netdev_adjacent_dev_unlink_lists(dev, upper_dev, ref_nr, &dev->all_adj_list.upper, &upper_dev->all_adj_list.lower); } @@ -5041,17 +5043,17 @@ static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, struct net_device *upper_dev, void *private, bool master) { - int ret = __netdev_adjacent_dev_link(dev, upper_dev); + int ret = __netdev_adjacent_dev_link(dev, upper_dev, 1); if (ret) return ret; - ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, + ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, 1, &dev->adj_list.upper, &upper_dev->adj_list.lower, private, master); if (ret) { - __netdev_adjacent_dev_unlink(dev, upper_dev); + __netdev_adjacent_dev_unlink(dev, upper_dev, 1); return ret; } @@ -5061,8 +5063,8 @@ static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, struct net_device *upper_dev) { - __netdev_adjacent_dev_unlink(dev, upper_dev); - __netdev_adjacent_dev_unlink_lists(dev, upper_dev, + __netdev_adjacent_dev_unlink(dev, upper_dev, 1); + __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1, &dev->adj_list.upper, &upper_dev->adj_list.lower); } @@ -5103,7 +5105,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { pr_debug("Interlinking %s with %s, non-neighbour\n", i->dev->name, j->dev->name); - ret = __netdev_adjacent_dev_link(i->dev, j->dev); + ret = __netdev_adjacent_dev_link(i->dev, j->dev, i->ref_nr); if (ret) goto rollback_mesh; } @@ -5113,7 +5115,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { pr_debug("linking %s's upper device %s with %s\n", upper_dev->name, i->dev->name, dev->name); - ret = __netdev_adjacent_dev_link(dev, i->dev); + ret = __netdev_adjacent_dev_link(dev, i->dev, i->ref_nr); if (ret) goto rollback_upper_mesh; } @@ -5122,7 +5124,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, list_for_each_entry(i, &dev->all_adj_list.lower, list) { pr_debug("linking %s's lower device %s with %s\n", dev->name, i->dev->name, upper_dev->name); - ret = __netdev_adjacent_dev_link(i->dev, upper_dev); + ret = __netdev_adjacent_dev_link(i->dev, upper_dev, i->ref_nr); if (ret) goto rollback_lower_mesh; } @@ -5135,7 +5137,7 @@ rollback_lower_mesh: list_for_each_entry(i, &dev->all_adj_list.lower, list) { if (i == to_i) break; - __netdev_adjacent_dev_unlink(i->dev, upper_dev); + __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr); } i = NULL; @@ -5145,7 +5147,7 @@ rollback_upper_mesh: list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { if (i == to_i) break; - __netdev_adjacent_dev_unlink(dev, i->dev); + __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr); } i = j = NULL; @@ -5157,7 +5159,7 @@ rollback_mesh: list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { if (i == to_i && j == to_j) break; - __netdev_adjacent_dev_unlink(i->dev, j->dev); + __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr); } if (i == to_i) break; @@ -5233,16 +5235,16 @@ void netdev_upper_dev_unlink(struct net_device *dev, */ list_for_each_entry(i, &dev->all_adj_list.lower, list) list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) - __netdev_adjacent_dev_unlink(i->dev, j->dev); + __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr); /* remove also the devices itself from lower/upper device * list */ list_for_each_entry(i, &dev->all_adj_list.lower, list) - __netdev_adjacent_dev_unlink(i->dev, upper_dev); + __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr); list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) - __netdev_adjacent_dev_unlink(dev, i->dev); + __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr); call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 4ff3eacc99f5..b3aa63260cab 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -211,8 +211,8 @@ #define T_REMDEV (1<<3) /* Remove one dev */ /* If lock -- protects updating of if_list */ -#define if_lock(t) spin_lock(&(t->if_lock)); -#define if_unlock(t) spin_unlock(&(t->if_lock)); +#define if_lock(t) mutex_lock(&(t->if_lock)); +#define if_unlock(t) mutex_unlock(&(t->if_lock)); /* Used to help with determining the pkts on receive */ #define PKTGEN_MAGIC 0xbe9be955 @@ -418,7 +418,7 @@ struct pktgen_net { }; struct pktgen_thread { - spinlock_t if_lock; /* for list of devices */ + struct mutex if_lock; /* for list of devices */ struct list_head if_list; /* All device here */ struct list_head th_list; struct task_struct *tsk; @@ -1952,11 +1952,13 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d { struct pktgen_thread *t; + mutex_lock(&pktgen_thread_lock); + list_for_each_entry(t, &pn->pktgen_threads, th_list) { struct pktgen_dev *pkt_dev; - rcu_read_lock(); - list_for_each_entry_rcu(pkt_dev, &t->if_list, list) { + if_lock(t); + list_for_each_entry(pkt_dev, &t->if_list, list) { if (pkt_dev->odev != dev) continue; @@ -1971,8 +1973,9 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d dev->name); break; } - rcu_read_unlock(); + if_unlock(t); } + mutex_unlock(&pktgen_thread_lock); } static int pktgen_device_event(struct notifier_block *unused, @@ -3656,7 +3659,7 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn) return -ENOMEM; } - spin_lock_init(&t->if_lock); + mutex_init(&t->if_lock); t->cpu = cpu; INIT_LIST_HEAD(&t->if_list); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 97549212e9e3..b2e2a53c2284 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3617,13 +3617,14 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, { struct sock *sk = skb->sk; - /* take a reference to prevent skb_orphan() from freeing the socket */ - sock_hold(sk); - - *skb_hwtstamps(skb) = *hwtstamps; - __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); - - sock_put(sk); + /* Take a reference to prevent skb_orphan() from freeing the socket, + * but only if the socket refcount is not zero. + */ + if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { + *skb_hwtstamps(skb) = *hwtstamps; + __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); + sock_put(sk); + } } EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); @@ -3661,7 +3662,7 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) { struct sock *sk = skb->sk; struct sock_exterr_skb *serr; - int err; + int err = 1; skb->wifi_acked_valid = 1; skb->wifi_acked = acked; @@ -3671,14 +3672,15 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS; - /* take a reference to prevent skb_orphan() from freeing the socket */ - sock_hold(sk); - - err = sock_queue_err_skb(sk, skb); + /* Take a reference to prevent skb_orphan() from freeing the socket, + * but only if the socket refcount is not zero. + */ + if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { + err = sock_queue_err_skb(sk, skb); + sock_put(sk); + } if (err) kfree_skb(skb); - - sock_put(sk); } EXPORT_SYMBOL_GPL(skb_complete_wifi_ack); diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index f053198e730c..5e3a7302f774 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -749,6 +749,7 @@ static void ccid2_hc_tx_exit(struct sock *sk) for (i = 0; i < hc->tx_seqbufc; i++) kfree(hc->tx_seqbuf[i]); hc->tx_seqbufc = 0; + dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); } static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) diff --git a/net/dccp/input.c b/net/dccp/input.c index 3c8ec7d4a34e..700440e4fa3b 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -606,7 +606,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) < 0) return 1; - goto discard; + consume_skb(skb); + return 0; } if (dh->dccph_type == DCCP_PKT_RESET) goto discard; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 6ca645c4b48e..502d9a885329 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -262,7 +262,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) switch (type) { case ICMP_REDIRECT: - dccp_do_redirect(skb, sk); + if (!sock_owned_by_user(sk)) + dccp_do_redirect(skb, sk); goto out; case ICMP_SOURCE_QUENCH: /* Just silently ignore these. */ diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 7bcb22317841..ff186dac3e07 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -131,10 +131,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); if (type == NDISC_REDIRECT) { - struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + if (!sock_owned_by_user(sk)) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); - if (dst) - dst->ops->redirect(dst, sk, skb); + if (dst) + dst->ops->redirect(dst, sk, skb); + } goto out; } diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index b50dc436db1f..e6aaf92e0e1b 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -135,6 +135,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk, /* It is still raw copy of parent, so invalidate * destructor and make plain sk_free() */ newsk->sk_destruct = NULL; + bh_unlock_sock(newsk); sk_free(newsk); return NULL; } diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index bc7c9662a904..e08003d026eb 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1655,6 +1655,10 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) goto validate_return_locked; } + if (opt_iter + 1 == opt_len) { + err_offset = opt_iter; + goto validate_return_locked; + } tag_len = tag[1]; if (tag_len > (opt_len - opt_iter)) { err_offset = opt_iter + 1; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 046fce012da5..8839b55ea4de 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1066,7 +1066,14 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) pktinfo->ipi_ifindex = 0; pktinfo->ipi_spec_dst.s_addr = 0; } - skb_dst_drop(skb); + /* We need to keep the dst for __ip_options_echo() + * We could restrict the test to opt.ts_needtime || opt.srr, + * but the following is good enough as IP options are not often used. + */ + if (unlikely(IPCB(skb)->opt.optlen)) + skb_dst_force(skb); + else + skb_dst_drop(skb); } int ip_setsockopt(struct sock *sk, int level, diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 1b7f6da99ef4..2348cc87b0bc 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2188,7 +2188,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, - struct rtmsg *rtm, int nowait) + struct rtmsg *rtm, int nowait, u32 portid) { struct mfc_cache *cache; struct mr_table *mrt; @@ -2233,6 +2233,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, return -ENOMEM; } + NETLINK_CB(skb2).portid = portid; skb_push(skb2, sizeof(struct iphdr)); skb_reset_network_header(skb2); iph = ip_hdr(skb2); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 64f4edb2dbf9..c225b227cc45 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -644,6 +644,8 @@ static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, { struct sk_buff *skb = skb_peek(&sk->sk_write_queue); + if (!skb) + return 0; pfh->wcheck = csum_partial((char *)&pfh->icmph, sizeof(struct icmphdr), pfh->wcheck); pfh->icmph.checksum = csum_fold(pfh->wcheck); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 12c5df33c0b7..2fe9459fbe24 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1835,6 +1835,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, { int res; + tos &= IPTOS_RT_MASK; rcu_read_lock(); /* Multicast recognition logic is moved from route cache to here. @@ -2373,7 +2374,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { int err = ipmr_get_route(net, skb, fl4->saddr, fl4->daddr, - r, nowait); + r, nowait, portid); + if (err <= 0) { if (!nowait) { if (err == 0) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 633cd34f57ec..3d61bdbae920 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -103,10 +103,10 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig kgid_t *data = table->data; struct net *net = container_of(table->data, struct net, ipv4.ping_group_range.range); - write_seqlock(&net->ipv4.ip_local_ports.lock); + write_seqlock_bh(&net->ipv4.ip_local_ports.lock); data[0] = low; data[1] = high; - write_sequnlock(&net->ipv4.ip_local_ports.lock); + write_sequnlock_bh(&net->ipv4.ip_local_ports.lock); } /* Validate changes from /proc interface. */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a21c47289765..daf3b217b1e4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -775,6 +775,12 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, ret = -EAGAIN; break; } + /* if __tcp_splice_read() got nothing while we have + * an skb in receive queue, we do not want to loop. + * This might happen with URG data. + */ + if (!skb_queue_empty(&sk->sk_receive_queue)) + break; sk_wait_data(sk, &timeo); if (signal_pending(current)) { ret = sock_intr_errno(timeo); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 067baa738001..737186dbbd74 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2360,10 +2360,9 @@ static void DBGUNDO(struct sock *sk, const char *msg) } #if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", msg, - &np->daddr, ntohs(inet->inet_dport), + &sk->sk_v6_daddr, ntohs(inet->inet_dport), tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5d5390299277..d3d62be2376b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -271,10 +271,13 @@ EXPORT_SYMBOL(tcp_v4_connect); */ void tcp_v4_mtu_reduced(struct sock *sk) { - struct dst_entry *dst; struct inet_sock *inet = inet_sk(sk); - u32 mtu = tcp_sk(sk)->mtu_info; + struct dst_entry *dst; + u32 mtu; + if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) + return; + mtu = tcp_sk(sk)->mtu_info; dst = inet_csk_update_pmtu(sk, mtu); if (!dst) return; @@ -386,7 +389,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) switch (type) { case ICMP_REDIRECT: - do_redirect(icmp_skb, sk); + if (!sock_owned_by_user(sk)) + do_redirect(icmp_skb, sk); goto out; case ICMP_SOURCE_QUENCH: /* Just silently ignore these. */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index af2f64eeb98f..a680177694ec 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1869,12 +1869,14 @@ static int tcp_mtu_probe(struct sock *sk) len = 0; tcp_for_write_queue_from_safe(skb, next, sk) { copy = min_t(int, skb->len, probe_size - len); - if (nskb->ip_summed) + if (nskb->ip_summed) { skb_copy_bits(skb, 0, skb_put(nskb, copy), copy); - else - nskb->csum = skb_copy_and_csum_bits(skb, 0, - skb_put(nskb, copy), - copy, nskb->csum); + } else { + __wsum csum = skb_copy_and_csum_bits(skb, 0, + skb_put(nskb, copy), + copy, 0); + nskb->csum = csum_block_add(nskb->csum, csum, len); + } if (skb->len <= copy) { /* We've eaten all the data from this skb. @@ -2291,9 +2293,11 @@ u32 __tcp_select_window(struct sock *sk) int full_space = min_t(int, tp->window_clamp, allowed_space); int window; - if (mss > full_space) + if (unlikely(mss > full_space)) { mss = full_space; - + if (mss <= 0) + return 0; + } if (free_space < (full_space >> 1)) { icsk->icsk_ack.quick = 0; @@ -2479,7 +2483,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) * copying overhead: fragmentation, tunneling, mangling etc. */ if (atomic_read(&sk->sk_wmem_alloc) > - min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) + min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), + sk->sk_sndbuf)) return -EAGAIN; if (skb_still_in_host_queue(sk, skb)) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 9b21ae8b2e31..baec68553e90 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -207,7 +207,8 @@ void tcp_delack_timer_handler(struct sock *sk) sk_mem_reclaim_partial(sk); - if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) + if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || + !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) goto out; if (time_after(icsk->icsk_ack.timeout, jiffies)) { @@ -486,7 +487,8 @@ void tcp_write_timer_handler(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); int event; - if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending) + if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || + !icsk->icsk_pending) goto out; if (time_after(icsk->icsk_timeout, jiffies)) { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 953572c17b3d..4cc14452d5cc 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2695,7 +2695,7 @@ static void init_loopback(struct net_device *dev) * lo device down, release this obsolete dst and * reallocate a new router for ifa. */ - if (sp_ifa->rt->dst.obsolete > 0) { + if (!atomic_read(&sp_ifa->rt->rt6i_ref)) { ip6_rt_put(sp_ifa->rt); sp_ifa->rt = NULL; } else { diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 1a93a39b2aab..df3633e20458 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -55,6 +55,7 @@ #include <net/ip6_fib.h> #include <net/ip6_route.h> #include <net/ip6_tunnel.h> +#include <net/gre.h> static bool log_ecn_error = true; @@ -367,35 +368,37 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { - const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data; - __be16 *p = (__be16 *)(skb->data + offset); - int grehlen = offset + 4; + const struct gre_base_hdr *greh; + const struct ipv6hdr *ipv6h; + int grehlen = sizeof(*greh); struct ip6_tnl *t; + int key_off = 0; __be16 flags; + __be32 key; - flags = p[0]; - if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { - if (flags&(GRE_VERSION|GRE_ROUTING)) - return; - if (flags&GRE_KEY) { - grehlen += 4; - if (flags&GRE_CSUM) - grehlen += 4; - } + if (!pskb_may_pull(skb, offset + grehlen)) + return; + greh = (const struct gre_base_hdr *)(skb->data + offset); + flags = greh->flags; + if (flags & (GRE_VERSION | GRE_ROUTING)) + return; + if (flags & GRE_CSUM) + grehlen += 4; + if (flags & GRE_KEY) { + key_off = grehlen + offset; + grehlen += 4; } - /* If only 8 bytes returned, keyed message will be dropped here */ - if (!pskb_may_pull(skb, grehlen)) + if (!pskb_may_pull(skb, offset + grehlen)) return; ipv6h = (const struct ipv6hdr *)skb->data; - p = (__be16 *)(skb->data + offset); + greh = (const struct gre_base_hdr *)(skb->data + offset); + key = key_off ? *(__be32 *)(skb->data + key_off) : 0; t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr, - flags & GRE_KEY ? - *(((__be32 *)p) + (grehlen / 4) - 1) : 0, - p[1]); + key, greh->protocol); if (t == NULL) return; @@ -889,7 +892,6 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = skb->protocol; err = ip6gre_xmit2(skb, dev, 0, &fl6, encap_limit, &mtu); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 6987d3cb4163..8afea07e53a2 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -407,18 +407,19 @@ ip6_tnl_dev_uninit(struct net_device *dev) __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) { - const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw; - __u8 nexthdr = ipv6h->nexthdr; - __u16 off = sizeof(*ipv6h); + const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw; + unsigned int nhoff = raw - skb->data; + unsigned int off = nhoff + sizeof(*ipv6h); + u8 next, nexthdr = ipv6h->nexthdr; while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) { - __u16 optlen = 0; struct ipv6_opt_hdr *hdr; - if (raw + off + sizeof(*hdr) > skb->data && - !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr))) + u16 optlen; + + if (!pskb_may_pull(skb, off + sizeof(*hdr))) break; - hdr = (struct ipv6_opt_hdr *) (raw + off); + hdr = (struct ipv6_opt_hdr *)(skb->data + off); if (nexthdr == NEXTHDR_FRAGMENT) { struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr; if (frag_hdr->frag_off) @@ -429,20 +430,29 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) } else { optlen = ipv6_optlen(hdr); } + /* cache hdr->nexthdr, since pskb_may_pull() might + * invalidate hdr + */ + next = hdr->nexthdr; if (nexthdr == NEXTHDR_DEST) { - __u16 i = off + 2; + u16 i = 2; + + /* Remember : hdr is no longer valid at this point. */ + if (!pskb_may_pull(skb, off + optlen)) + break; + while (1) { struct ipv6_tlv_tnl_enc_lim *tel; /* No more room for encapsulation limit */ - if (i + sizeof (*tel) > off + optlen) + if (i + sizeof(*tel) > optlen) break; - tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i]; + tel = (struct ipv6_tlv_tnl_enc_lim *)(skb->data + off + i); /* return index of option if found and valid */ if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT && tel->length == 1) - return i; + return i + off - nhoff; /* else jump to next option */ if (tel->type) i += tel->length + 2; @@ -450,7 +460,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) i++; } } - nexthdr = hdr->nexthdr; + nexthdr = next; off += optlen; } return 0; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 1d67b37592d1..591e2355cc9e 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -645,6 +645,10 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p) u->link = p->link; u->i_key = p->i_key; u->o_key = p->o_key; + if (u->i_key) + u->i_flags |= GRE_KEY; + if (u->o_key) + u->o_flags |= GRE_KEY; u->proto = p->proto; memcpy(u->name, p->name, sizeof(u->name)); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 4ca7cdd15aad..2b93262d5d5f 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2276,8 +2276,8 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, return 1; } -int ip6mr_get_route(struct net *net, - struct sk_buff *skb, struct rtmsg *rtm, int nowait) +int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, + int nowait, u32 portid) { int err; struct mr6_table *mrt; @@ -2322,6 +2322,7 @@ int ip6mr_get_route(struct net *net, return -ENOMEM; } + NETLINK_CB(skb2).portid = portid; skb_reset_transport_header(skb2); skb_put(skb2, sizeof(struct ipv6hdr)); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2d9aca57e7c7..8d3103459b2d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2612,7 +2612,9 @@ static int rt6_fill_node(struct net *net, if (iif) { #ifdef CONFIG_IPV6_MROUTE if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { - int err = ip6mr_get_route(net, skb, rtm, nowait); + int err = ip6mr_get_route(net, skb, rtm, nowait, + portid); + if (err <= 0) { if (!nowait) { if (err == 0) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 45eae1e609d6..eb36219f0a3c 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1388,6 +1388,7 @@ static int ipip6_tunnel_init(struct net_device *dev) tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); if (!tunnel->dst_cache) { free_percpu(dev->tstats); + dev->tstats = NULL; return -ENOMEM; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b8e14a5ae0b1..0ade453839c4 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -389,10 +389,12 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); if (type == NDISC_REDIRECT) { - struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + if (!sock_owned_by_user(sk)) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); - if (dst) - dst->ops->redirect(dst, sk, skb); + if (dst) + dst->ops->redirect(dst, sk, skb); + } goto out; } @@ -1049,6 +1051,16 @@ drop: return 0; /* don't send reset */ } +static void tcp_v6_restore_cb(struct sk_buff *skb) +{ + /* We need to move header back to the beginning if xfrm6_policy_check() + * and tcp_v6_fill_cb() are going to be called again. + * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. + */ + memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, + sizeof(struct inet6_skb_parm)); +} + static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) @@ -1180,8 +1192,10 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, sk_gfp_atomic(sk, GFP_ATOMIC)); consume_skb(ireq->pktopts); ireq->pktopts = NULL; - if (newnp->pktoptions) + if (newnp->pktoptions) { + tcp_v6_restore_cb(newnp->pktoptions); skb_set_owner_r(newnp->pktoptions, newsk); + } } newnp->opt = NULL; newnp->mcast_oif = tcp_v6_iif(skb); @@ -1381,6 +1395,7 @@ ipv6_pktoptions: np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { skb_set_owner_r(opt_skb, sk); + tcp_v6_restore_cb(opt_skb); opt_skb = xchg(&np->pktoptions, opt_skb); } else { __kfree_skb(opt_skb); @@ -1414,15 +1429,6 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, TCP_SKB_CB(skb)->sacked = 0; } -static void tcp_v6_restore_cb(struct sk_buff *skb) -{ - /* We need to move header back to the beginning if xfrm6_policy_check() - * and tcp_v6_fill_cb() are going to be called again. - */ - memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, - sizeof(struct inet6_skb_parm)); -} - static int tcp_v6_rcv(struct sk_buff *skb) { const struct tcphdr *th; diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 68aa9ffd4ae4..e9ec7d2cc357 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -273,6 +273,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops); void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type); +int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg); /* Session reference counts. Incremented when code obtains a reference * to a session. diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index e45d2b77bb42..c2cd3dd7fa67 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <asm/ioctls.h> #include <linux/icmp.h> #include <linux/module.h> #include <linux/skbuff.h> @@ -381,7 +382,7 @@ static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb) drop: IP_INC_STATS(sock_net(sk), IPSTATS_MIB_INDISCARDS); kfree_skb(skb); - return -1; + return 0; } /* Userspace will call sendmsg() on the tunnel socket to send L2TP @@ -554,6 +555,30 @@ out: return err ? err : copied; } +int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + struct sk_buff *skb; + int amount; + + switch (cmd) { + case SIOCOUTQ: + amount = sk_wmem_alloc_get(sk); + break; + case SIOCINQ: + spin_lock_bh(&sk->sk_receive_queue.lock); + skb = skb_peek(&sk->sk_receive_queue); + amount = skb ? skb->len : 0; + spin_unlock_bh(&sk->sk_receive_queue.lock); + break; + + default: + return -ENOIOCTLCMD; + } + + return put_user(amount, (int __user *)arg); +} +EXPORT_SYMBOL(l2tp_ioctl); + static struct proto l2tp_ip_prot = { .name = "L2TP/IP", .owner = THIS_MODULE, @@ -562,7 +587,7 @@ static struct proto l2tp_ip_prot = { .bind = l2tp_ip_bind, .connect = l2tp_ip_connect, .disconnect = l2tp_ip_disconnect, - .ioctl = udp_ioctl, + .ioctl = l2tp_ioctl, .destroy = l2tp_ip_destroy_sock, .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index b8e469b832df..f5d6fd834303 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -715,7 +715,7 @@ static struct proto l2tp_ip6_prot = { .bind = l2tp_ip6_bind, .connect = l2tp_ip6_connect, .disconnect = l2tp_ip6_disconnect, - .ioctl = udp_ioctl, + .ioctl = l2tp_ioctl, .destroy = l2tp_ip6_destroy_sock, .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 42dc2e45c921..9c68d0bca046 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -821,7 +821,10 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) * another trick required to cope with how the PROCOM state * machine works. -acme */ + skb_orphan(skb); + sock_hold(sk); skb->sk = sk; + skb->destructor = sock_efree; } if (!sock_owned_by_user(sk)) llc_conn_rcv(sk, skb); diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 06033f6c845f..cdc1b620cbe1 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -290,7 +290,10 @@ static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb, ev->type = LLC_SAP_EV_TYPE_PDU; ev->reason = 0; + skb_orphan(skb); + sock_hold(sk); skb->sk = sk; + skb->destructor = sock_efree; llc_sap_state_process(sap, skb); } diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 4a95fe3cffbc..3b98c1b4a719 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -116,6 +116,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) break; } + flush_delayed_work(&sdata->dec_tailroom_needed_wk); drv_remove_interface(local, sdata); } diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig index 2c5e95e9bfbd..5d6e8c05b3d4 100644 --- a/net/netlink/Kconfig +++ b/net/netlink/Kconfig @@ -2,15 +2,6 @@ # Netlink Sockets # -config NETLINK_MMAP - bool "NETLINK: mmaped IO" - ---help--- - This option enables support for memory mapped netlink IO. This - reduces overhead by avoiding copying data between kernel- and - userspace. - - If unsure, say N. - config NETLINK_DIAG tristate "NETLINK: socket monitoring interface" default n diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 15fc0938e1c4..4792e76b7d4a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -234,7 +234,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb, dev_hold(dev); - if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head)) + if (is_vmalloc_addr(skb->head)) nskb = netlink_to_full_skb(skb, GFP_ATOMIC); else nskb = skb_clone(skb, GFP_ATOMIC); @@ -308,599 +308,8 @@ static void netlink_rcv_wake(struct sock *sk) wake_up_interruptible(&nlk->wait); } -#ifdef CONFIG_NETLINK_MMAP -static bool netlink_rx_is_mmaped(struct sock *sk) -{ - return nlk_sk(sk)->rx_ring.pg_vec != NULL; -} - -static bool netlink_tx_is_mmaped(struct sock *sk) -{ - return nlk_sk(sk)->tx_ring.pg_vec != NULL; -} - -static __pure struct page *pgvec_to_page(const void *addr) -{ - if (is_vmalloc_addr(addr)) - return vmalloc_to_page(addr); - else - return virt_to_page(addr); -} - -static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len) -{ - unsigned int i; - - for (i = 0; i < len; i++) { - if (pg_vec[i] != NULL) { - if (is_vmalloc_addr(pg_vec[i])) - vfree(pg_vec[i]); - else - free_pages((unsigned long)pg_vec[i], order); - } - } - kfree(pg_vec); -} - -static void *alloc_one_pg_vec_page(unsigned long order) -{ - void *buffer; - gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | - __GFP_NOWARN | __GFP_NORETRY; - - buffer = (void *)__get_free_pages(gfp_flags, order); - if (buffer != NULL) - return buffer; - - buffer = vzalloc((1 << order) * PAGE_SIZE); - if (buffer != NULL) - return buffer; - - gfp_flags &= ~__GFP_NORETRY; - return (void *)__get_free_pages(gfp_flags, order); -} - -static void **alloc_pg_vec(struct netlink_sock *nlk, - struct nl_mmap_req *req, unsigned int order) -{ - unsigned int block_nr = req->nm_block_nr; - unsigned int i; - void **pg_vec; - - pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL); - if (pg_vec == NULL) - return NULL; - - for (i = 0; i < block_nr; i++) { - pg_vec[i] = alloc_one_pg_vec_page(order); - if (pg_vec[i] == NULL) - goto err1; - } - - return pg_vec; -err1: - free_pg_vec(pg_vec, order, block_nr); - return NULL; -} - - -static void -__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec, - unsigned int order) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct sk_buff_head *queue; - struct netlink_ring *ring; - - queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; - ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; - - spin_lock_bh(&queue->lock); - - ring->frame_max = req->nm_frame_nr - 1; - ring->head = 0; - ring->frame_size = req->nm_frame_size; - ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; - - swap(ring->pg_vec_len, req->nm_block_nr); - swap(ring->pg_vec_order, order); - swap(ring->pg_vec, pg_vec); - - __skb_queue_purge(queue); - spin_unlock_bh(&queue->lock); - - WARN_ON(atomic_read(&nlk->mapped)); - - if (pg_vec) - free_pg_vec(pg_vec, order, req->nm_block_nr); -} - -static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, - bool tx_ring) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_ring *ring; - void **pg_vec = NULL; - unsigned int order = 0; - - ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; - - if (atomic_read(&nlk->mapped)) - return -EBUSY; - if (atomic_read(&ring->pending)) - return -EBUSY; - - if (req->nm_block_nr) { - if (ring->pg_vec != NULL) - return -EBUSY; - - if ((int)req->nm_block_size <= 0) - return -EINVAL; - if (!PAGE_ALIGNED(req->nm_block_size)) - return -EINVAL; - if (req->nm_frame_size < NL_MMAP_HDRLEN) - return -EINVAL; - if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT)) - return -EINVAL; - - ring->frames_per_block = req->nm_block_size / - req->nm_frame_size; - if (ring->frames_per_block == 0) - return -EINVAL; - if (ring->frames_per_block * req->nm_block_nr != - req->nm_frame_nr) - return -EINVAL; - - order = get_order(req->nm_block_size); - pg_vec = alloc_pg_vec(nlk, req, order); - if (pg_vec == NULL) - return -ENOMEM; - } else { - if (req->nm_frame_nr) - return -EINVAL; - } - - mutex_lock(&nlk->pg_vec_lock); - if (atomic_read(&nlk->mapped) == 0) { - __netlink_set_ring(sk, req, tx_ring, pg_vec, order); - mutex_unlock(&nlk->pg_vec_lock); - return 0; - } - - mutex_unlock(&nlk->pg_vec_lock); - - if (pg_vec) - free_pg_vec(pg_vec, order, req->nm_block_nr); - - return -EBUSY; -} - -static void netlink_mm_open(struct vm_area_struct *vma) -{ - struct file *file = vma->vm_file; - struct socket *sock = file->private_data; - struct sock *sk = sock->sk; - - if (sk) - atomic_inc(&nlk_sk(sk)->mapped); -} - -static void netlink_mm_close(struct vm_area_struct *vma) -{ - struct file *file = vma->vm_file; - struct socket *sock = file->private_data; - struct sock *sk = sock->sk; - - if (sk) - atomic_dec(&nlk_sk(sk)->mapped); -} - -static const struct vm_operations_struct netlink_mmap_ops = { - .open = netlink_mm_open, - .close = netlink_mm_close, -}; - -static int netlink_mmap(struct file *file, struct socket *sock, - struct vm_area_struct *vma) -{ - struct sock *sk = sock->sk; - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_ring *ring; - unsigned long start, size, expected; - unsigned int i; - int err = -EINVAL; - - if (vma->vm_pgoff) - return -EINVAL; - - mutex_lock(&nlk->pg_vec_lock); - - expected = 0; - for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) { - if (ring->pg_vec == NULL) - continue; - expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE; - } - - if (expected == 0) - goto out; - - size = vma->vm_end - vma->vm_start; - if (size != expected) - goto out; - - start = vma->vm_start; - for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) { - if (ring->pg_vec == NULL) - continue; - - for (i = 0; i < ring->pg_vec_len; i++) { - struct page *page; - void *kaddr = ring->pg_vec[i]; - unsigned int pg_num; - - for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) { - page = pgvec_to_page(kaddr); - err = vm_insert_page(vma, start, page); - if (err < 0) - goto out; - start += PAGE_SIZE; - kaddr += PAGE_SIZE; - } - } - } - - atomic_inc(&nlk->mapped); - vma->vm_ops = &netlink_mmap_ops; - err = 0; -out: - mutex_unlock(&nlk->pg_vec_lock); - return err; -} - -static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len) -{ -#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 - struct page *p_start, *p_end; - - /* First page is flushed through netlink_{get,set}_status */ - p_start = pgvec_to_page(hdr + PAGE_SIZE); - p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1); - while (p_start <= p_end) { - flush_dcache_page(p_start); - p_start++; - } -#endif -} - -static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr) -{ - smp_rmb(); - flush_dcache_page(pgvec_to_page(hdr)); - return hdr->nm_status; -} - -static void netlink_set_status(struct nl_mmap_hdr *hdr, - enum nl_mmap_status status) -{ - smp_mb(); - hdr->nm_status = status; - flush_dcache_page(pgvec_to_page(hdr)); -} - -static struct nl_mmap_hdr * -__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos) -{ - unsigned int pg_vec_pos, frame_off; - - pg_vec_pos = pos / ring->frames_per_block; - frame_off = pos % ring->frames_per_block; - - return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size); -} - -static struct nl_mmap_hdr * -netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos, - enum nl_mmap_status status) -{ - struct nl_mmap_hdr *hdr; - - hdr = __netlink_lookup_frame(ring, pos); - if (netlink_get_status(hdr) != status) - return NULL; - - return hdr; -} - -static struct nl_mmap_hdr * -netlink_current_frame(const struct netlink_ring *ring, - enum nl_mmap_status status) -{ - return netlink_lookup_frame(ring, ring->head, status); -} - -static struct nl_mmap_hdr * -netlink_previous_frame(const struct netlink_ring *ring, - enum nl_mmap_status status) -{ - unsigned int prev; - - prev = ring->head ? ring->head - 1 : ring->frame_max; - return netlink_lookup_frame(ring, prev, status); -} - -static void netlink_increment_head(struct netlink_ring *ring) -{ - ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0; -} - -static void netlink_forward_ring(struct netlink_ring *ring) -{ - unsigned int head = ring->head, pos = head; - const struct nl_mmap_hdr *hdr; - - do { - hdr = __netlink_lookup_frame(ring, pos); - if (hdr->nm_status == NL_MMAP_STATUS_UNUSED) - break; - if (hdr->nm_status != NL_MMAP_STATUS_SKIP) - break; - netlink_increment_head(ring); - } while (ring->head != head); -} - -static bool netlink_dump_space(struct netlink_sock *nlk) -{ - struct netlink_ring *ring = &nlk->rx_ring; - struct nl_mmap_hdr *hdr; - unsigned int n; - - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); - if (hdr == NULL) - return false; - - n = ring->head + ring->frame_max / 2; - if (n > ring->frame_max) - n -= ring->frame_max; - - hdr = __netlink_lookup_frame(ring, n); - - return hdr->nm_status == NL_MMAP_STATUS_UNUSED; -} - -static unsigned int netlink_poll(struct file *file, struct socket *sock, - poll_table *wait) -{ - struct sock *sk = sock->sk; - struct netlink_sock *nlk = nlk_sk(sk); - unsigned int mask; - int err; - - if (nlk->rx_ring.pg_vec != NULL) { - /* Memory mapped sockets don't call recvmsg(), so flow control - * for dumps is performed here. A dump is allowed to continue - * if at least half the ring is unused. - */ - while (nlk->cb_running && netlink_dump_space(nlk)) { - err = netlink_dump(sk); - if (err < 0) { - sk->sk_err = -err; - sk->sk_error_report(sk); - break; - } - } - netlink_rcv_wake(sk); - } - - mask = datagram_poll(file, sock, wait); - - spin_lock_bh(&sk->sk_receive_queue.lock); - if (nlk->rx_ring.pg_vec) { - netlink_forward_ring(&nlk->rx_ring); - if (!netlink_previous_frame(&nlk->rx_ring, NL_MMAP_STATUS_UNUSED)) - mask |= POLLIN | POLLRDNORM; - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - - spin_lock_bh(&sk->sk_write_queue.lock); - if (nlk->tx_ring.pg_vec) { - if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED)) - mask |= POLLOUT | POLLWRNORM; - } - spin_unlock_bh(&sk->sk_write_queue.lock); - - return mask; -} - -static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb) -{ - return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN); -} - -static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk, - struct netlink_ring *ring, - struct nl_mmap_hdr *hdr) -{ - unsigned int size; - void *data; - - size = ring->frame_size - NL_MMAP_HDRLEN; - data = (void *)hdr + NL_MMAP_HDRLEN; - - skb->head = data; - skb->data = data; - skb_reset_tail_pointer(skb); - skb->end = skb->tail + size; - skb->len = 0; - - skb->destructor = netlink_skb_destructor; - NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED; - NETLINK_CB(skb).sk = sk; -} - -static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, - u32 dst_portid, u32 dst_group, - struct sock_iocb *siocb) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_ring *ring; - struct nl_mmap_hdr *hdr; - struct sk_buff *skb; - unsigned int maxlen; - int err = 0, len = 0; - - mutex_lock(&nlk->pg_vec_lock); - - ring = &nlk->tx_ring; - maxlen = ring->frame_size - NL_MMAP_HDRLEN; - - do { - unsigned int nm_len; - - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID); - if (hdr == NULL) { - if (!(msg->msg_flags & MSG_DONTWAIT) && - atomic_read(&nlk->tx_ring.pending)) - schedule(); - continue; - } - - nm_len = ACCESS_ONCE(hdr->nm_len); - if (nm_len > maxlen) { - err = -EINVAL; - goto out; - } - - netlink_frame_flush_dcache(hdr, nm_len); - - skb = alloc_skb(nm_len, GFP_KERNEL); - if (skb == NULL) { - err = -ENOBUFS; - goto out; - } - __skb_put(skb, nm_len); - memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len); - netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); - - netlink_increment_head(ring); - - NETLINK_CB(skb).portid = nlk->portid; - NETLINK_CB(skb).dst_group = dst_group; - NETLINK_CB(skb).creds = siocb->scm->creds; - - err = security_netlink_send(sk, skb); - if (err) { - kfree_skb(skb); - goto out; - } - - if (unlikely(dst_group)) { - atomic_inc(&skb->users); - netlink_broadcast(sk, skb, dst_portid, dst_group, - GFP_KERNEL); - } - err = netlink_unicast(sk, skb, dst_portid, - msg->msg_flags & MSG_DONTWAIT); - if (err < 0) - goto out; - len += err; - - } while (hdr != NULL || - (!(msg->msg_flags & MSG_DONTWAIT) && - atomic_read(&nlk->tx_ring.pending))); - - if (len > 0) - err = len; -out: - mutex_unlock(&nlk->pg_vec_lock); - return err; -} - -static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb) -{ - struct nl_mmap_hdr *hdr; - - hdr = netlink_mmap_hdr(skb); - hdr->nm_len = skb->len; - hdr->nm_group = NETLINK_CB(skb).dst_group; - hdr->nm_pid = NETLINK_CB(skb).creds.pid; - hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); - hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); - netlink_frame_flush_dcache(hdr, hdr->nm_len); - netlink_set_status(hdr, NL_MMAP_STATUS_VALID); - - NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED; - kfree_skb(skb); -} - -static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb) -{ - struct netlink_sock *nlk = nlk_sk(sk); - struct netlink_ring *ring = &nlk->rx_ring; - struct nl_mmap_hdr *hdr; - - spin_lock_bh(&sk->sk_receive_queue.lock); - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); - if (hdr == NULL) { - spin_unlock_bh(&sk->sk_receive_queue.lock); - kfree_skb(skb); - netlink_overrun(sk); - return; - } - netlink_increment_head(ring); - __skb_queue_tail(&sk->sk_receive_queue, skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); - - hdr->nm_len = skb->len; - hdr->nm_group = NETLINK_CB(skb).dst_group; - hdr->nm_pid = NETLINK_CB(skb).creds.pid; - hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); - hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); - netlink_set_status(hdr, NL_MMAP_STATUS_COPY); -} - -#else /* CONFIG_NETLINK_MMAP */ -#define netlink_rx_is_mmaped(sk) false -#define netlink_tx_is_mmaped(sk) false -#define netlink_mmap sock_no_mmap -#define netlink_poll datagram_poll -#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb) 0 -#endif /* CONFIG_NETLINK_MMAP */ - static void netlink_skb_destructor(struct sk_buff *skb) { -#ifdef CONFIG_NETLINK_MMAP - struct nl_mmap_hdr *hdr; - struct netlink_ring *ring; - struct sock *sk; - - /* If a packet from the kernel to userspace was freed because of an - * error without being delivered to userspace, the kernel must reset - * the status. In the direction userspace to kernel, the status is - * always reset here after the packet was processed and freed. - */ - if (netlink_skb_is_mmaped(skb)) { - hdr = netlink_mmap_hdr(skb); - sk = NETLINK_CB(skb).sk; - - if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) { - netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); - ring = &nlk_sk(sk)->tx_ring; - } else { - if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) { - hdr->nm_len = 0; - netlink_set_status(hdr, NL_MMAP_STATUS_VALID); - } - ring = &nlk_sk(sk)->rx_ring; - } - - WARN_ON(atomic_read(&ring->pending) == 0); - atomic_dec(&ring->pending); - sock_put(sk); - - skb->head = NULL; - } -#endif if (is_vmalloc_addr(skb->head)) { if (!skb->cloned || !atomic_dec_return(&(skb_shinfo(skb)->dataref))) @@ -934,18 +343,6 @@ static void netlink_sock_destruct(struct sock *sk) } skb_queue_purge(&sk->sk_receive_queue); -#ifdef CONFIG_NETLINK_MMAP - if (1) { - struct nl_mmap_req req; - - memset(&req, 0, sizeof(req)); - if (nlk->rx_ring.pg_vec) - __netlink_set_ring(sk, &req, false, NULL, 0); - memset(&req, 0, sizeof(req)); - if (nlk->tx_ring.pg_vec) - __netlink_set_ring(sk, &req, true, NULL, 0); - } -#endif /* CONFIG_NETLINK_MMAP */ if (!sock_flag(sk, SOCK_DEAD)) { printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); @@ -1153,9 +550,6 @@ static int __netlink_create(struct net *net, struct socket *sock, mutex_init(nlk->cb_mutex); } init_waitqueue_head(&nlk->wait); -#ifdef CONFIG_NETLINK_MMAP - mutex_init(&nlk->pg_vec_lock); -#endif sk->sk_destruct = netlink_sock_destruct; sk->sk_protocol = protocol; @@ -1653,9 +1047,8 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, nlk = nlk_sk(sk); - if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(NETLINK_CONGESTED, &nlk->state)) && - !netlink_skb_is_mmaped(skb)) { + if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || + test_bit(NETLINK_CONGESTED, &nlk->state)) { DECLARE_WAITQUEUE(wait, current); if (!*timeo) { if (!ssk || netlink_is_kernel(ssk)) @@ -1693,14 +1086,7 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) netlink_deliver_tap(skb); -#ifdef CONFIG_NETLINK_MMAP - if (netlink_skb_is_mmaped(skb)) - netlink_queue_mmaped_skb(sk, skb); - else if (netlink_rx_is_mmaped(sk)) - netlink_ring_set_copied(sk, skb); - else -#endif /* CONFIG_NETLINK_MMAP */ - skb_queue_tail(&sk->sk_receive_queue, skb); + skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk); return len; } @@ -1724,9 +1110,6 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) int delta; WARN_ON(skb->sk != NULL); - if (netlink_skb_is_mmaped(skb)) - return skb; - delta = skb->end - skb->tail; if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize) return skb; @@ -1805,66 +1188,6 @@ EXPORT_SYMBOL(netlink_unicast); struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size, u32 dst_portid, gfp_t gfp_mask) { -#ifdef CONFIG_NETLINK_MMAP - struct sock *sk = NULL; - struct sk_buff *skb; - struct netlink_ring *ring; - struct nl_mmap_hdr *hdr; - unsigned int maxlen; - - sk = netlink_getsockbyportid(ssk, dst_portid); - if (IS_ERR(sk)) - goto out; - - ring = &nlk_sk(sk)->rx_ring; - /* fast-path without atomic ops for common case: non-mmaped receiver */ - if (ring->pg_vec == NULL) - goto out_put; - - if (ring->frame_size - NL_MMAP_HDRLEN < size) - goto out_put; - - skb = alloc_skb_head(gfp_mask); - if (skb == NULL) - goto err1; - - spin_lock_bh(&sk->sk_receive_queue.lock); - /* check again under lock */ - if (ring->pg_vec == NULL) - goto out_free; - - /* check again under lock */ - maxlen = ring->frame_size - NL_MMAP_HDRLEN; - if (maxlen < size) - goto out_free; - - netlink_forward_ring(ring); - hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); - if (hdr == NULL) - goto err2; - netlink_ring_setup_skb(skb, sk, ring, hdr); - netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED); - atomic_inc(&ring->pending); - netlink_increment_head(ring); - - spin_unlock_bh(&sk->sk_receive_queue.lock); - return skb; - -err2: - kfree_skb(skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); - netlink_overrun(sk); -err1: - sock_put(sk); - return NULL; - -out_free: - kfree_skb(skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); -out_put: - sock_put(sk); -out: -#endif return alloc_skb(size, gfp_mask); } EXPORT_SYMBOL_GPL(netlink_alloc_skb); @@ -2126,8 +1449,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, if (level != SOL_NETLINK) return -ENOPROTOOPT; - if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING && - optlen >= sizeof(int) && + if (optlen >= sizeof(int) && get_user(val, (unsigned int __user *)optval)) return -EFAULT; @@ -2180,25 +1502,6 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, } err = 0; break; -#ifdef CONFIG_NETLINK_MMAP - case NETLINK_RX_RING: - case NETLINK_TX_RING: { - struct nl_mmap_req req; - - /* Rings might consume more memory than queue limits, require - * CAP_NET_ADMIN. - */ - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (optlen < sizeof(req)) - return -EINVAL; - if (copy_from_user(&req, optval, sizeof(req))) - return -EFAULT; - err = netlink_set_ring(sk, &req, - optname == NETLINK_TX_RING); - break; - } -#endif /* CONFIG_NETLINK_MMAP */ default: err = -ENOPROTOOPT; } @@ -2311,13 +1614,6 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; } - if (netlink_tx_is_mmaped(sk) && - msg->msg_iov->iov_base == NULL) { - err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, - siocb); - goto out; - } - err = -EMSGSIZE; if (len > sk->sk_sndbuf - 32) goto out; @@ -2398,7 +1694,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, /* Record the max length of recvmsg() calls for future allocations */ nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len); nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len, - 16384); + SKB_WITH_OVERHEAD(32768)); copied = data_skb->len; if (len < copied) { @@ -2643,8 +1939,7 @@ static int netlink_dump(struct sock *sk) cb = &nlk->cb; alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); - if (!netlink_rx_is_mmaped(sk) && - atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) + if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) goto errout_skb; /* NLMSG_GOODSIZE is small to avoid high order allocations being @@ -2656,9 +1951,8 @@ static int netlink_dump(struct sock *sk) skb = netlink_alloc_skb(sk, nlk->max_recvmsg_len, nlk->portid, - GFP_KERNEL | - __GFP_NOWARN | - __GFP_NORETRY); + (GFP_KERNEL & ~__GFP_WAIT) | + __GFP_NOWARN | __GFP_NORETRY); /* available room should be exact amount to avoid MSG_TRUNC */ if (skb) skb_reserve(skb, skb_tailroom(skb) - @@ -2666,7 +1960,7 @@ static int netlink_dump(struct sock *sk) } if (!skb) skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, - GFP_KERNEL); + (GFP_KERNEL & ~__GFP_WAIT)); if (!skb) goto errout_skb; netlink_skb_set_owner_r(skb, sk); @@ -2722,16 +2016,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, struct netlink_sock *nlk; int ret; - /* Memory mapped dump requests need to be copied to avoid looping - * on the pending state in netlink_mmap_sendmsg() while the CB hold - * a reference to the skb. - */ - if (netlink_skb_is_mmaped(skb)) { - skb = skb_copy(skb, GFP_KERNEL); - if (skb == NULL) - return -ENOBUFS; - } else - atomic_inc(&skb->users); + atomic_inc(&skb->users); sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); if (sk == NULL) { @@ -3072,7 +2357,7 @@ static const struct proto_ops netlink_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = netlink_getname, - .poll = netlink_poll, + .poll = datagram_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -3080,7 +2365,7 @@ static const struct proto_ops netlink_ops = { .getsockopt = netlink_getsockopt, .sendmsg = netlink_sendmsg, .recvmsg = netlink_recvmsg, - .mmap = netlink_mmap, + .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 3951874e715d..c6bd3dd35cb3 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -42,12 +42,6 @@ struct netlink_sock { int (*netlink_bind)(int group); void (*netlink_unbind)(int group); struct module *module; -#ifdef CONFIG_NETLINK_MMAP - struct mutex pg_vec_lock; - struct netlink_ring rx_ring; - struct netlink_ring tx_ring; - atomic_t mapped; -#endif /* CONFIG_NETLINK_MMAP */ struct rhash_head node; }; @@ -57,15 +51,6 @@ static inline struct netlink_sock *nlk_sk(struct sock *sk) return container_of(sk, struct netlink_sock, sk); } -static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb) -{ -#ifdef CONFIG_NETLINK_MMAP - return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; -#else - return false; -#endif /* CONFIG_NETLINK_MMAP */ -} - struct netlink_table { struct rhashtable hash; struct hlist_head mc_list; diff --git a/net/netlink/diag.c b/net/netlink/diag.c index de8c74a3c061..dd5f2ffd13e6 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -8,41 +8,6 @@ #include "af_netlink.h" -#ifdef CONFIG_NETLINK_MMAP -static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type, - struct sk_buff *nlskb) -{ - struct netlink_diag_ring ndr; - - ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT; - ndr.ndr_block_nr = ring->pg_vec_len; - ndr.ndr_frame_size = ring->frame_size; - ndr.ndr_frame_nr = ring->frame_max + 1; - - return nla_put(nlskb, nl_type, sizeof(ndr), &ndr); -} - -static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) -{ - struct netlink_sock *nlk = nlk_sk(sk); - int ret; - - mutex_lock(&nlk->pg_vec_lock); - ret = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb); - if (!ret) - ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING, - nlskb); - mutex_unlock(&nlk->pg_vec_lock); - - return ret; -} -#else -static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) -{ - return 0; -} -#endif - static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb) { struct netlink_sock *nlk = nlk_sk(sk); @@ -87,10 +52,6 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO)) goto out_nlmsg_trim; - if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) && - sk_diag_put_rings_cfg(sk, skb)) - goto out_nlmsg_trim; - return nlmsg_end(skb, nlh); out_nlmsg_trim: diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index abffc1519e4d..86b3e2f70a27 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -240,7 +240,7 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); static int packet_direct_xmit(struct sk_buff *skb) { struct net_device *dev = skb->dev; - netdev_features_t features; + struct sk_buff *orig_skb = skb; struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; @@ -248,9 +248,8 @@ static int packet_direct_xmit(struct sk_buff *skb) !netif_carrier_ok(dev))) goto drop; - features = netif_skb_features(skb); - if (skb_needs_linearize(skb, features) && - __skb_linearize(skb)) + skb = validate_xmit_skb_list(skb, dev); + if (skb != orig_skb) goto drop; txq = skb_get_tx_queue(dev, skb); @@ -270,7 +269,7 @@ static int packet_direct_xmit(struct sk_buff *skb) return ret; drop: atomic_long_inc(&dev->tx_dropped); - kfree_skb(skb); + kfree_skb_list(skb); return NET_XMIT_DROP; } @@ -2739,7 +2738,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; - char name[15]; + char name[sizeof(uaddr->sa_data) + 1]; /* * Check legality @@ -2747,7 +2746,11 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, if (addr_len != sizeof(struct sockaddr)) return -EINVAL; - strlcpy(name, uaddr->sa_data, sizeof(name)); + /* uaddr->sa_data comes from the userspace, it's not guaranteed to be + * zero-terminated. + */ + memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); + name[sizeof(uaddr->sa_data)] = 0; return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); } @@ -3552,6 +3555,7 @@ static int packet_notifier(struct notifier_block *this, } if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); + fanout_release(sk); po->ifindex = -1; if (po->prot_hook.dev) dev_put(po->prot_hook.dev); @@ -3804,8 +3808,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, if (unlikely(req->tp_block_size & (PAGE_SIZE - 1))) goto out; if (po->tp_version >= TPACKET_V3 && - (int)(req->tp_block_size - - BLK_PLUS_PRIV(req_u->req3.tp_sizeof_priv)) <= 0) + req->tp_block_size <= + BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv)) goto out; if (unlikely(req->tp_frame_size < po->tp_hdrlen + po->tp_reserve)) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index e6bb98e583fb..690a973b72b5 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3426,6 +3426,12 @@ sctp_disposition_t sctp_sf_ootb(struct net *net, return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); + /* Report violation if chunk len overflows */ + ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); + if (ch_end > skb_tail_pointer(skb)) + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, + commands); + /* Now that we know we at least have a chunk header, * do things that are type appropriate. */ @@ -3457,12 +3463,6 @@ sctp_disposition_t sctp_sf_ootb(struct net *net, } } - /* Report violation if chunk len overflows */ - ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); - if (ch_end > skb_tail_pointer(skb)) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); - ch = (sctp_chunkhdr_t *) ch_end; } while (ch_end < skb_tail_pointer(skb)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 72da7d58ccca..1e557ee4cd65 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4385,7 +4385,7 @@ static int sctp_getsockopt_disable_fragments(struct sock *sk, int len, static int sctp_getsockopt_events(struct sock *sk, int len, char __user *optval, int __user *optlen) { - if (len <= 0) + if (len == 0) return -EINVAL; if (len > sizeof(struct sctp_event_subscribe)) len = sizeof(struct sctp_event_subscribe); @@ -5981,6 +5981,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; + if (len < 0) + return -EINVAL; + lock_sock(sk); switch (optname) { @@ -6962,7 +6965,8 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, */ release_sock(sk); current_timeo = schedule_timeout(current_timeo); - BUG_ON(sk != asoc->base.sk); + if (sk != asoc->base.sk) + goto do_error; lock_sock(sk); *timeo_p = current_timeo; diff --git a/net/socket.c b/net/socket.c index 7f61789c78ff..e72371710fe3 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2355,8 +2355,10 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, return err; err = sock_error(sock->sk); - if (err) + if (err) { + datagrams = err; goto out_put; + } entry = mmsg; compat_entry = (struct compat_mmsghdr __user *)mmsg; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 287087b10a7e..7950b4c26651 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -940,20 +940,32 @@ fail: return NULL; } -static int unix_mknod(struct dentry *dentry, struct path *path, umode_t mode, - struct path *res) +static int unix_mknod(const char *sun_path, umode_t mode, struct path *res) { - int err; + struct dentry *dentry; + struct path path; + int err = 0; + /* + * Get the parent directory, calculate the hash for last + * component. + */ + dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); + err = PTR_ERR(dentry); + if (IS_ERR(dentry)) + return err; - err = security_path_mknod(path, dentry, mode, 0); + /* + * All right, let's create it. + */ + err = security_path_mknod(&path, dentry, mode, 0); if (!err) { - err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0); + err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0); if (!err) { - res->mnt = mntget(path->mnt); + res->mnt = mntget(path.mnt); res->dentry = dget(dentry); } } - + done_path_create(&path, dentry); return err; } @@ -964,12 +976,10 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; char *sun_path = sunaddr->sun_path; - int err, name_err; + int err; unsigned int hash; struct unix_address *addr; struct hlist_head *list; - struct path path; - struct dentry *dentry; err = -EINVAL; if (sunaddr->sun_family != AF_UNIX) @@ -985,34 +995,14 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; addr_len = err; - name_err = 0; - dentry = NULL; - if (sun_path[0]) { - /* Get the parent directory, calculate the hash for last - * component. - */ - dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); - - if (IS_ERR(dentry)) { - /* delay report until after 'already bound' check */ - name_err = PTR_ERR(dentry); - dentry = NULL; - } - } - err = mutex_lock_interruptible(&u->readlock); if (err) - goto out_path; + goto out; err = -EINVAL; if (u->addr) goto out_up; - if (name_err) { - err = name_err == -EEXIST ? -EADDRINUSE : name_err; - goto out_up; - } - err = -ENOMEM; addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); if (!addr) @@ -1023,11 +1013,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) addr->hash = hash ^ sk->sk_type; atomic_set(&addr->refcnt, 1); - if (dentry) { - struct path u_path; + if (sun_path[0]) { + struct path path; umode_t mode = S_IFSOCK | (SOCK_INODE(sock)->i_mode & ~current_umask()); - err = unix_mknod(dentry, &path, mode, &u_path); + err = unix_mknod(sun_path, mode, &path); if (err) { if (err == -EEXIST) err = -EADDRINUSE; @@ -1035,9 +1025,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out_up; } addr->hash = UNIX_HASH_SIZE; - hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1); + hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1); spin_lock(&unix_table_lock); - u->path = u_path; + u->path = path; list = &unix_socket_table[hash]; } else { spin_lock(&unix_table_lock); @@ -1060,10 +1050,6 @@ out_unlock: spin_unlock(&unix_table_lock); out_up: mutex_unlock(&u->readlock); -out_path: - if (dentry) - done_path_create(&path, dentry); - out: return err; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 4769382d9478..64faba06be38 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3004,6 +3004,11 @@ static int __net_init xfrm_net_init(struct net *net) { int rv; + /* Initialize the per-net locks here */ + spin_lock_init(&net->xfrm.xfrm_state_lock); + rwlock_init(&net->xfrm.xfrm_policy_lock); + mutex_init(&net->xfrm.xfrm_cfg_mutex); + rv = xfrm_statistics_init(net); if (rv < 0) goto out_statistics; @@ -3020,11 +3025,6 @@ static int __net_init xfrm_net_init(struct net *net) if (rv < 0) goto out; - /* Initialize the per-net locks here */ - spin_lock_init(&net->xfrm.xfrm_state_lock); - rwlock_init(&net->xfrm.xfrm_policy_lock); - mutex_init(&net->xfrm.xfrm_cfg_mutex); - return 0; out: diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e812e988c111..b8170ae1461e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -386,7 +386,14 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es up = nla_data(rp); ulen = xfrm_replay_state_esn_len(up); - if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen) + /* Check the overall length and the internal bitmap length to avoid + * potential overflow. */ + if (nla_len(rp) < ulen || + xfrm_replay_state_esn_len(replay_esn) != ulen || + replay_esn->bmp_len != up->bmp_len) + return -EINVAL; + + if (up->replay_window > up->bmp_len * sizeof(__u32) * 8) return -EINVAL; return 0; diff --git a/samples/mic/mpssd/.gitignore b/samples/mic/mpssd/.gitignore new file mode 100644 index 000000000000..8b7c72f07c92 --- /dev/null +++ b/samples/mic/mpssd/.gitignore @@ -0,0 +1 @@ +mpssd diff --git a/samples/mic/mpssd/Makefile b/samples/mic/mpssd/Makefile new file mode 100644 index 000000000000..3e3ef91fed6b --- /dev/null +++ b/samples/mic/mpssd/Makefile @@ -0,0 +1,27 @@ +ifndef CROSS_COMPILE +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) + +ifeq ($(ARCH),x86) + +PROGS := mpssd +CC = $(CROSS_COMPILE)gcc +CFLAGS := -I../../../usr/include -I../../../tools/include + +ifdef DEBUG +CFLAGS += -DDEBUG=$(DEBUG) +endif + +all: $(PROGS) +mpssd: mpssd.c sysfs.c + $(CC) $(CFLAGS) mpssd.c sysfs.c -o mpssd -lpthread + +install: + install mpssd /usr/sbin/mpssd + install micctrl /usr/sbin/micctrl + +clean: + rm -fr $(PROGS) + +endif +endif diff --git a/samples/mic/mpssd/micctrl b/samples/mic/mpssd/micctrl new file mode 100644 index 000000000000..8f2629b41c5f --- /dev/null +++ b/samples/mic/mpssd/micctrl @@ -0,0 +1,173 @@ +#!/bin/bash +# Intel MIC Platform Software Stack (MPSS) +# +# Copyright(c) 2013 Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# The full GNU General Public License is included in this distribution in +# the file called "COPYING". +# +# Intel MIC User Space Tools. +# +# micctrl - Controls MIC boot/start/stop. +# +# chkconfig: 2345 95 05 +# description: start MPSS stack processing. +# +### BEGIN INIT INFO +# Provides: micctrl +### END INIT INFO + +# Source function library. +. /etc/init.d/functions + +sysfs="/sys/class/mic" + +_status() +{ + f=$sysfs/$1 + echo -e $1 state: "`cat $f/state`" shutdown_status: "`cat $f/shutdown_status`" +} + +status() +{ + if [ "`echo $1 | head -c3`" == "mic" ]; then + _status $1 + return $? + fi + for f in $sysfs/* + do + _status `basename $f` + RETVAL=$? + [ $RETVAL -ne 0 ] && return $RETVAL + done + return 0 +} + +_reset() +{ + f=$sysfs/$1 + echo reset > $f/state +} + +reset() +{ + if [ "`echo $1 | head -c3`" == "mic" ]; then + _reset $1 + return $? + fi + for f in $sysfs/* + do + _reset `basename $f` + RETVAL=$? + [ $RETVAL -ne 0 ] && return $RETVAL + done + return 0 +} + +_boot() +{ + f=$sysfs/$1 + echo "linux" > $f/bootmode + echo "mic/uos.img" > $f/firmware + echo "mic/$1.image" > $f/ramdisk + echo "boot" > $f/state +} + +boot() +{ + if [ "`echo $1 | head -c3`" == "mic" ]; then + _boot $1 + return $? + fi + for f in $sysfs/* + do + _boot `basename $f` + RETVAL=$? + [ $RETVAL -ne 0 ] && return $RETVAL + done + return 0 +} + +_shutdown() +{ + f=$sysfs/$1 + echo shutdown > $f/state +} + +shutdown() +{ + if [ "`echo $1 | head -c3`" == "mic" ]; then + _shutdown $1 + return $? + fi + for f in $sysfs/* + do + _shutdown `basename $f` + RETVAL=$? + [ $RETVAL -ne 0 ] && return $RETVAL + done + return 0 +} + +_wait() +{ + f=$sysfs/$1 + while [ "`cat $f/state`" != "offline" -a "`cat $f/state`" != "online" ] + do + sleep 1 + echo -e "Waiting for $1 to go offline" + done +} + +wait() +{ + if [ "`echo $1 | head -c3`" == "mic" ]; then + _wait $1 + return $? + fi + # Wait for the cards to go offline + for f in $sysfs/* + do + _wait `basename $f` + RETVAL=$? + [ $RETVAL -ne 0 ] && return $RETVAL + done + return 0 +} + +if [ ! -d "$sysfs" ]; then + echo -e $"Module unloaded " + exit 3 +fi + +case $1 in + -s) + status $2 + ;; + -r) + reset $2 + ;; + -b) + boot $2 + ;; + -S) + shutdown $2 + ;; + -w) + wait $2 + ;; + *) + echo $"Usage: $0 {-s (status) |-r (reset) |-b (boot) |-S (shutdown) |-w (wait)}" + exit 2 +esac + +exit $? diff --git a/samples/mic/mpssd/mpss b/samples/mic/mpssd/mpss new file mode 100644 index 000000000000..cacbdb0aefb9 --- /dev/null +++ b/samples/mic/mpssd/mpss @@ -0,0 +1,202 @@ +#!/bin/bash +# Intel MIC Platform Software Stack (MPSS) +# +# Copyright(c) 2013 Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# The full GNU General Public License is included in this distribution in +# the file called "COPYING". +# +# Intel MIC User Space Tools. +# +# mpss Start mpssd. +# +# chkconfig: 2345 95 05 +# description: start MPSS stack processing. +# +### BEGIN INIT INFO +# Provides: mpss +# Required-Start: +# Required-Stop: +# Short-Description: MPSS stack control +# Description: MPSS stack control +### END INIT INFO + +# Source function library. +. /etc/init.d/functions + +exec=/usr/sbin/mpssd +sysfs="/sys/class/mic" + +start() +{ + [ -x $exec ] || exit 5 + + if [ "`ps -e | awk '{print $4}' | grep mpssd | head -1`" = "mpssd" ]; then + echo -e $"MPSSD already running! " + success + echo + return 0 + fi + + echo -e $"Starting MPSS Stack" + echo -e $"Loading MIC_X100_DMA & MIC_HOST Modules" + + for f in "mic_host" "mic_x100_dma" + do + modprobe $f + RETVAL=$? + if [ $RETVAL -ne 0 ]; then + failure + echo + return $RETVAL + fi + done + + # Start the daemon + echo -n $"Starting MPSSD " + $exec + RETVAL=$? + if [ $RETVAL -ne 0 ]; then + failure + echo + return $RETVAL + fi + success + echo + + sleep 5 + + # Boot the cards + micctrl -b + + # Wait till ping works + for f in $sysfs/* + do + count=100 + ipaddr=`cat $f/cmdline` + ipaddr=${ipaddr#*address,} + ipaddr=`echo $ipaddr | cut -d, -f1 | cut -d\; -f1` + while [ $count -ge 0 ] + do + echo -e "Pinging "`basename $f`" " + ping -c 1 $ipaddr &> /dev/null + RETVAL=$? + if [ $RETVAL -eq 0 ]; then + success + break + fi + sleep 1 + count=`expr $count - 1` + done + [ $RETVAL -ne 0 ] && failure || success + echo + done + return $RETVAL +} + +stop() +{ + echo -e $"Shutting down MPSS Stack: " + + # Bail out if module is unloaded + if [ ! -d "$sysfs" ]; then + echo -n $"Module unloaded " + success + echo + return 0 + fi + + # Shut down the cards. + micctrl -S + + # Wait for the cards to go offline + for f in $sysfs/* + do + while [ "`cat $f/state`" != "offline" ] + do + sleep 1 + echo -e "Waiting for "`basename $f`" to go offline" + done + done + + # Display the status of the cards + micctrl -s + + # Kill MPSSD now + echo -n $"Killing MPSSD" + killall -9 mpssd 2>/dev/null + RETVAL=$? + [ $RETVAL -ne 0 ] && failure || success + echo + return $RETVAL +} + +restart() +{ + stop + sleep 5 + start +} + +status() +{ + micctrl -s + if [ "`ps -e | awk '{print $4}' | grep mpssd | head -n 1`" = "mpssd" ]; then + echo "mpssd is running" + else + echo "mpssd is stopped" + fi + return 0 +} + +unload() +{ + if [ ! -d "$sysfs" ]; then + echo -n $"No MIC_HOST Module: " + success + echo + return + fi + + stop + + sleep 5 + echo -n $"Removing MIC_HOST & MIC_X100_DMA Modules: " + modprobe -r mic_host mic_x100_dma + RETVAL=$? + [ $RETVAL -ne 0 ] && failure || success + echo + return $RETVAL +} + +case $1 in + start) + start + ;; + stop) + stop + ;; + restart) + restart + ;; + status) + status + ;; + unload) + unload + ;; + *) + echo $"Usage: $0 {start|stop|restart|status|unload}" + exit 2 +esac + +exit $? diff --git a/samples/mic/mpssd/mpssd.c b/samples/mic/mpssd/mpssd.c new file mode 100644 index 000000000000..3c5c379fc29d --- /dev/null +++ b/samples/mic/mpssd/mpssd.c @@ -0,0 +1,1728 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC User Space Tools. + */ + +#define _GNU_SOURCE + +#include <stdlib.h> +#include <fcntl.h> +#include <getopt.h> +#include <assert.h> +#include <unistd.h> +#include <stdbool.h> +#include <signal.h> +#include <poll.h> +#include <features.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <linux/virtio_ring.h> +#include <linux/virtio_net.h> +#include <linux/virtio_console.h> +#include <linux/virtio_blk.h> +#include <linux/version.h> +#include "mpssd.h" +#include <linux/mic_ioctl.h> +#include <linux/mic_common.h> +#include <tools/endian.h> + +static void init_mic(struct mic_info *mic); + +static FILE *logfp; +static struct mic_info mic_list; + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +#define min_t(type, x, y) ({ \ + type __min1 = (x); \ + type __min2 = (y); \ + __min1 < __min2 ? __min1 : __min2; }) + +/* align addr on a size boundary - adjust address up/down if needed */ +#define _ALIGN_DOWN(addr, size) ((addr)&(~((size)-1))) +#define _ALIGN_UP(addr, size) _ALIGN_DOWN(addr + size - 1, size) + +/* align addr on a size boundary - adjust address up if needed */ +#define _ALIGN(addr, size) _ALIGN_UP(addr, size) + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) + +#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) + +#define GSO_ENABLED 1 +#define MAX_GSO_SIZE (64 * 1024) +#define ETH_H_LEN 14 +#define MAX_NET_PKT_SIZE (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64)) +#define MIC_DEVICE_PAGE_END 0x1000 + +#ifndef VIRTIO_NET_HDR_F_DATA_VALID +#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ +#endif + +static struct { + struct mic_device_desc dd; + struct mic_vqconfig vqconfig[2]; + __u32 host_features, guest_acknowledgements; + struct virtio_console_config cons_config; +} virtcons_dev_page = { + .dd = { + .type = VIRTIO_ID_CONSOLE, + .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig), + .feature_len = sizeof(virtcons_dev_page.host_features), + .config_len = sizeof(virtcons_dev_page.cons_config), + }, + .vqconfig[0] = { + .num = htole16(MIC_VRING_ENTRIES), + }, + .vqconfig[1] = { + .num = htole16(MIC_VRING_ENTRIES), + }, +}; + +static struct { + struct mic_device_desc dd; + struct mic_vqconfig vqconfig[2]; + __u32 host_features, guest_acknowledgements; + struct virtio_net_config net_config; +} virtnet_dev_page = { + .dd = { + .type = VIRTIO_ID_NET, + .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig), + .feature_len = sizeof(virtnet_dev_page.host_features), + .config_len = sizeof(virtnet_dev_page.net_config), + }, + .vqconfig[0] = { + .num = htole16(MIC_VRING_ENTRIES), + }, + .vqconfig[1] = { + .num = htole16(MIC_VRING_ENTRIES), + }, +#if GSO_ENABLED + .host_features = htole32( + 1 << VIRTIO_NET_F_CSUM | + 1 << VIRTIO_NET_F_GSO | + 1 << VIRTIO_NET_F_GUEST_TSO4 | + 1 << VIRTIO_NET_F_GUEST_TSO6 | + 1 << VIRTIO_NET_F_GUEST_ECN | + 1 << VIRTIO_NET_F_GUEST_UFO), +#else + .host_features = 0, +#endif +}; + +static const char *mic_config_dir = "/etc/sysconfig/mic"; +static const char *virtblk_backend = "VIRTBLK_BACKEND"; +static struct { + struct mic_device_desc dd; + struct mic_vqconfig vqconfig[1]; + __u32 host_features, guest_acknowledgements; + struct virtio_blk_config blk_config; +} virtblk_dev_page = { + .dd = { + .type = VIRTIO_ID_BLOCK, + .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig), + .feature_len = sizeof(virtblk_dev_page.host_features), + .config_len = sizeof(virtblk_dev_page.blk_config), + }, + .vqconfig[0] = { + .num = htole16(MIC_VRING_ENTRIES), + }, + .host_features = + htole32(1<<VIRTIO_BLK_F_SEG_MAX), + .blk_config = { + .seg_max = htole32(MIC_VRING_ENTRIES - 2), + .capacity = htole64(0), + } +}; + +static char *myname; + +static int +tap_configure(struct mic_info *mic, char *dev) +{ + pid_t pid; + char *ifargv[7]; + char ipaddr[IFNAMSIZ]; + int ret = 0; + + pid = fork(); + if (pid == 0) { + ifargv[0] = "ip"; + ifargv[1] = "link"; + ifargv[2] = "set"; + ifargv[3] = dev; + ifargv[4] = "up"; + ifargv[5] = NULL; + mpsslog("Configuring %s\n", dev); + ret = execvp("ip", ifargv); + if (ret < 0) { + mpsslog("%s execvp failed errno %s\n", + mic->name, strerror(errno)); + return ret; + } + } + if (pid < 0) { + mpsslog("%s fork failed errno %s\n", + mic->name, strerror(errno)); + return ret; + } + + ret = waitpid(pid, NULL, 0); + if (ret < 0) { + mpsslog("%s waitpid failed errno %s\n", + mic->name, strerror(errno)); + return ret; + } + + snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id); + + pid = fork(); + if (pid == 0) { + ifargv[0] = "ip"; + ifargv[1] = "addr"; + ifargv[2] = "add"; + ifargv[3] = ipaddr; + ifargv[4] = "dev"; + ifargv[5] = dev; + ifargv[6] = NULL; + mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr); + ret = execvp("ip", ifargv); + if (ret < 0) { + mpsslog("%s execvp failed errno %s\n", + mic->name, strerror(errno)); + return ret; + } + } + if (pid < 0) { + mpsslog("%s fork failed errno %s\n", + mic->name, strerror(errno)); + return ret; + } + + ret = waitpid(pid, NULL, 0); + if (ret < 0) { + mpsslog("%s waitpid failed errno %s\n", + mic->name, strerror(errno)); + return ret; + } + mpsslog("MIC name %s %s %d DONE!\n", + mic->name, __func__, __LINE__); + return 0; +} + +static int tun_alloc(struct mic_info *mic, char *dev) +{ + struct ifreq ifr; + int fd, err; +#if GSO_ENABLED + unsigned offload; +#endif + fd = open("/dev/net/tun", O_RDWR); + if (fd < 0) { + mpsslog("Could not open /dev/net/tun %s\n", strerror(errno)); + goto done; + } + + memset(&ifr, 0, sizeof(ifr)); + + ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; + if (*dev) + strncpy(ifr.ifr_name, dev, IFNAMSIZ); + + err = ioctl(fd, TUNSETIFF, (void *)&ifr); + if (err < 0) { + mpsslog("%s %s %d TUNSETIFF failed %s\n", + mic->name, __func__, __LINE__, strerror(errno)); + close(fd); + return err; + } +#if GSO_ENABLED + offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | + TUN_F_TSO_ECN | TUN_F_UFO; + + err = ioctl(fd, TUNSETOFFLOAD, offload); + if (err < 0) { + mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n", + mic->name, __func__, __LINE__, strerror(errno)); + close(fd); + return err; + } +#endif + strcpy(dev, ifr.ifr_name); + mpsslog("Created TAP %s\n", dev); +done: + return fd; +} + +#define NET_FD_VIRTIO_NET 0 +#define NET_FD_TUN 1 +#define MAX_NET_FD 2 + +static void set_dp(struct mic_info *mic, int type, void *dp) +{ + switch (type) { + case VIRTIO_ID_CONSOLE: + mic->mic_console.console_dp = dp; + return; + case VIRTIO_ID_NET: + mic->mic_net.net_dp = dp; + return; + case VIRTIO_ID_BLOCK: + mic->mic_virtblk.block_dp = dp; + return; + } + mpsslog("%s %s %d not found\n", mic->name, __func__, type); + assert(0); +} + +static void *get_dp(struct mic_info *mic, int type) +{ + switch (type) { + case VIRTIO_ID_CONSOLE: + return mic->mic_console.console_dp; + case VIRTIO_ID_NET: + return mic->mic_net.net_dp; + case VIRTIO_ID_BLOCK: + return mic->mic_virtblk.block_dp; + } + mpsslog("%s %s %d not found\n", mic->name, __func__, type); + assert(0); + return NULL; +} + +static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type) +{ + struct mic_device_desc *d; + int i; + void *dp = get_dp(mic, type); + + for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE; + i += mic_total_desc_size(d)) { + d = dp + i; + + /* End of list */ + if (d->type == 0) + break; + + if (d->type == -1) + continue; + + mpsslog("%s %s d-> type %d d %p\n", + mic->name, __func__, d->type, d); + + if (d->type == (__u8)type) + return d; + } + mpsslog("%s %s %d not found\n", mic->name, __func__, type); + assert(0); + return NULL; +} + +/* See comments in vhost.c for explanation of next_desc() */ +static unsigned next_desc(struct vring_desc *desc) +{ + unsigned int next; + + if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) + return -1U; + next = le16toh(desc->next); + return next; +} + +/* Sum up all the IOVEC length */ +static ssize_t +sum_iovec_len(struct mic_copy_desc *copy) +{ + ssize_t sum = 0; + int i; + + for (i = 0; i < copy->iovcnt; i++) + sum += copy->iov[i].iov_len; + return sum; +} + +static inline void verify_out_len(struct mic_info *mic, + struct mic_copy_desc *copy) +{ + if (copy->out_len != sum_iovec_len(copy)) { + mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n", + mic->name, __func__, __LINE__, + copy->out_len, sum_iovec_len(copy)); + assert(copy->out_len == sum_iovec_len(copy)); + } +} + +/* Display an iovec */ +static void +disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy, + const char *s, int line) +{ + int i; + + for (i = 0; i < copy->iovcnt; i++) + mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n", + mic->name, s, line, i, + copy->iov[i].iov_base, copy->iov[i].iov_len); +} + +static inline __u16 read_avail_idx(struct mic_vring *vr) +{ + return ACCESS_ONCE(vr->info->avail_idx); +} + +static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr, + struct mic_copy_desc *copy, ssize_t len) +{ + copy->vr_idx = tx ? 0 : 1; + copy->update_used = true; + if (type == VIRTIO_ID_NET) + copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr); + else + copy->iov[0].iov_len = len; +} + +/* Central API which triggers the copies */ +static int +mic_virtio_copy(struct mic_info *mic, int fd, + struct mic_vring *vr, struct mic_copy_desc *copy) +{ + int ret; + + ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy); + if (ret) { + mpsslog("%s %s %d errno %s ret %d\n", + mic->name, __func__, __LINE__, + strerror(errno), ret); + } + return ret; +} + +/* + * This initialization routine requires at least one + * vring i.e. vr0. vr1 is optional. + */ +static void * +init_vr(struct mic_info *mic, int fd, int type, + struct mic_vring *vr0, struct mic_vring *vr1, int num_vq) +{ + int vr_size; + char *va; + + vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES, + MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info)); + va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq, + PROT_READ, MAP_SHARED, fd, 0); + if (MAP_FAILED == va) { + mpsslog("%s %s %d mmap failed errno %s\n", + mic->name, __func__, __LINE__, + strerror(errno)); + goto done; + } + set_dp(mic, type, va); + vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END]; + vr0->info = vr0->va + + vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN); + vring_init(&vr0->vr, + MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN); + mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ", + __func__, mic->name, vr0->va, vr0->info, vr_size, + vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN)); + mpsslog("magic 0x%x expected 0x%x\n", + le32toh(vr0->info->magic), MIC_MAGIC + type); + assert(le32toh(vr0->info->magic) == MIC_MAGIC + type); + if (vr1) { + vr1->va = (struct mic_vring *) + &va[MIC_DEVICE_PAGE_END + vr_size]; + vr1->info = vr1->va + vring_size(MIC_VRING_ENTRIES, + MIC_VIRTIO_RING_ALIGN); + vring_init(&vr1->vr, + MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN); + mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ", + __func__, mic->name, vr1->va, vr1->info, vr_size, + vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN)); + mpsslog("magic 0x%x expected 0x%x\n", + le32toh(vr1->info->magic), MIC_MAGIC + type + 1); + assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1); + } +done: + return va; +} + +static void +wait_for_card_driver(struct mic_info *mic, int fd, int type) +{ + struct pollfd pollfd; + int err; + struct mic_device_desc *desc = get_device_desc(mic, type); + + pollfd.fd = fd; + mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n", + mic->name, __func__, type, desc->status); + while (1) { + pollfd.events = POLLIN; + pollfd.revents = 0; + err = poll(&pollfd, 1, -1); + if (err < 0) { + mpsslog("%s %s poll failed %s\n", + mic->name, __func__, strerror(errno)); + continue; + } + + if (pollfd.revents) { + mpsslog("%s %s Waiting... desc-> type %d status 0x%x\n", + mic->name, __func__, type, desc->status); + if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) { + mpsslog("%s %s poll.revents %d\n", + mic->name, __func__, pollfd.revents); + mpsslog("%s %s desc-> type %d status 0x%x\n", + mic->name, __func__, type, + desc->status); + break; + } + } + } +} + +/* Spin till we have some descriptors */ +static void +spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr) +{ + __u16 avail_idx = read_avail_idx(vr); + + while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) { +#ifdef DEBUG + mpsslog("%s %s waiting for desc avail %d info_avail %d\n", + mic->name, __func__, + le16toh(vr->vr.avail->idx), vr->info->avail_idx); +#endif + sched_yield(); + } +} + +static void * +virtio_net(void *arg) +{ + static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)]; + static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64))); + struct iovec vnet_iov[2][2] = { + { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) }, + { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } }, + { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) }, + { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } }, + }; + struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1]; + struct mic_info *mic = (struct mic_info *)arg; + char if_name[IFNAMSIZ]; + struct pollfd net_poll[MAX_NET_FD]; + struct mic_vring tx_vr, rx_vr; + struct mic_copy_desc copy; + struct mic_device_desc *desc; + int err; + + snprintf(if_name, IFNAMSIZ, "mic%d", mic->id); + mic->mic_net.tap_fd = tun_alloc(mic, if_name); + if (mic->mic_net.tap_fd < 0) + goto done; + + if (tap_configure(mic, if_name)) + goto done; + mpsslog("MIC name %s id %d\n", mic->name, mic->id); + + net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd; + net_poll[NET_FD_VIRTIO_NET].events = POLLIN; + net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd; + net_poll[NET_FD_TUN].events = POLLIN; + + if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd, + VIRTIO_ID_NET, &tx_vr, &rx_vr, + virtnet_dev_page.dd.num_vq)) { + mpsslog("%s init_vr failed %s\n", + mic->name, strerror(errno)); + goto done; + } + + copy.iovcnt = 2; + desc = get_device_desc(mic, VIRTIO_ID_NET); + + while (1) { + ssize_t len; + + net_poll[NET_FD_VIRTIO_NET].revents = 0; + net_poll[NET_FD_TUN].revents = 0; + + /* Start polling for data from tap and virtio net */ + err = poll(net_poll, 2, -1); + if (err < 0) { + mpsslog("%s poll failed %s\n", + __func__, strerror(errno)); + continue; + } + if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) + wait_for_card_driver(mic, mic->mic_net.virtio_net_fd, + VIRTIO_ID_NET); + /* + * Check if there is data to be read from TUN and write to + * virtio net fd if there is. + */ + if (net_poll[NET_FD_TUN].revents & POLLIN) { + copy.iov = iov0; + len = readv(net_poll[NET_FD_TUN].fd, + copy.iov, copy.iovcnt); + if (len > 0) { + struct virtio_net_hdr *hdr + = (struct virtio_net_hdr *)vnet_hdr[0]; + + /* Disable checksums on the card since we are on + a reliable PCIe link */ + hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID; +#ifdef DEBUG + mpsslog("%s %s %d hdr->flags 0x%x ", mic->name, + __func__, __LINE__, hdr->flags); + mpsslog("copy.out_len %d hdr->gso_type 0x%x\n", + copy.out_len, hdr->gso_type); +#endif +#ifdef DEBUG + disp_iovec(mic, copy, __func__, __LINE__); + mpsslog("%s %s %d read from tap 0x%lx\n", + mic->name, __func__, __LINE__, + len); +#endif + spin_for_descriptors(mic, &tx_vr); + txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, ©, + len); + + err = mic_virtio_copy(mic, + mic->mic_net.virtio_net_fd, &tx_vr, + ©); + if (err < 0) { + mpsslog("%s %s %d mic_virtio_copy %s\n", + mic->name, __func__, __LINE__, + strerror(errno)); + } + if (!err) + verify_out_len(mic, ©); +#ifdef DEBUG + disp_iovec(mic, copy, __func__, __LINE__); + mpsslog("%s %s %d wrote to net 0x%lx\n", + mic->name, __func__, __LINE__, + sum_iovec_len(©)); +#endif + /* Reinitialize IOV for next run */ + iov0[1].iov_len = MAX_NET_PKT_SIZE; + } else if (len < 0) { + disp_iovec(mic, ©, __func__, __LINE__); + mpsslog("%s %s %d read failed %s ", mic->name, + __func__, __LINE__, strerror(errno)); + mpsslog("cnt %d sum %zd\n", + copy.iovcnt, sum_iovec_len(©)); + } + } + + /* + * Check if there is data to be read from virtio net and + * write to TUN if there is. + */ + if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) { + while (rx_vr.info->avail_idx != + le16toh(rx_vr.vr.avail->idx)) { + copy.iov = iov1; + txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, ©, + MAX_NET_PKT_SIZE + + sizeof(struct virtio_net_hdr)); + + err = mic_virtio_copy(mic, + mic->mic_net.virtio_net_fd, &rx_vr, + ©); + if (!err) { +#ifdef DEBUG + struct virtio_net_hdr *hdr + = (struct virtio_net_hdr *) + vnet_hdr[1]; + + mpsslog("%s %s %d hdr->flags 0x%x, ", + mic->name, __func__, __LINE__, + hdr->flags); + mpsslog("out_len %d gso_type 0x%x\n", + copy.out_len, + hdr->gso_type); +#endif + /* Set the correct output iov_len */ + iov1[1].iov_len = copy.out_len - + sizeof(struct virtio_net_hdr); + verify_out_len(mic, ©); +#ifdef DEBUG + disp_iovec(mic, copy, __func__, + __LINE__); + mpsslog("%s %s %d ", + mic->name, __func__, __LINE__); + mpsslog("read from net 0x%lx\n", + sum_iovec_len(copy)); +#endif + len = writev(net_poll[NET_FD_TUN].fd, + copy.iov, copy.iovcnt); + if (len != sum_iovec_len(©)) { + mpsslog("Tun write failed %s ", + strerror(errno)); + mpsslog("len 0x%zx ", len); + mpsslog("read_len 0x%zx\n", + sum_iovec_len(©)); + } else { +#ifdef DEBUG + disp_iovec(mic, ©, __func__, + __LINE__); + mpsslog("%s %s %d ", + mic->name, __func__, + __LINE__); + mpsslog("wrote to tap 0x%lx\n", + len); +#endif + } + } else { + mpsslog("%s %s %d mic_virtio_copy %s\n", + mic->name, __func__, __LINE__, + strerror(errno)); + break; + } + } + } + if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR) + mpsslog("%s: %s: POLLERR\n", __func__, mic->name); + } +done: + pthread_exit(NULL); +} + +/* virtio_console */ +#define VIRTIO_CONSOLE_FD 0 +#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1) +#define MAX_CONSOLE_FD (MONITOR_FD + 1) /* must be the last one + 1 */ +#define MAX_BUFFER_SIZE PAGE_SIZE + +static void * +virtio_console(void *arg) +{ + static __u8 vcons_buf[2][PAGE_SIZE]; + struct iovec vcons_iov[2] = { + { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) }, + { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) }, + }; + struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1]; + struct mic_info *mic = (struct mic_info *)arg; + int err; + struct pollfd console_poll[MAX_CONSOLE_FD]; + int pty_fd; + char *pts_name; + ssize_t len; + struct mic_vring tx_vr, rx_vr; + struct mic_copy_desc copy; + struct mic_device_desc *desc; + + pty_fd = posix_openpt(O_RDWR); + if (pty_fd < 0) { + mpsslog("can't open a pseudoterminal master device: %s\n", + strerror(errno)); + goto _return; + } + pts_name = ptsname(pty_fd); + if (pts_name == NULL) { + mpsslog("can't get pts name\n"); + goto _close_pty; + } + printf("%s console message goes to %s\n", mic->name, pts_name); + mpsslog("%s console message goes to %s\n", mic->name, pts_name); + err = grantpt(pty_fd); + if (err < 0) { + mpsslog("can't grant access: %s %s\n", + pts_name, strerror(errno)); + goto _close_pty; + } + err = unlockpt(pty_fd); + if (err < 0) { + mpsslog("can't unlock a pseudoterminal: %s %s\n", + pts_name, strerror(errno)); + goto _close_pty; + } + console_poll[MONITOR_FD].fd = pty_fd; + console_poll[MONITOR_FD].events = POLLIN; + + console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd; + console_poll[VIRTIO_CONSOLE_FD].events = POLLIN; + + if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd, + VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr, + virtcons_dev_page.dd.num_vq)) { + mpsslog("%s init_vr failed %s\n", + mic->name, strerror(errno)); + goto _close_pty; + } + + copy.iovcnt = 1; + desc = get_device_desc(mic, VIRTIO_ID_CONSOLE); + + for (;;) { + console_poll[MONITOR_FD].revents = 0; + console_poll[VIRTIO_CONSOLE_FD].revents = 0; + err = poll(console_poll, MAX_CONSOLE_FD, -1); + if (err < 0) { + mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__, + strerror(errno)); + continue; + } + if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) + wait_for_card_driver(mic, + mic->mic_console.virtio_console_fd, + VIRTIO_ID_CONSOLE); + + if (console_poll[MONITOR_FD].revents & POLLIN) { + copy.iov = iov0; + len = readv(pty_fd, copy.iov, copy.iovcnt); + if (len > 0) { +#ifdef DEBUG + disp_iovec(mic, copy, __func__, __LINE__); + mpsslog("%s %s %d read from tap 0x%lx\n", + mic->name, __func__, __LINE__, + len); +#endif + spin_for_descriptors(mic, &tx_vr); + txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr, + ©, len); + + err = mic_virtio_copy(mic, + mic->mic_console.virtio_console_fd, + &tx_vr, ©); + if (err < 0) { + mpsslog("%s %s %d mic_virtio_copy %s\n", + mic->name, __func__, __LINE__, + strerror(errno)); + } + if (!err) + verify_out_len(mic, ©); +#ifdef DEBUG + disp_iovec(mic, copy, __func__, __LINE__); + mpsslog("%s %s %d wrote to net 0x%lx\n", + mic->name, __func__, __LINE__, + sum_iovec_len(copy)); +#endif + /* Reinitialize IOV for next run */ + iov0->iov_len = PAGE_SIZE; + } else if (len < 0) { + disp_iovec(mic, ©, __func__, __LINE__); + mpsslog("%s %s %d read failed %s ", + mic->name, __func__, __LINE__, + strerror(errno)); + mpsslog("cnt %d sum %zd\n", + copy.iovcnt, sum_iovec_len(©)); + } + } + + if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) { + while (rx_vr.info->avail_idx != + le16toh(rx_vr.vr.avail->idx)) { + copy.iov = iov1; + txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr, + ©, PAGE_SIZE); + + err = mic_virtio_copy(mic, + mic->mic_console.virtio_console_fd, + &rx_vr, ©); + if (!err) { + /* Set the correct output iov_len */ + iov1->iov_len = copy.out_len; + verify_out_len(mic, ©); +#ifdef DEBUG + disp_iovec(mic, copy, __func__, + __LINE__); + mpsslog("%s %s %d ", + mic->name, __func__, __LINE__); + mpsslog("read from net 0x%lx\n", + sum_iovec_len(copy)); +#endif + len = writev(pty_fd, + copy.iov, copy.iovcnt); + if (len != sum_iovec_len(©)) { + mpsslog("Tun write failed %s ", + strerror(errno)); + mpsslog("len 0x%zx ", len); + mpsslog("read_len 0x%zx\n", + sum_iovec_len(©)); + } else { +#ifdef DEBUG + disp_iovec(mic, copy, __func__, + __LINE__); + mpsslog("%s %s %d ", + mic->name, __func__, + __LINE__); + mpsslog("wrote to tap 0x%lx\n", + len); +#endif + } + } else { + mpsslog("%s %s %d mic_virtio_copy %s\n", + mic->name, __func__, __LINE__, + strerror(errno)); + break; + } + } + } + if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR) + mpsslog("%s: %s: POLLERR\n", __func__, mic->name); + } +_close_pty: + close(pty_fd); +_return: + pthread_exit(NULL); +} + +static void +add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd) +{ + char path[PATH_MAX]; + int fd, err; + + snprintf(path, PATH_MAX, "/dev/mic%d", mic->id); + fd = open(path, O_RDWR); + if (fd < 0) { + mpsslog("Could not open %s %s\n", path, strerror(errno)); + return; + } + + err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd); + if (err < 0) { + mpsslog("Could not add %d %s\n", dd->type, strerror(errno)); + close(fd); + return; + } + switch (dd->type) { + case VIRTIO_ID_NET: + mic->mic_net.virtio_net_fd = fd; + mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name); + break; + case VIRTIO_ID_CONSOLE: + mic->mic_console.virtio_console_fd = fd; + mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name); + break; + case VIRTIO_ID_BLOCK: + mic->mic_virtblk.virtio_block_fd = fd; + mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name); + break; + } +} + +static bool +set_backend_file(struct mic_info *mic) +{ + FILE *config; + char buff[PATH_MAX], *line, *evv, *p; + + snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id); + config = fopen(buff, "r"); + if (config == NULL) + return false; + do { /* look for "virtblk_backend=XXXX" */ + line = fgets(buff, PATH_MAX, config); + if (line == NULL) + break; + if (*line == '#') + continue; + p = strchr(line, '\n'); + if (p) + *p = '\0'; + } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0); + fclose(config); + if (line == NULL) + return false; + evv = strchr(line, '='); + if (evv == NULL) + return false; + mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1); + if (mic->mic_virtblk.backend_file == NULL) { + mpsslog("%s %d can't allocate memory\n", mic->name, mic->id); + return false; + } + strcpy(mic->mic_virtblk.backend_file, evv + 1); + return true; +} + +#define SECTOR_SIZE 512 +static bool +set_backend_size(struct mic_info *mic) +{ + mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0, + SEEK_END); + if (mic->mic_virtblk.backend_size < 0) { + mpsslog("%s: can't seek: %s\n", + mic->name, mic->mic_virtblk.backend_file); + return false; + } + virtblk_dev_page.blk_config.capacity = + mic->mic_virtblk.backend_size / SECTOR_SIZE; + if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0) + virtblk_dev_page.blk_config.capacity++; + + virtblk_dev_page.blk_config.capacity = + htole64(virtblk_dev_page.blk_config.capacity); + + return true; +} + +static bool +open_backend(struct mic_info *mic) +{ + if (!set_backend_file(mic)) + goto _error_exit; + mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR); + if (mic->mic_virtblk.backend < 0) { + mpsslog("%s: can't open: %s\n", mic->name, + mic->mic_virtblk.backend_file); + goto _error_free; + } + if (!set_backend_size(mic)) + goto _error_close; + mic->mic_virtblk.backend_addr = mmap(NULL, + mic->mic_virtblk.backend_size, + PROT_READ|PROT_WRITE, MAP_SHARED, + mic->mic_virtblk.backend, 0L); + if (mic->mic_virtblk.backend_addr == MAP_FAILED) { + mpsslog("%s: can't map: %s %s\n", + mic->name, mic->mic_virtblk.backend_file, + strerror(errno)); + goto _error_close; + } + return true; + + _error_close: + close(mic->mic_virtblk.backend); + _error_free: + free(mic->mic_virtblk.backend_file); + _error_exit: + return false; +} + +static void +close_backend(struct mic_info *mic) +{ + munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size); + close(mic->mic_virtblk.backend); + free(mic->mic_virtblk.backend_file); +} + +static bool +start_virtblk(struct mic_info *mic, struct mic_vring *vring) +{ + if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) { + mpsslog("%s: blk_config is not 8 byte aligned.\n", + mic->name); + return false; + } + add_virtio_device(mic, &virtblk_dev_page.dd); + if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd, + VIRTIO_ID_BLOCK, vring, NULL, + virtblk_dev_page.dd.num_vq)) { + mpsslog("%s init_vr failed %s\n", + mic->name, strerror(errno)); + return false; + } + return true; +} + +static void +stop_virtblk(struct mic_info *mic) +{ + int vr_size, ret; + + vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES, + MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info)); + ret = munmap(mic->mic_virtblk.block_dp, + MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq); + if (ret < 0) + mpsslog("%s munmap errno %d\n", mic->name, errno); + close(mic->mic_virtblk.virtio_block_fd); +} + +static __u8 +header_error_check(struct vring_desc *desc) +{ + if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) { + mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n", + __func__, __LINE__); + return -EIO; + } + if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) { + mpsslog("%s() %d: alone\n", + __func__, __LINE__); + return -EIO; + } + if (le16toh(desc->flags) & VRING_DESC_F_WRITE) { + mpsslog("%s() %d: not read\n", + __func__, __LINE__); + return -EIO; + } + return 0; +} + +static int +read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx) +{ + struct iovec iovec; + struct mic_copy_desc copy; + + iovec.iov_len = sizeof(*hdr); + iovec.iov_base = hdr; + copy.iov = &iovec; + copy.iovcnt = 1; + copy.vr_idx = 0; /* only one vring on virtio_block */ + copy.update_used = false; /* do not update used index */ + return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©); +} + +static int +transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt) +{ + struct mic_copy_desc copy; + + copy.iov = iovec; + copy.iovcnt = iovcnt; + copy.vr_idx = 0; /* only one vring on virtio_block */ + copy.update_used = false; /* do not update used index */ + return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©); +} + +static __u8 +status_error_check(struct vring_desc *desc) +{ + if (le32toh(desc->len) != sizeof(__u8)) { + mpsslog("%s() %d: length is not sizeof(status)\n", + __func__, __LINE__); + return -EIO; + } + return 0; +} + +static int +write_status(int fd, __u8 *status) +{ + struct iovec iovec; + struct mic_copy_desc copy; + + iovec.iov_base = status; + iovec.iov_len = sizeof(*status); + copy.iov = &iovec; + copy.iovcnt = 1; + copy.vr_idx = 0; /* only one vring on virtio_block */ + copy.update_used = true; /* Update used index */ + return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©); +} + +static void * +virtio_block(void *arg) +{ + struct mic_info *mic = (struct mic_info *)arg; + int ret; + struct pollfd block_poll; + struct mic_vring vring; + __u16 avail_idx; + __u32 desc_idx; + struct vring_desc *desc; + struct iovec *iovec, *piov; + __u8 status; + __u32 buffer_desc_idx; + struct virtio_blk_outhdr hdr; + void *fos; + + for (;;) { /* forever */ + if (!open_backend(mic)) { /* No virtblk */ + for (mic->mic_virtblk.signaled = 0; + !mic->mic_virtblk.signaled;) + sleep(1); + continue; + } + + /* backend file is specified. */ + if (!start_virtblk(mic, &vring)) + goto _close_backend; + iovec = malloc(sizeof(*iovec) * + le32toh(virtblk_dev_page.blk_config.seg_max)); + if (!iovec) { + mpsslog("%s: can't alloc iovec: %s\n", + mic->name, strerror(ENOMEM)); + goto _stop_virtblk; + } + + block_poll.fd = mic->mic_virtblk.virtio_block_fd; + block_poll.events = POLLIN; + for (mic->mic_virtblk.signaled = 0; + !mic->mic_virtblk.signaled;) { + block_poll.revents = 0; + /* timeout in 1 sec to see signaled */ + ret = poll(&block_poll, 1, 1000); + if (ret < 0) { + mpsslog("%s %d: poll failed: %s\n", + __func__, __LINE__, + strerror(errno)); + continue; + } + + if (!(block_poll.revents & POLLIN)) { +#ifdef DEBUG + mpsslog("%s %d: block_poll.revents=0x%x\n", + __func__, __LINE__, block_poll.revents); +#endif + continue; + } + + /* POLLIN */ + while (vring.info->avail_idx != + le16toh(vring.vr.avail->idx)) { + /* read header element */ + avail_idx = + vring.info->avail_idx & + (vring.vr.num - 1); + desc_idx = le16toh( + vring.vr.avail->ring[avail_idx]); + desc = &vring.vr.desc[desc_idx]; +#ifdef DEBUG + mpsslog("%s() %d: avail_idx=%d ", + __func__, __LINE__, + vring.info->avail_idx); + mpsslog("vring.vr.num=%d desc=%p\n", + vring.vr.num, desc); +#endif + status = header_error_check(desc); + ret = read_header( + mic->mic_virtblk.virtio_block_fd, + &hdr, desc_idx); + if (ret < 0) { + mpsslog("%s() %d %s: ret=%d %s\n", + __func__, __LINE__, + mic->name, ret, + strerror(errno)); + break; + } + /* buffer element */ + piov = iovec; + status = 0; + fos = mic->mic_virtblk.backend_addr + + (hdr.sector * SECTOR_SIZE); + buffer_desc_idx = next_desc(desc); + desc_idx = buffer_desc_idx; + for (desc = &vring.vr.desc[buffer_desc_idx]; + desc->flags & VRING_DESC_F_NEXT; + desc_idx = next_desc(desc), + desc = &vring.vr.desc[desc_idx]) { + piov->iov_len = desc->len; + piov->iov_base = fos; + piov++; + fos += desc->len; + } + /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */ + if (hdr.type & ~(VIRTIO_BLK_T_OUT | + VIRTIO_BLK_T_GET_ID)) { + /* + VIRTIO_BLK_T_IN - does not do + anything. Probably for documenting. + VIRTIO_BLK_T_SCSI_CMD - for + virtio_scsi. + VIRTIO_BLK_T_FLUSH - turned off in + config space. + VIRTIO_BLK_T_BARRIER - defined but not + used in anywhere. + */ + mpsslog("%s() %d: type %x ", + __func__, __LINE__, + hdr.type); + mpsslog("is not supported\n"); + status = -ENOTSUP; + + } else { + ret = transfer_blocks( + mic->mic_virtblk.virtio_block_fd, + iovec, + piov - iovec); + if (ret < 0 && + status != 0) + status = ret; + } + /* write status and update used pointer */ + if (status != 0) + status = status_error_check(desc); + ret = write_status( + mic->mic_virtblk.virtio_block_fd, + &status); +#ifdef DEBUG + mpsslog("%s() %d: write status=%d on desc=%p\n", + __func__, __LINE__, + status, desc); +#endif + } + } + free(iovec); +_stop_virtblk: + stop_virtblk(mic); +_close_backend: + close_backend(mic); + } /* forever */ + + pthread_exit(NULL); +} + +static void +reset(struct mic_info *mic) +{ +#define RESET_TIMEOUT 120 + int i = RESET_TIMEOUT; + setsysfs(mic->name, "state", "reset"); + while (i) { + char *state; + state = readsysfs(mic->name, "state"); + if (!state) + goto retry; + mpsslog("%s: %s %d state %s\n", + mic->name, __func__, __LINE__, state); + + /* + * If the shutdown was initiated by OSPM, the state stays + * in "suspended" which is also a valid condition for reset. + */ + if ((!strcmp(state, "offline")) || + (!strcmp(state, "suspended"))) { + free(state); + break; + } + free(state); +retry: + sleep(1); + i--; + } +} + +static int +get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status) +{ + if (!strcmp(shutdown_status, "nop")) + return MIC_NOP; + if (!strcmp(shutdown_status, "crashed")) + return MIC_CRASHED; + if (!strcmp(shutdown_status, "halted")) + return MIC_HALTED; + if (!strcmp(shutdown_status, "poweroff")) + return MIC_POWER_OFF; + if (!strcmp(shutdown_status, "restart")) + return MIC_RESTART; + mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status); + /* Invalid state */ + assert(0); +}; + +static int get_mic_state(struct mic_info *mic, char *state) +{ + if (!strcmp(state, "offline")) + return MIC_OFFLINE; + if (!strcmp(state, "online")) + return MIC_ONLINE; + if (!strcmp(state, "shutting_down")) + return MIC_SHUTTING_DOWN; + if (!strcmp(state, "reset_failed")) + return MIC_RESET_FAILED; + if (!strcmp(state, "suspending")) + return MIC_SUSPENDING; + if (!strcmp(state, "suspended")) + return MIC_SUSPENDED; + mpsslog("%s: BUG invalid state %s\n", mic->name, state); + /* Invalid state */ + assert(0); +}; + +static void mic_handle_shutdown(struct mic_info *mic) +{ +#define SHUTDOWN_TIMEOUT 60 + int i = SHUTDOWN_TIMEOUT, ret, stat = 0; + char *shutdown_status; + while (i) { + shutdown_status = readsysfs(mic->name, "shutdown_status"); + if (!shutdown_status) + continue; + mpsslog("%s: %s %d shutdown_status %s\n", + mic->name, __func__, __LINE__, shutdown_status); + switch (get_mic_shutdown_status(mic, shutdown_status)) { + case MIC_RESTART: + mic->restart = 1; + case MIC_HALTED: + case MIC_POWER_OFF: + case MIC_CRASHED: + free(shutdown_status); + goto reset; + default: + break; + } + free(shutdown_status); + sleep(1); + i--; + } +reset: + ret = kill(mic->pid, SIGTERM); + mpsslog("%s: %s %d kill pid %d ret %d\n", + mic->name, __func__, __LINE__, + mic->pid, ret); + if (!ret) { + ret = waitpid(mic->pid, &stat, + WIFSIGNALED(stat)); + mpsslog("%s: %s %d waitpid ret %d pid %d\n", + mic->name, __func__, __LINE__, + ret, mic->pid); + } + if (ret == mic->pid) + reset(mic); +} + +static void * +mic_config(void *arg) +{ + struct mic_info *mic = (struct mic_info *)arg; + char *state = NULL; + char pathname[PATH_MAX]; + int fd, ret; + struct pollfd ufds[1]; + char value[4096]; + + snprintf(pathname, PATH_MAX - 1, "%s/%s/%s", + MICSYSFSDIR, mic->name, "state"); + + fd = open(pathname, O_RDONLY); + if (fd < 0) { + mpsslog("%s: opening file %s failed %s\n", + mic->name, pathname, strerror(errno)); + goto error; + } + + do { + ret = lseek(fd, 0, SEEK_SET); + if (ret < 0) { + mpsslog("%s: Failed to seek to file start '%s': %s\n", + mic->name, pathname, strerror(errno)); + goto close_error1; + } + ret = read(fd, value, sizeof(value)); + if (ret < 0) { + mpsslog("%s: Failed to read sysfs entry '%s': %s\n", + mic->name, pathname, strerror(errno)); + goto close_error1; + } +retry: + state = readsysfs(mic->name, "state"); + if (!state) + goto retry; + mpsslog("%s: %s %d state %s\n", + mic->name, __func__, __LINE__, state); + switch (get_mic_state(mic, state)) { + case MIC_SHUTTING_DOWN: + mic_handle_shutdown(mic); + goto close_error; + case MIC_SUSPENDING: + mic->boot_on_resume = 1; + setsysfs(mic->name, "state", "suspend"); + mic_handle_shutdown(mic); + goto close_error; + case MIC_OFFLINE: + if (mic->boot_on_resume) { + setsysfs(mic->name, "state", "boot"); + mic->boot_on_resume = 0; + } + break; + default: + break; + } + free(state); + + ufds[0].fd = fd; + ufds[0].events = POLLERR | POLLPRI; + ret = poll(ufds, 1, -1); + if (ret < 0) { + mpsslog("%s: poll failed %s\n", + mic->name, strerror(errno)); + goto close_error1; + } + } while (1); +close_error: + free(state); +close_error1: + close(fd); +error: + init_mic(mic); + pthread_exit(NULL); +} + +static void +set_cmdline(struct mic_info *mic) +{ + char buffer[PATH_MAX]; + int len; + + len = snprintf(buffer, PATH_MAX, + "clocksource=tsc highres=off nohz=off "); + len += snprintf(buffer + len, PATH_MAX - len, + "cpufreq_on;corec6_off;pc3_off;pc6_off "); + len += snprintf(buffer + len, PATH_MAX - len, + "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0", + mic->id); + + setsysfs(mic->name, "cmdline", buffer); + mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer); + snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id); + mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer); +} + +static void +set_log_buf_info(struct mic_info *mic) +{ + int fd; + off_t len; + char system_map[] = "/lib/firmware/mic/System.map"; + char *map, *temp, log_buf[17] = {'\0'}; + + fd = open(system_map, O_RDONLY); + if (fd < 0) { + mpsslog("%s: Opening System.map failed: %d\n", + mic->name, errno); + return; + } + len = lseek(fd, 0, SEEK_END); + if (len < 0) { + mpsslog("%s: Reading System.map size failed: %d\n", + mic->name, errno); + close(fd); + return; + } + map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0); + if (map == MAP_FAILED) { + mpsslog("%s: mmap of System.map failed: %d\n", + mic->name, errno); + close(fd); + return; + } + temp = strstr(map, "__log_buf"); + if (!temp) { + mpsslog("%s: __log_buf not found: %d\n", mic->name, errno); + munmap(map, len); + close(fd); + return; + } + strncpy(log_buf, temp - 19, 16); + setsysfs(mic->name, "log_buf_addr", log_buf); + mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf); + temp = strstr(map, "log_buf_len"); + if (!temp) { + mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno); + munmap(map, len); + close(fd); + return; + } + strncpy(log_buf, temp - 19, 16); + setsysfs(mic->name, "log_buf_len", log_buf); + mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf); + munmap(map, len); + close(fd); +} + +static void init_mic(struct mic_info *mic); + +static void +change_virtblk_backend(int x, siginfo_t *siginfo, void *p) +{ + struct mic_info *mic; + + for (mic = mic_list.next; mic != NULL; mic = mic->next) + mic->mic_virtblk.signaled = 1/* true */; +} + +static void +init_mic(struct mic_info *mic) +{ + struct sigaction ignore = { + .sa_flags = 0, + .sa_handler = SIG_IGN + }; + struct sigaction act = { + .sa_flags = SA_SIGINFO, + .sa_sigaction = change_virtblk_backend, + }; + char buffer[PATH_MAX]; + int err; + + /* + * Currently, one virtio block device is supported for each MIC card + * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon. + * The signal informs the virtio block backend about a change in the + * configuration file which specifies the virtio backend file name on + * the host. Virtio block backend then re-reads the configuration file + * and switches to the new block device. This signalling mechanism may + * not be required once multiple virtio block devices are supported by + * the MIC daemon. + */ + sigaction(SIGUSR1, &ignore, NULL); + + mic->pid = fork(); + switch (mic->pid) { + case 0: + set_log_buf_info(mic); + set_cmdline(mic); + add_virtio_device(mic, &virtcons_dev_page.dd); + add_virtio_device(mic, &virtnet_dev_page.dd); + err = pthread_create(&mic->mic_console.console_thread, NULL, + virtio_console, mic); + if (err) + mpsslog("%s virtcons pthread_create failed %s\n", + mic->name, strerror(err)); + err = pthread_create(&mic->mic_net.net_thread, NULL, + virtio_net, mic); + if (err) + mpsslog("%s virtnet pthread_create failed %s\n", + mic->name, strerror(err)); + err = pthread_create(&mic->mic_virtblk.block_thread, NULL, + virtio_block, mic); + if (err) + mpsslog("%s virtblk pthread_create failed %s\n", + mic->name, strerror(err)); + sigemptyset(&act.sa_mask); + err = sigaction(SIGUSR1, &act, NULL); + if (err) + mpsslog("%s sigaction SIGUSR1 failed %s\n", + mic->name, strerror(errno)); + while (1) + sleep(60); + case -1: + mpsslog("fork failed MIC name %s id %d errno %d\n", + mic->name, mic->id, errno); + break; + default: + if (mic->restart) { + snprintf(buffer, PATH_MAX, "boot"); + setsysfs(mic->name, "state", buffer); + mpsslog("%s restarting mic %d\n", + mic->name, mic->restart); + mic->restart = 0; + } + pthread_create(&mic->config_thread, NULL, mic_config, mic); + } +} + +static void +start_daemon(void) +{ + struct mic_info *mic; + + for (mic = mic_list.next; mic != NULL; mic = mic->next) + init_mic(mic); + + while (1) + sleep(60); +} + +static int +init_mic_list(void) +{ + struct mic_info *mic = &mic_list; + struct dirent *file; + DIR *dp; + int cnt = 0; + + dp = opendir(MICSYSFSDIR); + if (!dp) + return 0; + + while ((file = readdir(dp)) != NULL) { + if (!strncmp(file->d_name, "mic", 3)) { + mic->next = calloc(1, sizeof(struct mic_info)); + if (mic->next) { + mic = mic->next; + mic->id = atoi(&file->d_name[3]); + mic->name = malloc(strlen(file->d_name) + 16); + if (mic->name) + strcpy(mic->name, file->d_name); + mpsslog("MIC name %s id %d\n", mic->name, + mic->id); + cnt++; + } + } + } + + closedir(dp); + return cnt; +} + +void +mpsslog(char *format, ...) +{ + va_list args; + char buffer[4096]; + char ts[52], *ts1; + time_t t; + + if (logfp == NULL) + return; + + va_start(args, format); + vsprintf(buffer, format, args); + va_end(args); + + time(&t); + ts1 = ctime_r(&t, ts); + ts1[strlen(ts1) - 1] = '\0'; + fprintf(logfp, "%s: %s", ts1, buffer); + + fflush(logfp); +} + +int +main(int argc, char *argv[]) +{ + int cnt; + pid_t pid; + + myname = argv[0]; + + logfp = fopen(LOGFILE_NAME, "a+"); + if (!logfp) { + fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME); + exit(1); + } + pid = fork(); + switch (pid) { + case 0: + break; + case -1: + exit(2); + default: + exit(0); + } + + mpsslog("MIC Daemon start\n"); + + cnt = init_mic_list(); + if (cnt == 0) { + mpsslog("MIC module not loaded\n"); + exit(3); + } + mpsslog("MIC found %d devices\n", cnt); + + start_daemon(); + + exit(0); +} diff --git a/samples/mic/mpssd/mpssd.h b/samples/mic/mpssd/mpssd.h new file mode 100644 index 000000000000..f5f18b15d9a0 --- /dev/null +++ b/samples/mic/mpssd/mpssd.h @@ -0,0 +1,102 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC User Space Tools. + */ +#ifndef _MPSSD_H_ +#define _MPSSD_H_ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include <dirent.h> +#include <libgen.h> +#include <pthread.h> +#include <stdarg.h> +#include <time.h> +#include <errno.h> +#include <sys/dir.h> +#include <sys/ioctl.h> +#include <sys/poll.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/utsname.h> +#include <sys/wait.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <netdb.h> +#include <pthread.h> +#include <signal.h> +#include <limits.h> +#include <syslog.h> +#include <getopt.h> +#include <net/if.h> +#include <linux/if_tun.h> +#include <linux/if_tun.h> +#include <linux/virtio_ids.h> + +#define MICSYSFSDIR "/sys/class/mic" +#define LOGFILE_NAME "/var/log/mpssd" +#define PAGE_SIZE 4096 + +struct mic_console_info { + pthread_t console_thread; + int virtio_console_fd; + void *console_dp; +}; + +struct mic_net_info { + pthread_t net_thread; + int virtio_net_fd; + int tap_fd; + void *net_dp; +}; + +struct mic_virtblk_info { + pthread_t block_thread; + int virtio_block_fd; + void *block_dp; + volatile sig_atomic_t signaled; + char *backend_file; + int backend; + void *backend_addr; + long backend_size; +}; + +struct mic_info { + int id; + char *name; + pthread_t config_thread; + pid_t pid; + struct mic_console_info mic_console; + struct mic_net_info mic_net; + struct mic_virtblk_info mic_virtblk; + int restart; + int boot_on_resume; + struct mic_info *next; +}; + +__attribute__((format(printf, 1, 2))) +void mpsslog(char *format, ...); +char *readsysfs(char *dir, char *entry); +int setsysfs(char *dir, char *entry, char *value); +#endif diff --git a/samples/mic/mpssd/sysfs.c b/samples/mic/mpssd/sysfs.c new file mode 100644 index 000000000000..8dd326936083 --- /dev/null +++ b/samples/mic/mpssd/sysfs.c @@ -0,0 +1,102 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC User Space Tools. + */ + +#include "mpssd.h" + +#define PAGE_SIZE 4096 + +char * +readsysfs(char *dir, char *entry) +{ + char filename[PATH_MAX]; + char value[PAGE_SIZE]; + char *string = NULL; + int fd; + int len; + + if (dir == NULL) + snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry); + else + snprintf(filename, PATH_MAX, + "%s/%s/%s", MICSYSFSDIR, dir, entry); + + fd = open(filename, O_RDONLY); + if (fd < 0) { + mpsslog("Failed to open sysfs entry '%s': %s\n", + filename, strerror(errno)); + return NULL; + } + + len = read(fd, value, sizeof(value)); + if (len < 0) { + mpsslog("Failed to read sysfs entry '%s': %s\n", + filename, strerror(errno)); + goto readsys_ret; + } + if (len == 0) + goto readsys_ret; + + value[len - 1] = '\0'; + + string = malloc(strlen(value) + 1); + if (string) + strcpy(string, value); + +readsys_ret: + close(fd); + return string; +} + +int +setsysfs(char *dir, char *entry, char *value) +{ + char filename[PATH_MAX]; + char *oldvalue; + int fd, ret = 0; + + if (dir == NULL) + snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry); + else + snprintf(filename, PATH_MAX, "%s/%s/%s", + MICSYSFSDIR, dir, entry); + + oldvalue = readsysfs(dir, entry); + + fd = open(filename, O_RDWR); + if (fd < 0) { + ret = errno; + mpsslog("Failed to open sysfs entry '%s': %s\n", + filename, strerror(errno)); + goto done; + } + + if (!oldvalue || strcmp(value, oldvalue)) { + if (write(fd, value, strlen(value)) < 0) { + ret = errno; + mpsslog("Failed to write new sysfs entry '%s': %s\n", + filename, strerror(errno)); + } + } + close(fd); +done: + if (oldvalue) + free(oldvalue); + return ret; +} diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index b1e455b47b82..be04b056c1bd 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5594,7 +5594,7 @@ static int selinux_setprocattr(struct task_struct *p, return error; /* Obtain a SID for the context, if one was specified. */ - if (size && str[1] && str[1] != '\n') { + if (size && str[0] && str[0] != '\n') { if (str[size-1] == '\n') { str[size-1] = 0; size--; diff --git a/sound/soc/codecs/cs4270.c b/sound/soc/codecs/cs4270.c index e6d4ff9fd992..736c1ea8e31e 100644 --- a/sound/soc/codecs/cs4270.c +++ b/sound/soc/codecs/cs4270.c @@ -148,11 +148,11 @@ SND_SOC_DAPM_OUTPUT("AOUTR"), }; static const struct snd_soc_dapm_route cs4270_dapm_routes[] = { - { "Capture", NULL, "AINA" }, - { "Capture", NULL, "AINB" }, + { "Capture", NULL, "AINL" }, + { "Capture", NULL, "AINR" }, - { "AOUTA", NULL, "Playback" }, - { "AOUTB", NULL, "Playback" }, + { "AOUTL", NULL, "Playback" }, + { "AOUTR", NULL, "Playback" }, }; /** diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 0051077a7b9d..814a118d2ef6 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -2959,6 +2959,23 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), } }, +/* Syntek STK1160 */ +{ + .match_flags = USB_DEVICE_ID_MATCH_DEVICE | + USB_DEVICE_ID_MATCH_INT_CLASS | + USB_DEVICE_ID_MATCH_INT_SUBCLASS, + .idVendor = 0x05e1, + .idProduct = 0x0408, + .bInterfaceClass = USB_CLASS_AUDIO, + .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .vendor_name = "Syntek", + .product_name = "STK1160", + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_AUDIO_ALIGN_TRANSFER + } +}, + /* Digidesign Mbox */ { /* Thanks to Clemens Ladisch <clemens@xxxxxxxxxx> */ diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index dcb1e9ac949c..d9d10f3c8ca3 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -2450,7 +2450,7 @@ sub do_run_test { } waitpid $child_pid, 0; - $child_exit = $?; + $child_exit = $? >> 8; if (!$bug && $in_bisect) { if (defined($bisect_ret_good)) {