Re: [PATCH 3/5] Intel MIC Host Driver Changes for Virtio Devices.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Jul 24, 2013 at 08:31:34PM -0700, Sudeep Dutt wrote:
> From: Ashutosh Dixit <ashutosh.dixit@xxxxxxxxx>
> 
> This patch introduces the host "Virtio over PCIe" interface for
> Intel MIC. It allows creating user space backends on the host and
> instantiating virtio devices for them on the Intel MIC card. A character
> device per MIC is exposed with IOCTL, mmap and poll callbacks. This allows
> the user space backend to:
> (a) add/remove a virtio device via a device page.
> (b) map (R/O) virtio rings and device page to user space.
> (c) poll for availability of data.
> (d) copy a descriptor or entire descriptor chain to/from the card.
> (e) modify virtio configuration.
> (f) handle virtio device reset.
> The buffers are copied over using CPU copies for this initial patch
> and host initiated MIC DMA support is planned for future patches.
> The avail and desc virtio rings are in host memory and the used ring
> is in card memory to maximize writes across PCIe for performance.
> 
> Co-author: Sudeep Dutt <sudeep.dutt@xxxxxxxxx>
> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@xxxxxxxxx>
> Signed-off-by: Caz Yokoyama <Caz.Yokoyama@xxxxxxxxx>
> Signed-off-by: Dasaratharaman Chandramouli <dasaratharaman.chandramouli@xxxxxxxxx>
> Signed-off-by: Nikhil Rao <nikhil.rao@xxxxxxxxx>
> Signed-off-by: Harshavardhan R Kharche <harshavardhan.r.kharche@xxxxxxxxx>
> Signed-off-by: Sudeep Dutt <sudeep.dutt@xxxxxxxxx>
> Acked-by: Yaozu (Eddie) Dong <eddie.dong@xxxxxxxxx>
> Reviewed-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@xxxxxxxxx>

I decided to look at the security and ordering of ring accesses.
From a quick look, I think I found some issues; see the comments below.
If it were possible to reuse existing ring handling code,
such issues would go away automatically.
Which brings me to the next question: have you looked at reusing
some code under drivers/vhost for host side processing?
If not, you probably should.
Is code in vringh.c generic enough to support your use-case,
and if not what exactly are the issues preventing this?

Thanks,

> ---
>  drivers/misc/mic/common/mic_device.h |   4 +
>  drivers/misc/mic/host/Makefile       |   2 +
>  drivers/misc/mic/host/mic_boot.c     |   2 +
>  drivers/misc/mic/host/mic_debugfs.c  | 137 +++++++
>  drivers/misc/mic/host/mic_fops.c     | 280 ++++++++++++++
>  drivers/misc/mic/host/mic_fops.h     |  37 ++
>  drivers/misc/mic/host/mic_main.c     |  24 ++
>  drivers/misc/mic/host/mic_virtio.c   | 703 +++++++++++++++++++++++++++++++++++
>  drivers/misc/mic/host/mic_virtio.h   | 108 ++++++
>  include/uapi/linux/Kbuild            |   1 +
>  include/uapi/linux/mic_common.h      | 165 +++++++-
>  include/uapi/linux/mic_ioctl.h       | 104 ++++++
>  12 files changed, 1566 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/misc/mic/host/mic_fops.c
>  create mode 100644 drivers/misc/mic/host/mic_fops.h
>  create mode 100644 drivers/misc/mic/host/mic_virtio.c
>  create mode 100644 drivers/misc/mic/host/mic_virtio.h
>  create mode 100644 include/uapi/linux/mic_ioctl.h
> 
> diff --git a/drivers/misc/mic/common/mic_device.h b/drivers/misc/mic/common/mic_device.h
> index 24934b1..7cdeb74 100644
> --- a/drivers/misc/mic/common/mic_device.h
> +++ b/drivers/misc/mic/common/mic_device.h
> @@ -78,4 +78,8 @@ mic_mmio_write(struct mic_mw *mw, u32 val, u32 offset)
>  #define MIC_DPLO_SPAD 14
>  #define MIC_DPHI_SPAD 15
>  
> +/* These values are supposed to be in ext_params on an interrupt */
> +#define MIC_VIRTIO_PARAM_DEV_REMOVE 0x1
> +#define MIC_VIRTIO_PARAM_CONFIG_CHANGED 0x2
> +
>  #endif
> diff --git a/drivers/misc/mic/host/Makefile b/drivers/misc/mic/host/Makefile
> index 0608bbb..e02abdb 100644
> --- a/drivers/misc/mic/host/Makefile
> +++ b/drivers/misc/mic/host/Makefile
> @@ -9,3 +9,5 @@ mic_host-objs += mic_sysfs.o
>  mic_host-objs += mic_boot.o
>  mic_host-objs += mic_smpt.o
>  mic_host-objs += mic_debugfs.o
> +mic_host-objs += mic_fops.o
> +mic_host-objs += mic_virtio.o
> diff --git a/drivers/misc/mic/host/mic_boot.c b/drivers/misc/mic/host/mic_boot.c
> index 6485a87..40bcb90 100644
> --- a/drivers/misc/mic/host/mic_boot.c
> +++ b/drivers/misc/mic/host/mic_boot.c
> @@ -30,6 +30,7 @@
>  #include <linux/delay.h>
>  
>  #include "mic_common.h"
> +#include "mic_virtio.h"
>  
>  /**
>   * mic_reset - Reset the MIC device.
> @@ -112,6 +113,7 @@ void mic_stop(struct mic_device *mdev, bool force)
>  {
>  	mutex_lock(&mdev->mic_mutex);
>  	if (MIC_OFFLINE != mdev->state || force) {
> +		mic_virtio_reset_devices(mdev);
>  		mic_bootparam_init(mdev);
>  		mic_reset(mdev);
>  		if (MIC_RESET_FAILED == mdev->state)
> diff --git a/drivers/misc/mic/host/mic_debugfs.c b/drivers/misc/mic/host/mic_debugfs.c
> index 5b7697e..bebc6e3 100644
> --- a/drivers/misc/mic/host/mic_debugfs.c
> +++ b/drivers/misc/mic/host/mic_debugfs.c
> @@ -32,6 +32,7 @@
>  
>  #include "mic_common.h"
>  #include "mic_debugfs.h"
> +#include "mic_virtio.h"
>  
>  /* Debugfs parent dir */
>  static struct dentry *mic_dbg;
> @@ -207,7 +208,13 @@ static const struct file_operations post_code_ops = {
>  static int dp_seq_show(struct seq_file *s, void *pos)
>  {
>  	struct mic_device *mdev = s->private;
> +	struct mic_device_desc *d;
> +	struct mic_device_ctrl *dc;
> +	struct mic_vqconfig *vqconfig;
> +	__u32 *features;
> +	__u8 *config;
>  	struct mic_bootparam *bootparam = mdev->dp;
> +	int i, j;
>  
>  	seq_printf(s, "Bootparam: magic 0x%x\n",
>  		bootparam->magic);
> @@ -222,6 +229,53 @@ static int dp_seq_show(struct seq_file *s, void *pos)
>  	seq_printf(s, "Bootparam: shutdown_card %d\n",
>  		bootparam->shutdown_card);
>  
> +	for (i = sizeof(*bootparam); i < MIC_DP_SIZE;
> +	     i += mic_total_desc_size(d)) {
> +		d = mdev->dp + i;
> +		dc = (void *)d + mic_aligned_desc_size(d);
> +
> +		/* end of list */
> +		if (d->type == 0)
> +			break;
> +
> +		if (d->type == -1)
> +			continue;
> +
> +		seq_printf(s, "Type %d ", d->type);
> +		seq_printf(s, "Num VQ %d ", d->num_vq);
> +		seq_printf(s, "Feature Len %d\n", d->feature_len);
> +		seq_printf(s, "Config Len %d ", d->config_len);
> +		seq_printf(s, "Shutdown Status %d\n", d->status);
> +
> +		for (j = 0; j < d->num_vq; j++) {
> +			vqconfig = mic_vq_config(d) + j;
> +			seq_printf(s, "vqconfig[%d]: ", j);
> +			seq_printf(s, "address 0x%llx ", vqconfig->address);
> +			seq_printf(s, "num %d ", vqconfig->num);
> +			seq_printf(s, "used address 0x%llx\n",
> +				vqconfig->used_address);
> +		}
> +
> +		features = (__u32 *) mic_vq_features(d);
> +		seq_printf(s, "Features: Host 0x%x ", features[0]);
> +		seq_printf(s, "Guest 0x%x\n", features[1]);
> +
> +		config = mic_vq_configspace(d);
> +		for (j = 0; j < d->config_len; j++)
> +			seq_printf(s, "config[%d]=%d\n", j, config[j]);
> +
> +		seq_puts(s, "Device control:\n");
> +		seq_printf(s, "Config Change %d ", dc->config_change);
> +		seq_printf(s, "Vdev reset %d\n", dc->vdev_reset);
> +		seq_printf(s, "Guest Ack %d ", dc->guest_ack);
> +		seq_printf(s, "Host ack %d\n", dc->host_ack);
> +		seq_printf(s, "Used address updated %d ",
> +			dc->used_address_updated);
> +		seq_printf(s, "Vdev 0x%llx\n", dc->vdev);
> +		seq_printf(s, "c2h doorbell %d ", dc->c2h_vdev_db);
> +		seq_printf(s, "h2c doorbell %d\n", dc->h2c_vdev_db);
> +	}
> +
>  	return 0;
>  }
>  
> @@ -243,6 +297,86 @@ static const struct file_operations dp_ops = {
>  	.release = dp_debug_release
>  };
>  
> +static int vdev_info_seq_show(struct seq_file *s, void *unused)
> +{
> +	struct mic_device *mdev = s->private;
> +	struct list_head *pos, *tmp;
> +	struct mic_vdev *mvdev;
> +	int i, j;
> +
> +	mutex_lock(&mdev->mic_mutex);
> +	list_for_each_safe(pos, tmp, &mdev->vdev_list) {
> +		mvdev = list_entry(pos, struct mic_vdev, list);
> +		seq_printf(s, "VDEV type %d state %s in %ld out %ld\n",
> +			mvdev->virtio_id,
> +			mic_vdevup(mvdev) ? "UP" : "DOWN",
> +			mvdev->in_bytes,
> +			mvdev->out_bytes);
> +		for (i = 0; i < MIC_MAX_VRINGS; i++) {
> +			struct vring_desc *desc;
> +			struct vring_avail *avail;
> +			struct vring_used *used;
> +			int num = mvdev->vring[i].vr.num;
> +			if (!num)
> +				continue;
> +			desc = mvdev->vring[i].vr.desc;
> +			seq_printf(s, "vring i %d avail_idx %d",
> +				i, mvdev->vring[i].info->avail_idx & (num - 1));
> +			seq_printf(s, " used_idx %d num %d\n",
> +				mvdev->vring[i].info->used_idx & (num - 1),
> +				num);
> +			seq_printf(s, "vring i %d avail_idx %d used_idx %d\n",
> +				i, mvdev->vring[i].info->avail_idx,
> +				mvdev->vring[i].info->used_idx);
> +			for (j = 0; j < num; j++) {
> +				seq_printf(s, "desc[%d] addr 0x%llx len %d",
> +					j, desc->addr, desc->len);
> +				seq_printf(s, " flags 0x%x next %d\n",
> +					desc->flags,
> +					desc->next);
> +				desc++;
> +			}
> +			avail = mvdev->vring[i].vr.avail;
> +			seq_printf(s, "avail flags 0x%x idx %d\n",
> +				avail->flags, avail->idx & (num - 1));
> +			seq_printf(s, "avail flags 0x%x idx %d\n",
> +				avail->flags, avail->idx);
> +			for (j = 0; j < num; j++)
> +				seq_printf(s, "avail ring[%d] %d\n",
> +					j, avail->ring[j]);
> +			used = mvdev->vring[i].vr.used;
> +			seq_printf(s, "used flags 0x%x idx %d\n",
> +				used->flags, used->idx & (num - 1));
> +			seq_printf(s, "used flags 0x%x idx %d\n",
> +				used->flags, used->idx);
> +			for (j = 0; j < num; j++)
> +				seq_printf(s, "used ring[%d] id %d len %d\n",
> +					j, used->ring[j].id, used->ring[j].len);
> +		}
> +	}
> +	mutex_unlock(&mdev->mic_mutex);
> +
> +	return 0;
> +}
> +
> +static int vdev_info_debug_open(struct inode *inode, struct file *file)
> +{
> +	return single_open(file, vdev_info_seq_show, inode->i_private);
> +}
> +
> +static int vdev_info_debug_release(struct inode *inode, struct file *file)
> +{
> +	return single_release(inode, file);
> +}
> +
> +static const struct file_operations vdev_info_ops = {
> +	.owner   = THIS_MODULE,
> +	.open    = vdev_info_debug_open,
> +	.read    = seq_read,
> +	.llseek  = seq_lseek,
> +	.release = vdev_info_debug_release
> +};
> +
>  static int msi_irq_info_seq_show(struct seq_file *s, void *pos)
>  {
>  	struct mic_device *mdev  = s->private;
> @@ -332,6 +466,9 @@ void __init mic_create_debug_dir(struct mic_device *mdev)
>  	debugfs_create_file("dp", 0444, mdev->dbg_dir,
>  		mdev, &dp_ops);
>  
> +	debugfs_create_file("vdev_info", 0444, mdev->dbg_dir,
> +		mdev, &vdev_info_ops);
> +
>  	debugfs_create_file("msi_irq_info", 0444, mdev->dbg_dir,
>  		mdev, &msi_irq_info_ops);
>  }
> diff --git a/drivers/misc/mic/host/mic_fops.c b/drivers/misc/mic/host/mic_fops.c
> new file mode 100644
> index 0000000..626a454
> --- /dev/null
> +++ b/drivers/misc/mic/host/mic_fops.c
> @@ -0,0 +1,280 @@
> +/*
> + * Intel MIC Platform Software Stack (MPSS)
> + *
> + * Copyright(c) 2013 Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License, version 2, as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
> + * USA.
> + *
> + * The full GNU General Public License is included in this distribution in
> + * the file called "COPYING".
> + *
> + * Intel MIC Host driver.
> + *
> + */
> +#include <linux/module.h>
> +#include <linux/fs.h>
> +#include <linux/pci.h>
> +#include <linux/interrupt.h>
> +#include <linux/firmware.h>
> +#include <linux/completion.h>
> +#include <linux/poll.h>
> +#include <linux/virtio_ids.h>
> +#include <linux/mic_ioctl.h>
> +
> +#include "mic_common.h"
> +#include "mic_fops.h"
> +#include "mic_virtio.h"
> +
> +int mic_open(struct inode *inode, struct file *f)
> +{
> +	struct mic_vdev *mvdev;
> +	struct mic_device *mdev = container_of(inode->i_cdev,
> +		struct mic_device, cdev);
> +
> +	mvdev = kzalloc(sizeof(*mvdev), GFP_KERNEL);
> +	if (!mvdev)
> +		return -ENOMEM;
> +
> +	init_waitqueue_head(&mvdev->waitq);
> +	INIT_LIST_HEAD(&mvdev->list);
> +	mvdev->mdev = mdev;
> +	mvdev->virtio_id = -1;
> +
> +	f->private_data = mvdev;
> +	return 0;
> +}
> +
> +int mic_release(struct inode *inode, struct file *f)
> +{
> +	struct mic_vdev *mvdev = (struct mic_vdev *)f->private_data;
> +
> +	if (-1 != mvdev->virtio_id)
> +		mic_virtio_del_device(mvdev);
> +	f->private_data = NULL;
> +	kfree(mvdev);
> +	return 0;
> +}
> +
> +long mic_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
> +{
> +	struct mic_vdev *mvdev = (struct mic_vdev *)f->private_data;
> +	void __user *argp = (void __user *)arg;
> +	int ret;
> +
> +	switch (cmd) {
> +	case MIC_VIRTIO_ADD_DEVICE:
> +	{
> +		ret = mic_virtio_add_device(mvdev, argp);
> +		if (ret < 0) {
> +			dev_err(mic_dev(mvdev),
> +				"%s %d errno ret %d\n",
> +				__func__, __LINE__, ret);
> +			return ret;
> +		}
> +		break;
> +	}
> +	case MIC_VIRTIO_COPY_DESC:
> +	{
> +		struct mic_copy_desc request;
> +		struct mic_copy *copy = &request.copy;
> +
> +		ret = mic_vdev_inited(mvdev);
> +		if (ret)
> +			return ret;
> +
> +		if (copy_from_user(&request, argp, sizeof(request)))
> +			return -EFAULT;
> +
> +		dev_dbg(mic_dev(mvdev),
> +			"%s %d === iovcnt 0x%x vr_idx 0x%x desc_idx 0x%x "
> +			"used_idx 0x%x used_len 0x%x\n",
> +			__func__, __LINE__, copy->iovcnt,
> +			copy->vr_idx, copy->desc_idx,
> +			request.used_desc_idx, request.used_len);
> +
> +		ret = mic_virtio_copy_desc(mvdev, &request);
> +		if (ret < 0) {
> +			dev_err(mic_dev(mvdev),
> +				"%s %d errno ret %d\n",
> +				__func__, __LINE__, ret);
> +			return ret;
> +		}
> +		if (copy_to_user(
> +			&((struct mic_copy_desc __user *)argp)->copy.out_cookie,
> +			&copy->out_cookie, sizeof(copy->out_cookie))) {
> +			dev_err(mic_dev(mvdev), "%s %d errno ret %d\n",
> +				__func__, __LINE__, -EFAULT);
> +			return -EFAULT;
> +		}
> +		if (copy_to_user(
> +			&((struct mic_copy_desc __user *)argp)->copy.out_len,
> +			&copy->out_len, sizeof(copy->out_len))) {
> +			dev_err(mic_dev(mvdev), "%s %d errno ret %d\n",
> +				__func__, __LINE__, -EFAULT);
> +			return -EFAULT;
> +		}
> +		break;
> +	}
> +	case MIC_VIRTIO_COPY_CHAIN:
> +	{
> +		struct mic_copy request;
> +
> +		ret = mic_vdev_inited(mvdev);
> +		if (ret)
> +			return ret;
> +
> +		if (copy_from_user(&request, argp, sizeof(request)))
> +			return -EFAULT;
> +
> +		dev_dbg(mic_dev(mvdev),
> +			"%s %d === vr_idx 0x%x desc_idx 0x%x iovcnt 0x%x\n",
> +			__func__, __LINE__,
> +			request.vr_idx, request.desc_idx, request.iovcnt);
> +
> +		ret = mic_virtio_copy_chain(mvdev, &request);
> +		if (ret < 0) {
> +			dev_err(mic_dev(mvdev),
> +				"%s %d errno ret %d\n",
> +				__func__, __LINE__, ret);
> +			return ret;
> +		}
> +		if (copy_to_user(
> +			&((struct mic_copy __user *)argp)->out_cookie,
> +			&request.out_cookie, sizeof(request.out_cookie))) {
> +			dev_err(mic_dev(mvdev), "%s %d errno ret %d\n",
> +				__func__, __LINE__, -EFAULT);
> +			return -EFAULT;
> +		}
> +		if (copy_to_user(&((struct mic_copy __user *)argp)->out_len,
> +			&request.out_len,
> +			sizeof(request.out_len))) {
> +			dev_err(mic_dev(mvdev), "%s %d errno ret %d\n",
> +				__func__, __LINE__, -EFAULT);
> +			return -EFAULT;
> +		}
> +		break;
> +	}
> +	case MIC_VIRTIO_CONFIG_CHANGE:
> +	{
> +		ret = mic_vdev_inited(mvdev);
> +		if (ret)
> +			return ret;
> +
> +		ret = mic_virtio_config_change(mvdev, argp);
> +		if (ret < 0) {
> +			dev_err(mic_dev(mvdev),
> +				"%s %d errno ret %d\n",
> +				__func__, __LINE__, ret);
> +			return ret;
> +		}
> +		break;
> +	}
> +	default:
> +		return -ENOIOCTLCMD;
> +	};
> +	return 0;
> +}
> +
> +/*
> + * We return POLLIN | POLLOUT from poll when new buffers are enqueued, and
> + * not when previously enqueued buffers may be available. This means that
> + * in the card->host (TX) path, when userspace is unblocked by poll it
> + * must drain all available descriptors or it can stall.
> + */
> +unsigned int mic_poll(struct file *f, poll_table *wait)
> +{
> +	struct mic_vdev *mvdev = (struct mic_vdev *)f->private_data;
> +	int mask = 0;
> +
> +	poll_wait(f, &mvdev->waitq, wait);
> +
> +	if (mic_vdev_inited(mvdev))
> +		mask = POLLERR;
> +	else if (mvdev->poll_wake) {
> +		mvdev->poll_wake = 0;
> +		mask = POLLIN | POLLOUT;
> +	}
> +
> +	return mask;
> +}
> +
> +static inline int
> +mic_query_offset(struct mic_vdev *mvdev, unsigned long offset,
> +	unsigned long *size, unsigned long *pa)
> +{
> +	struct mic_device *mdev = mvdev->mdev;
> +	unsigned long start = MIC_DP_SIZE;
> +	int i;
> +
> +	/*
> +	 * MMAP interface is as follows:
> +	 * offset				region
> +	 * 0x0					virtio device_page
> +	 * 0x1000				first vring
> +	 * 0x1000 + size of 1st vring		second vring
> +	 * ....
> +	 */
> +	if (!offset) {
> +		*pa = virt_to_phys(mdev->dp);
> +		*size = MIC_DP_SIZE;
> +		return 0;
> +	}
> +
> +	for (i = 0; i < mvdev->dd->num_vq; i++) {
> +		if (offset == start) {
> +			*pa = virt_to_phys(mvdev->vring[i].va);
> +			*size = mvdev->vring[i].len;
> +			return 0;
> +		}
> +		start += mvdev->vring[i].len;
> +	}
> +	return -1;
> +}
> +
> +/*
> + * Maps the device page and virtio rings to user space for readonly access.
> + */
> +int
> +mic_mmap(struct file *f, struct vm_area_struct *vma)
> +{
> +	struct mic_vdev *mvdev = (struct mic_vdev *)f->private_data;
> +	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
> +	unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size;
> +	int i, err;
> +
> +	err = mic_vdev_inited(mvdev);
> +	if (err)
> +		return err;
> +
> +	if (vma->vm_flags & VM_WRITE)
> +		return -EACCES;
> +
> +	while (size_rem) {
> +		i = mic_query_offset(mvdev, offset, &size, &pa);
> +		if (i < 0)
> +			return -EINVAL;
> +		err = remap_pfn_range(vma, vma->vm_start + offset,
> +			pa >> PAGE_SHIFT, size, vma->vm_page_prot);
> +		if (err)
> +			return err;
> +		dev_dbg(mic_dev(mvdev),
> +			"%s %d type %d size 0x%lx off 0x%lx pa 0x%lx vma 0x%lx\n",
> +			__func__, __LINE__, mvdev->virtio_id, size, offset,
> +			pa, vma->vm_start + offset);
> +		size_rem -= size;
> +		offset += size;
> +	}
> +	return 0;
> +}
> diff --git a/drivers/misc/mic/host/mic_fops.h b/drivers/misc/mic/host/mic_fops.h
> new file mode 100644
> index 0000000..504506c
> --- /dev/null
> +++ b/drivers/misc/mic/host/mic_fops.h
> @@ -0,0 +1,37 @@
> +/*
> + * Intel MIC Platform Software Stack (MPSS)
> + *
> + * Copyright(c) 2013 Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License, version 2, as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
> + * USA.
> + *
> + * The full GNU General Public License is included in this distribution in
> + * the file called "COPYING".
> + *
> + * Intel MIC Host driver.
> + *
> + */
> +#ifndef _MIC_FOPS_H_
> +#define _MIC_FOPS_H_
> +
> +int mic_open(struct inode *inode, struct file *filp);
> +int mic_release(struct inode *inode, struct file *filp);
> +ssize_t mic_read(struct file *filp, char __user *buf,
> +			size_t count, loff_t *pos);
> +long mic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
> +int mic_mmap(struct file *f, struct vm_area_struct *vma);
> +unsigned int mic_poll(struct file *f, poll_table *wait);
> +
> +#endif
> diff --git a/drivers/misc/mic/host/mic_main.c b/drivers/misc/mic/host/mic_main.c
> index 70cc235..dd421d5 100644
> --- a/drivers/misc/mic/host/mic_main.c
> +++ b/drivers/misc/mic/host/mic_main.c
> @@ -37,6 +37,8 @@
>  
>  #include "mic_common.h"
>  #include "mic_debugfs.h"
> +#include "mic_fops.h"
> +#include "mic_virtio.h"
>  
>  static const char mic_driver_name[] = "mic";
>  
> @@ -79,6 +81,15 @@ struct mic_info {
>  /* g_mic - Global information about all MIC devices. */
>  static struct mic_info g_mic;
>  
> +static const struct file_operations mic_fops = {
> +	.open = mic_open,
> +	.release = mic_release,
> +	.unlocked_ioctl = mic_ioctl,
> +	.poll = mic_poll,
> +	.mmap = mic_mmap,
> +	.owner = THIS_MODULE,
> +};
> +
>  /* Initialize the device page */
>  static int mic_dp_init(struct mic_device *mdev)
>  {
> @@ -968,8 +979,20 @@ static int mic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>  	mic_bootparam_init(mdev);
>  
>  	mic_create_debug_dir(mdev);
> +	cdev_init(&mdev->cdev, &mic_fops);
> +	mdev->cdev.owner = THIS_MODULE;
> +	rc = cdev_add(&mdev->cdev, MKDEV(MAJOR(g_mic.dev), mdev->id), 1);
> +	if (rc) {
> +		dev_err(&pdev->dev, "cdev_add err id %d rc %d\n", mdev->id, rc);
> +		goto cleanup_debug_dir;
> +	}
>  	dev_info(&pdev->dev, "Probe successful for %s\n", mdev->name);
>  	return 0;
> +cleanup_debug_dir:
> +	mic_delete_debug_dir(mdev);
> +	mutex_lock(&mdev->mic_mutex);
> +	mic_free_irq(mdev, mdev->shutdown_cookie, mdev);
> +	mutex_unlock(&mdev->mic_mutex);
>  dp_uninit:
>  	mic_dp_uninit(mdev);
>  sysfs_put:
> @@ -1019,6 +1042,7 @@ static void mic_remove(struct pci_dev *pdev)
>  	id = mdev->id;
>  
>  	mic_stop(mdev, false);
> +	cdev_del(&mdev->cdev);
>  	mic_delete_debug_dir(mdev);
>  	mutex_lock(&mdev->mic_mutex);
>  	mic_free_irq(mdev, mdev->shutdown_cookie, mdev);
> diff --git a/drivers/misc/mic/host/mic_virtio.c b/drivers/misc/mic/host/mic_virtio.c
> new file mode 100644
> index 0000000..7282e12
> --- /dev/null
> +++ b/drivers/misc/mic/host/mic_virtio.c
> @@ -0,0 +1,703 @@
> +/*
> + * Intel MIC Platform Software Stack (MPSS)
> + *
> + * Copyright(c) 2013 Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License, version 2, as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
> + * USA.
> + *
> + * The full GNU General Public License is included in this distribution in
> + * the file called "COPYING".
> + *
> + * Intel MIC Host driver.
> + *
> + */
> +#include <linux/module.h>
> +#include <linux/fs.h>
> +#include <linux/pci.h>
> +#include <linux/interrupt.h>
> +#include <linux/firmware.h>
> +#include <linux/completion.h>
> +#include <linux/poll.h>
> +#include <linux/sched.h>
> +#include <uapi/linux/virtio_ids.h>
> +#include <uapi/linux/virtio_net.h>
> +
> +#include "mic_common.h"
> +#include "mic_virtio.h"
> +
> +/* See comments in vhost.c for explanation of next_desc() */
> +static unsigned next_desc(struct vring_desc *desc)
> +{
> +	unsigned int next;
> +
> +	if (!(le16_to_cpu(desc->flags) & VRING_DESC_F_NEXT))
> +		return -1U;
> +	next = le16_to_cpu(desc->next);
> +	read_barrier_depends();
> +	return next;
> +}
> +
> +/*
> + * Central API which initiates the copies across the PCIe bus.
> + */
> +static int mic_virtio_copy_desc_buf(struct mic_vdev *mvdev,
> +			struct vring_desc *desc,
> +			void __user *ubuf, u32 rem_len, u32 doff, u32 *out_len)
> +{
> +	void __iomem *dbuf;
> +	int err;
> +	u32 len = le32_to_cpu(desc->len);
> +	u16 flags = le16_to_cpu(desc->flags);
> +	u64 addr = le64_to_cpu(desc->addr);
> +
> +	dbuf = mvdev->mdev->aper.va + addr + doff;
> +	*out_len = min_t(u32, rem_len, len - doff);
> +	if (flags & VRING_DESC_F_WRITE) {
> +		/*
> +		 * We are copying to IO below and the subsequent
> +		 * wmb(..) ensures that the stores have completed.

It doesn't - you would need to read card memory for this.
What wmb() does is order previous stores with respect to subsequent stores.
So I am guessing you really want to move this wmb() to
where the avail ring is written.

> +		 * We should ideally use something like
> +		 * copy_from_user_toio(..) if it existed.
> +		 */
> +		if (copy_from_user(dbuf, ubuf, *out_len)) {
> +			err = -EFAULT;
> +			dev_err(mic_dev(mvdev), "%s %d err %d\n",
> +				__func__, __LINE__, err);
> +			goto err;
> +		}
> +		mvdev->out_bytes += *out_len;
> +		wmb();
> +	} else {
> +		/*
> +		 * We are copying from IO below and the subsequent
> +		 * rmb(..) ensures that the loads have completed.
> +		 * We should ideally use something like
> +		 * copy_to_user_fromio(..) if it existed.
> +		 */
> +		if (copy_to_user(ubuf, dbuf, *out_len)) {
> +			err = -EFAULT;
> +			dev_err(mic_dev(mvdev), "%s %d err %d\n",
> +				__func__, __LINE__, err);
> +			goto err;
> +		}
> +		mvdev->in_bytes += *out_len;
> +		rmb();
> +	}
> +	err = 0;
> +err:
> +	dev_dbg(mic_dev(mvdev),
> +		"%s: ubuf %p dbuf %p rem_len 0x%x *out_len 0x%x "
> +		"dlen 0x%x desc->writable %d err %d\n",
> +		__func__, ubuf, dbuf, rem_len, *out_len,
> +		len, flags & VRING_DESC_F_WRITE, err);
> +	return err;
> +}
> +
> +/* Iterate over the virtio descriptor chain and issue the copies */
> +static int _mic_virtio_copy(struct mic_vdev *mvdev,
> +	struct mic_copy *copy, bool chain)
> +{
> +	struct mic_vring *vr;
> +	struct vring_desc *desc;
> +	u32 desc_idx = copy->desc_idx;
> +	int ret = 0, iovcnt = copy->iovcnt;
> +	struct iovec iov;
> +	struct iovec __user *u_iov = copy->iov;
> +	u32 rem_ulen, rem_dlen, len, doff;
> +	void __user *ubuf = NULL;
> +
> +	vr = &mvdev->vring[copy->vr_idx];
> +	desc = vr->vr.desc;
> +	copy->out_len = 0;
> +	rem_dlen = le32_to_cpu(desc[desc_idx].len);
> +	rem_ulen = 0;
> +	doff = 0;
> +
> +	while (iovcnt && desc_idx != -1U) {
> +		if (!rem_ulen) {
> +			/* Copy over a new iovec */
> +			ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
> +			if (ret) {
> +				ret = -EINVAL;
> +				dev_err(mic_dev(mvdev), "%s %d err %d\n",
> +					__func__, __LINE__, ret);
> +				break;
> +			}
> +			rem_ulen = iov.iov_len;
> +			ubuf = iov.iov_base;
> +		}
> +		ret = mic_virtio_copy_desc_buf(mvdev,
> +			&desc[desc_idx],
> +			ubuf, rem_ulen, doff, &len);
> +		if (ret)
> +			break;
> +
> +		dev_dbg(mic_dev(mvdev),
> +			"%s: desc_idx 0x%x rem_ulen 0x%x rem_dlen 0x%x "
> +			"doff 0x%x dlen 0x%x\n",
> +			__func__, desc_idx, rem_ulen, rem_dlen,
> +			doff, le32_to_cpu(desc[desc_idx].len));
> +
> +		copy->out_len += len;
> +		rem_ulen -= len;
> +		rem_dlen -= len;
> +		ubuf += len;
> +		doff += len;
> +		/* One iovec is now completed */
> +		if (!rem_ulen) {
> +			iovcnt--;
> +			u_iov++;
> +		}
> +		/* One descriptor is now completed */
> +		if (!rem_dlen) {
> +			desc_idx = next_desc(&desc[desc_idx]);
> +			if (desc_idx != -1U) {
> +				rem_dlen = le32_to_cpu(desc[desc_idx].len);
> +				doff = 0;
> +			}


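Looks like desc_idx here can end up outside the range of the
desc array: it is taken from desc->next by next_desc() and is
never checked against the ring size before being used as an index.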


> +		}
> +	}
> +	/*
> +	 * Return EINVAL if a chain should be processed, but we have run out
> +	 * of iovecs while there are readable descriptors remaining in the
> +	 * chain.
> +	 */
> +	if (chain && desc_idx != -1U &&
> +		!(le16_to_cpu(desc->flags) & VRING_DESC_F_WRITE)) {
> +		dev_err(mic_dev(mvdev), "%s not enough iovecs\n", __func__);
> +		ret = -EINVAL;
> +	}
> +	return ret;
> +}
> +
> +static inline void
> +mic_update_local_avail(struct mic_vdev *mvdev, u8 vr_idx)
> +{
> +	struct mic_vring *vr = &mvdev->vring[vr_idx];
> +	vr->info->avail_idx++;
> +}
> +
> +/* Update the used ring */
> +static void mic_update_used(struct mic_vdev *mvdev, u8 vr_idx,
> +	u32 used_desc_idx, u32 used_len)
> +{
> +	struct mic_vring *vr = &mvdev->vring[vr_idx];
> +	u16 used_idx;
> +	s8 db = mvdev->dc->h2c_vdev_db;
> +
> +	used_idx = vr->info->used_idx & (vr->vr.num - 1);
> +	iowrite32(used_desc_idx, &vr->vr.used->ring[used_idx].id);
> +	iowrite32(used_len, &vr->vr.used->ring[used_idx].len);
> +	wmb();
> +	iowrite16(++vr->info->used_idx, &vr->vr.used->idx);
> +	dev_dbg(mic_dev(mvdev),
> +		"%s: ======== vr_idx %d used_idx 0x%x used_len 0x%x ========\n",
> +		__func__, vr_idx, used_desc_idx, used_len);
> +	wmb();

Are you trying to make sure the avail flags read below is ordered
with respect to the used index write here?
If so, you need an mb(), not just a wmb().


> +	/* Check if the remote device wants us to suppress interrupts */
> +	if (le16_to_cpu(vr->vr.avail->flags) & VRING_AVAIL_F_NO_INTERRUPT)
> +		return;
> +	if (db != -1)
> +		mvdev->mdev->ops->send_intr(mvdev->mdev, db);
> +}
> +
> +static inline int verify_copy_args(struct mic_vdev *mvdev,
> +		struct mic_copy *request)
> +{
> +	if (request->vr_idx >= mvdev->dd->num_vq) {
> +		dev_err(mic_dev(mvdev), "%s %d err %d\n",
> +			__func__, __LINE__, -EINVAL);
> +		return -EINVAL;
> +	}
> +
> +	if (request->desc_idx >=
> +		le16_to_cpu(mic_vq_config(mvdev->dd)->num)) {
> +		dev_err(mic_dev(mvdev), "%s %d err %d\n",
> +			__func__, __LINE__, -EINVAL);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +#define PROCESS_DESC_CHAIN	true
> +
> +/* Copy a specified number of virtio descriptors in a chain */
> +int mic_virtio_copy_desc(struct mic_vdev *mvdev,
> +		struct mic_copy_desc *request)
> +{
> +	int err;
> +	struct mutex *vr_mutex;
> +
> +	err = verify_copy_args(mvdev, &request->copy);
> +	if (err)
> +		return err;
> +
> +	vr_mutex = &mvdev->vr_mutex[request->copy.vr_idx];
> +	mutex_lock(vr_mutex);
> +	if (!mic_vdevup(mvdev)) {
> +		err = -ENODEV;
> +		dev_err(mic_dev(mvdev), "%s %d err %d\n",
> +			__func__, __LINE__, err);
> +		goto err;
> +	}
> +	err = _mic_virtio_copy(mvdev, &request->copy, !PROCESS_DESC_CHAIN);
> +	if (err) {
> +		dev_err(mic_dev(mvdev), "%s %d err %d\n",
> +			__func__, __LINE__, err);
> +	} else if (request->used_desc_idx != -1) {
> +		if (request->used_desc_idx >=
> +			le16_to_cpu(mic_vq_config(mvdev->dd)->num)) {
> +			dev_err(mic_dev(mvdev), "%s %d err %d\n",
> +				__func__, __LINE__, -EINVAL);
> +			err = -EINVAL;
> +			goto err;
> +		}
> +		mic_update_local_avail(mvdev, request->copy.vr_idx);
> +		mic_update_used(mvdev, request->copy.vr_idx,
> +			request->used_desc_idx, request->used_len);
> +	}
> +err:
> +	mutex_unlock(vr_mutex);
> +	return err;
> +}
> +
> +/* Copy a chain of virtio descriptors */
> +int mic_virtio_copy_chain(struct mic_vdev *mvdev,
> +	struct mic_copy *request)
> +{
> +	int err;
> +	struct mutex *vr_mutex;
> +
> +	err = verify_copy_args(mvdev, request);
> +	if (err)
> +		return err;
> +
> +	vr_mutex = &mvdev->vr_mutex[request->vr_idx];
> +	mutex_lock(vr_mutex);
> +	if (!mic_vdevup(mvdev)) {
> +		err = -ENODEV;
> +		dev_err(mic_dev(mvdev), "%s %d err %d\n",
> +			__func__, __LINE__, err);
> +		goto err;
> +	}
> +	err = _mic_virtio_copy(mvdev, request, PROCESS_DESC_CHAIN);
> +	if (!err) {
> +		mic_update_local_avail(mvdev, request->vr_idx);
> +		mic_update_used(mvdev, request->vr_idx,
> +			request->desc_idx, request->out_len);
> +	} else
> +		dev_err(mic_dev(mvdev), "%s %d err %d\n",
> +			__func__, __LINE__, err);
> +err:
> +	mutex_unlock(vr_mutex);
> +	return err;
> +}
> +
> +static void mic_virtio_init_post(struct mic_vdev *mvdev)
> +{
> +	struct mic_vqconfig *vqconfig = mic_vq_config(mvdev->dd);
> +	int i;
> +
> +	for (i = 0; i < mvdev->dd->num_vq; i++) {
> +		if (!le64_to_cpu(vqconfig[i].used_address)) {
> +			dev_warn(mic_dev(mvdev), "used_address zero??\n");
> +			continue;
> +		}
> +		mvdev->vring[i].vr.used =
> +			mvdev->mdev->aper.va +
> +			le64_to_cpu(vqconfig[i].used_address);
> +	}
> +
> +	smp_wmb();

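Looking at the smp_XX macros: here and elsewhere, this driver only uses
smp_wmb(). This seems to violate the SMP barrier pairing rules in
Documentation/memory-barriers.txt: a write barrier should be paired with a
read (or data dependency) barrier on the side that consumes the data.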


> +	mvdev->dc->used_address_updated = 0;
> +
> +	dev_info(mic_dev(mvdev), "%s: device type %d LINKUP\n",
> +		__func__, mvdev->virtio_id);
> +}
> +
> +static inline void mic_virtio_device_reset(struct mic_vdev *mvdev)
> +{
> +	int i;
> +
> +	dev_info(mic_dev(mvdev), "%s: status %d device type %d RESET\n",
> +		__func__, mvdev->dd->status, mvdev->virtio_id);
> +
> +	for (i = 0; i < mvdev->dd->num_vq; i++)
> +		/*
> +		 * Avoid lockdep false positive. The + 1 is for the mic
> +		 * mutex which is held in the reset devices code path.
> +		 */
> +		mutex_lock_nested(&mvdev->vr_mutex[i], i + 1);
> +
> +	/* 0 status means "reset" */
> +	mvdev->dd->status = 0;
> +	mvdev->dc->vdev_reset = 0;
> +	mvdev->dc->host_ack = 1;
> +
> +	for (i = 0; i < mvdev->dd->num_vq; i++) {
> +		mvdev->vring[i].info->avail_idx = 0;
> +		mvdev->vring[i].info->used_idx = 0;
> +	}
> +
> +	for (i = 0; i < mvdev->dd->num_vq; i++)
> +		mutex_unlock(&mvdev->vr_mutex[i]);
> +}
> +
> +void mic_virtio_reset_devices(struct mic_device *mdev)
> +{
> +	struct list_head *pos, *tmp;
> +	struct mic_vdev *mvdev;
> +
> +	dev_info(&mdev->pdev->dev, "%s\n",  __func__);
> +
> +	WARN_ON(!mutex_is_locked(&mdev->mic_mutex));
> +	list_for_each_safe(pos, tmp, &mdev->vdev_list) {
> +		mvdev = list_entry(pos, struct mic_vdev, list);
> +		mic_virtio_device_reset(mvdev);
> +		mvdev->poll_wake = 1;
> +		wake_up(&mvdev->waitq);
> +	}
> +}
> +
> +void mic_bh_handler(struct work_struct *work)
> +{
> +	struct mic_vdev *mvdev = container_of(work, struct mic_vdev,
> +			virtio_bh_work);
> +
> +	if (mvdev->dc->used_address_updated)
> +		mic_virtio_init_post(mvdev);
> +
> +	if (mvdev->dc->vdev_reset)
> +		mic_virtio_device_reset(mvdev);
> +
> +	mvdev->poll_wake = 1;
> +	wake_up(&mvdev->waitq);
> +}
> +
> +static irqreturn_t mic_virtio_intr_handler(int irq, void *data)
> +{
> +
> +	struct mic_vdev *mvdev = data;
> +	struct mic_device *mdev = mvdev->mdev;
> +
> +	mdev->ops->ack_interrupt(mdev);
> +	schedule_work(&mvdev->virtio_bh_work);
> +	return IRQ_HANDLED;
> +}
> +
> +int mic_virtio_config_change(struct mic_vdev *mvdev,
> +			void __user *argp)
> +{
> +	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
> +	int ret = 0, retry = 100, i;
> +	struct mic_bootparam *bootparam = mvdev->mdev->dp;
> +	s8 db = bootparam->h2c_config_db;
> +
> +	mutex_lock(&mvdev->mdev->mic_mutex);
> +	for (i = 0; i < mvdev->dd->num_vq; i++)
> +		mutex_lock_nested(&mvdev->vr_mutex[i], i + 1);
> +
> +	if (db == -1 || mvdev->dd->type == -1) {
> +		ret = -EIO;
> +		goto exit;
> +	}
> +
> +	if (copy_from_user(mic_vq_configspace(mvdev->dd),
> +				argp, mvdev->dd->config_len)) {
> +		dev_err(mic_dev(mvdev), "%s %d err %d\n",
> +			__func__, __LINE__, -EFAULT);
> +		ret = -EFAULT;
> +		goto exit;
> +	}
> +	mvdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
> +	smp_wmb();
> +	mvdev->mdev->ops->send_intr(mvdev->mdev, db);
> +
> +	for (i = retry; i--;) {
> +		ret = wait_event_timeout(wake,
> +			mvdev->dc->guest_ack, msecs_to_jiffies(100));
> +		if (ret)
> +			break;
> +	}
> +
> +	dev_info(mic_dev(mvdev),
> +		"%s %d retry: %d\n", __func__, __LINE__, retry);
> +	mvdev->dc->config_change = 0;
> +	mvdev->dc->guest_ack = 0;
> +exit:
> +	for (i = 0; i < mvdev->dd->num_vq; i++)
> +		mutex_unlock(&mvdev->vr_mutex[i]);
> +	mutex_unlock(&mvdev->mdev->mic_mutex);
> +	return ret;
> +}
> +
> +static int mic_copy_dp_entry(struct mic_vdev *mvdev,
> +					void __user *argp,
> +					__u8 *type,
> +					struct mic_device_desc **devpage)
> +{
> +	struct mic_device *mdev = mvdev->mdev;
> +	struct mic_device_desc dd, *dd_config, *devp;
> +	struct mic_vqconfig *vqconfig;
> +	int ret = 0, i;
> +	bool slot_found = false;
> +
> +	if (copy_from_user(&dd, argp, sizeof(dd))) {
> +		dev_err(mic_dev(mvdev), "%s %d err %d\n",
> +			__func__, __LINE__, -EFAULT);
> +		return -EFAULT;
> +	}
> +
> +	if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE
> +		|| dd.num_vq > MIC_MAX_VRINGS) {
> +		dev_err(mic_dev(mvdev), "%s %d err %d\n",
> +			__func__, __LINE__, -EINVAL);
> +		return -EINVAL;
> +	}
> +
> +	dd_config = kmalloc(mic_desc_size(&dd), GFP_KERNEL);
> +	if (dd_config == NULL) {
> +		dev_err(mic_dev(mvdev), "%s %d err %d\n",
> +			__func__, __LINE__, -ENOMEM);
> +		return -ENOMEM;
> +	}
> +	if (copy_from_user(dd_config, argp, mic_desc_size(&dd))) {
> +		ret = -EFAULT;
> +		dev_err(mic_dev(mvdev), "%s %d err %d\n",
> +			__func__, __LINE__, ret);
> +		goto exit;
> +	}
> +
> +	vqconfig = mic_vq_config(dd_config);
> +	for (i = 0; i < dd.num_vq; i++) {
> +		if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) {
> +			ret =  -EINVAL;
> +			dev_err(mic_dev(mvdev), "%s %d err %d\n",
> +				__func__, __LINE__, ret);
> +			goto exit;
> +		}
> +	}
> +
> +	/* Find the first free device page entry */
> +	for (i = mic_aligned_size(struct mic_bootparam);
> +		i < MIC_DP_SIZE - mic_total_desc_size(dd_config);
> +		i += mic_total_desc_size(devp)) {
> +		devp = mdev->dp + i;
> +		if (devp->type == 0 || devp->type == -1) {
> +			slot_found = true;
> +			break;
> +		}
> +	}
> +	if (!slot_found) {
> +		ret =  -EINVAL;
> +		dev_err(mic_dev(mvdev), "%s %d err %d\n",
> +			__func__, __LINE__, ret);
> +		goto exit;
> +	}
> +
> +	/* Save off the type before doing the memcpy. Type will be set in the
> +	 * end after completing all initialization for the new device */
> +	*type = dd_config->type;
> +	dd_config->type = 0;
> +	memcpy(devp, dd_config, mic_desc_size(dd_config));
> +
> +	*devpage = devp;
> +exit:
> +	kfree(dd_config);
> +	return ret;
> +}
> +
> +static void mic_init_device_ctrl(struct mic_vdev *mvdev,
> +				struct mic_device_desc *devpage)
> +{
> +	struct mic_device_ctrl *dc;
> +
> +	dc = mvdev->dc = (void *)devpage + mic_aligned_desc_size(devpage);
> +
> +	dc->config_change = 0;
> +	dc->guest_ack = 0;
> +	dc->vdev_reset = 0;
> +	dc->host_ack = 0;
> +	dc->used_address_updated = 0;
> +	dc->c2h_vdev_db = -1;
> +	dc->h2c_vdev_db = -1;
> +}
> +
> +int mic_virtio_add_device(struct mic_vdev *mvdev,
> +			void __user *argp)
> +{
> +	struct mic_device *mdev = mvdev->mdev;
> +	struct mic_device_desc *dd;
> +	struct mic_vqconfig *vqconfig;
> +	int vr_size, i, j, ret;
> +	u8 type;
> +	s8 db;
> +	char irqname[10];
> +	struct mic_bootparam *bootparam = mdev->dp;
> +	u16 num;
> +
> +	mutex_lock(&mdev->mic_mutex);
> +
> +	ret = mic_copy_dp_entry(mvdev, argp, &type, &dd);
> +	if (ret) {
> +		mutex_unlock(&mdev->mic_mutex);
> +		return ret;
> +	}
> +
> +	mic_init_device_ctrl(mvdev, dd);
> +
> +	mvdev->dd = dd;
> +	mvdev->virtio_id = type;
> +	vqconfig = mic_vq_config(dd);
> +	INIT_WORK(&mvdev->virtio_bh_work, mic_bh_handler);
> +
> +	for (i = 0; i < dd->num_vq; i++) {
> +		struct mic_vring *vr = &mvdev->vring[i];
> +		num = le16_to_cpu(vqconfig[i].num);
> +		mutex_init(&mvdev->vr_mutex[i]);
> +		vr_size = PAGE_ALIGN(vring_size(num, MIC_VIRTIO_RING_ALIGN) +
> +			sizeof(struct _mic_vring_info));
> +		vr->va = (void *)
> +			__get_free_pages(GFP_KERNEL | __GFP_ZERO,
> +			get_order(vr_size));
> +		if (!vr->va) {
> +			ret = -ENOMEM;
> +			dev_err(mic_dev(mvdev), "%s %d err %d\n",
> +				__func__, __LINE__, ret);
> +			goto err;
> +		}
> +		vr->len = vr_size;
> +		vr->info = vr->va + vring_size(num, MIC_VIRTIO_RING_ALIGN);
> +		vr->info->magic = MIC_MAGIC + mvdev->virtio_id + i;
> +		vqconfig[i].address = mic_map_single(mdev,
> +			vr->va, vr_size);
> +		if (mic_map_error(vqconfig[i].address)) {
> +			free_pages((unsigned long)vr->va,
> +				get_order(vr_size));
> +			ret = -ENOMEM;
> +			dev_err(mic_dev(mvdev), "%s %d err %d\n",
> +				__func__, __LINE__, ret);
> +			goto err;
> +		}
> +		vqconfig[i].address = cpu_to_le64(vqconfig[i].address);
> +
> +		vring_init(&vr->vr, num,
> +			vr->va, MIC_VIRTIO_RING_ALIGN);
> +
> +		dev_dbg(&mdev->pdev->dev,
> +			"%s %d index %d va %p info %p vr_size 0x%x\n",
> +			__func__, __LINE__, i, vr->va, vr->info, vr_size);
> +	}
> +
> +	snprintf(irqname, sizeof(irqname),
> +		"mic%dvirtio%d", mdev->id, mvdev->virtio_id);
> +	mvdev->virtio_db = mic_next_db(mdev);
> +	mvdev->virtio_cookie = mic_request_irq(mdev, mic_virtio_intr_handler,
> +			irqname, mvdev, mvdev->virtio_db, MIC_INTR_DB);
> +	if (IS_ERR(mvdev->virtio_cookie)) {
> +		ret = PTR_ERR(mvdev->virtio_cookie);
> +		dev_dbg(&mdev->pdev->dev, "request irq failed\n");
> +		goto err;
> +	}
> +
> +	mvdev->dc->c2h_vdev_db = mvdev->virtio_db;
> +
> +	list_add_tail(&mvdev->list, &mdev->vdev_list);
> +	/*
> +	 * Now that we are completely initialized, set the type to "commit"
> +	 * the addition of the new device.
> +	 * For x86 we only need a compiler barrier before dd->type. For other
> +	 * platforms  we need smp_wmb(..) since we are writing to system memory
> +	 * and type needs to be visible to all CPUs or MIC.
> +	 */
> +	smp_wmb();
> +	dd->type = type;
> +
> +	dev_info(&mdev->pdev->dev, "Added virtio device id %d\n", dd->type);
> +
> +	db = bootparam->h2c_config_db;
> +	if (db != -1)
> +		mdev->ops->send_intr(mdev, db);
> +	mutex_unlock(&mdev->mic_mutex);
> +	return 0;
> +err:
> +	vqconfig = mic_vq_config(dd);
> +	for (j = 0; j < i; j++) {
> +		mic_unmap_single(mdev, le64_to_cpu(vqconfig[j].address),
> +				mvdev->vring[j].len);
> +		free_pages((unsigned long)mvdev->vring[j].va,
> +			get_order(mvdev->vring[j].len));
> +	}
> +	mutex_unlock(&mdev->mic_mutex);
> +	return ret;
> +}
> +
> +void mic_virtio_del_device(struct mic_vdev *mvdev)
> +{
> +	struct list_head *pos, *tmp;
> +	struct mic_vdev *tmp_mvdev;
> +	struct mic_device *mdev = mvdev->mdev;
> +	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
> +	int i, ret, retry = 100;
> +	struct mic_vqconfig *vqconfig;
> +	struct mic_bootparam *bootparam = mdev->dp;
> +	s8 db;
> +
> +	mutex_lock(&mdev->mic_mutex);
> +	db = bootparam->h2c_config_db;
> +	if (db == -1)
> +		goto skip_hot_remove;
> +	dev_info(&mdev->pdev->dev,
> +		"Requesting hot remove id %d\n", mvdev->virtio_id);
> +	mvdev->dc->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
> +	smp_wmb();
> +	mdev->ops->send_intr(mdev, db);
> +	for (i = retry; i--;) {
> +		ret = wait_event_timeout(wake,
> +			mvdev->dc->guest_ack, msecs_to_jiffies(100));
> +		if (ret)
> +			break;
> +	}
> +	dev_info(&mdev->pdev->dev,
> +		"Device id %d config_change %d guest_ack %d\n",
> +		mvdev->virtio_id, mvdev->dc->config_change,
> +		mvdev->dc->guest_ack);
> +	mvdev->dc->config_change = 0;
> +	mvdev->dc->guest_ack = 0;
> +skip_hot_remove:
> +	mic_free_irq(mdev, mvdev->virtio_cookie, mvdev);
> +	flush_work(&mvdev->virtio_bh_work);
> +	vqconfig = mic_vq_config(mvdev->dd);
> +	for (i = 0; i < mvdev->dd->num_vq; i++) {
> +		mic_unmap_single(mdev, le64_to_cpu(vqconfig[i].address),
> +				mvdev->vring[i].len);
> +		free_pages((unsigned long)mvdev->vring[i].va,
> +			get_order(mvdev->vring[i].len));
> +	}
> +
> +	list_for_each_safe(pos, tmp, &mdev->vdev_list) {
> +		tmp_mvdev = list_entry(pos, struct mic_vdev, list);
> +		if (tmp_mvdev == mvdev) {
> +			list_del(pos);
> +			dev_info(&mdev->pdev->dev,
> +				"Removing virtio device id %d\n",
> +				mvdev->virtio_id);
> +			break;
> +		}
> +	}
> +	mvdev->dd->type = -1;
> +	mutex_unlock(&mdev->mic_mutex);
> +}
> diff --git a/drivers/misc/mic/host/mic_virtio.h b/drivers/misc/mic/host/mic_virtio.h
> new file mode 100644
> index 0000000..1e2a439
> --- /dev/null
> +++ b/drivers/misc/mic/host/mic_virtio.h
> @@ -0,0 +1,108 @@
> +/*
> + * Intel MIC Platform Software Stack (MPSS)
> + *
> + * Copyright(c) 2013 Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License, version 2, as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
> + * USA.
> + *
> + * The full GNU General Public License is included in this distribution in
> + * the file called "COPYING".
> + *
> + * Intel MIC Host driver.
> + *
> + */
> +#ifndef MIC_VIRTIO_H
> +#define MIC_VIRTIO_H
> +
> +#include <linux/types.h>
> +#include <linux/virtio_ring.h>
> +#include <linux/virtio_config.h>
> +
> +#include <linux/mic_ioctl.h>
> +
> +/*
> + * Note on endianness.
> + * 1. Host can be both BE or LE
> + * 2. Guest/card is LE. Host uses le_to_cpu to access desc/avail
> + *    rings and ioreadXX/iowriteXX to access used ring.
> + * 3. Device page exposed by host to guest contains LE values. Guest
> + *    accesses these using ioreadXX/iowriteXX etc. This way in general we
> + *    obey the virtio spec according to which guest works with native
> + *    endianness and host is aware of guest endianness and does all
> + *    required endianness conversion.
> + * 4. Data provided from user space to guest (in ADD_DEVICE and
> + *    CONFIG_CHANGE ioctl's) is not interpreted by the driver and should be
> + *    in guest endianness.
> + */
> +
> +struct mic_vdev {
> +	int virtio_id;
> +	wait_queue_head_t waitq;
> +	struct mic_device *mdev;
> +	int poll_wake;
> +	unsigned long out_bytes;
> +	unsigned long in_bytes;
> +	struct mic_vring vring[MIC_MAX_VRINGS];
> +	struct work_struct virtio_bh_work;
> +	struct mutex vr_mutex[MIC_MAX_VRINGS];
> +	struct mic_device_desc *dd;
> +	struct mic_device_ctrl *dc;
> +	struct list_head list;
> +	int virtio_db;
> +	struct mic_irq *virtio_cookie;
> +};
> +
> +void mic_virtio_uninit(struct mic_device *mdev);
> +int mic_virtio_add_device(struct mic_vdev *mvdev,
> +			void __user *argp);
> +void mic_virtio_del_device(struct mic_vdev *mvdev);
> +int mic_virtio_config_change(struct mic_vdev *mvdev,
> +			void __user *argp);
> +int mic_virtio_copy_desc(struct mic_vdev *mvdev,
> +	struct mic_copy_desc *request);
> +void mic_virtio_reset_devices(struct mic_device *mdev);
> +int mic_virtio_copy_chain(struct mic_vdev *mvdev,
> +	struct mic_copy *request);
> +void mic_bh_handler(struct work_struct *work);
> +
> +static inline struct device *mic_dev(struct mic_vdev *mvdev)
> +{
> +	return &mvdev->mdev->pdev->dev;
> +}
> +
> +static inline int mic_vdev_inited(struct mic_vdev *mvdev)
> +{
> +	/* Device has not been created yet */
> +	if (!mvdev->dd || !mvdev->dd->type) {
> +		dev_err(mic_dev(mvdev), "%s %d err %d\n",
> +			__func__, __LINE__, -EINVAL);
> +		return -EINVAL;
> +	}
> +
> +	/* Device has been removed/deleted */
> +	if (mvdev->dd->type == -1) {
> +		dev_err(mic_dev(mvdev), "%s %d err %d\n",
> +			__func__, __LINE__, -ENODEV);
> +		return -ENODEV;
> +	}
> +
> +	return 0;
> +}
> +
> +static inline bool mic_vdevup(struct mic_vdev *mvdev)
> +{
> +	return !!mvdev->dd->status;
> +}
> +#endif
> diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
> index 8f985dd..1579aab 100644
> --- a/include/uapi/linux/Kbuild
> +++ b/include/uapi/linux/Kbuild
> @@ -240,6 +240,7 @@ header-y += mei.h
>  header-y += mempolicy.h
>  header-y += meye.h
>  header-y += mic_common.h
> +header-y += mic_ioctl.h
>  header-y += mii.h
>  header-y += minix_fs.h
>  header-y += mman.h
> diff --git a/include/uapi/linux/mic_common.h b/include/uapi/linux/mic_common.h
> index b8edede..2576d0b 100644
> --- a/include/uapi/linux/mic_common.h
> +++ b/include/uapi/linux/mic_common.h
> @@ -26,7 +26,61 @@
>  #ifndef __MIC_COMMON_H_
>  #define __MIC_COMMON_H_
>  
> -#include <linux/types.h>
> +#include <linux/virtio_ring.h>
> +
> +#ifndef __KERNEL__
> +#define ALIGN(a, x)	(((a) + (x) - 1) & ~((x) - 1))
> +#define __aligned(x)	__attribute__ ((aligned(x)))
> +#endif
> +
> +#define mic_aligned_size(x) ALIGN(sizeof(x), 8)
> +
> +
> +/**
> + * struct mic_device_desc: Virtio device information shared between the
> + * virtio driver and userspace backend
> + *
> + * @type: Device type: console/network/disk etc.  Type 0/-1 terminates.
> + * @num_vq: Number of virtqueues.
> + * @feature_len: Number of bytes of feature bits.  Multiply by 2: one for
> +   host features and one for guest acknowledgements.
> + * @config_len: Number of bytes of the config array after virtqueues.
> + * @status: A status byte, written by the Guest.
> + * @config: Start of the following variable length config.
> + */
> +struct mic_device_desc {
> +	__s8 type;
> +	__u8 num_vq;
> +	__u8 feature_len;
> +	__u8 config_len;
> +	__u8 status;
> +	__u64 config[0];
> +} __aligned(8);
> +
> +/**
> + * struct mic_device_ctrl: Per virtio device information in the device page
> + * used internally by the host and card side drivers.
> + *
> + * @vdev: Used for storing MIC vdev information by the guest.
> + * @config_change: Set to 1 by host when a config change is requested.
> + * @vdev_reset: Set to 1 by guest to indicate virtio device has been reset.
> + * @guest_ack: Set to 1 by guest to ack a command.
> + * @host_ack: Set to 1 by host to ack a command.
> + * @used_address_updated: Set to 1 by guest when the used address should be
> + * updated.
> + * @c2h_vdev_db: The doorbell number to be used by guest. Set by host.
> + * @h2c_vdev_db: The doorbell number to be used by host. Set by guest.
> + */
> +struct mic_device_ctrl {
> +	__u64 vdev;
> +	__u8 config_change;
> +	__u8 vdev_reset;
> +	__u8 guest_ack;
> +	__u8 host_ack;
> +	__u8 used_address_updated;
> +	__s8 c2h_vdev_db;
> +	__s8 h2c_vdev_db;
> +} __aligned(8);
>  
>  /**
>   * struct mic_bootparam: Virtio device independent information in device page
> @@ -47,6 +101,115 @@ struct mic_bootparam {
>  	__u8 shutdown_card;
>  } __aligned(8);
>  
> +/**
> + * struct mic_device_page: High level representation of the device page
> + *
> + * @bootparam: The bootparam structure is used for sharing information and
> + * status updates between MIC host and card drivers.
> + * @desc: Array of MIC virtio device descriptors.
> + */
> +struct mic_device_page {
> +	struct mic_bootparam bootparam;
> +	struct mic_device_desc desc[0];
> +};
> +/**
> + * struct mic_vqconfig: This is how we expect the device configuration field
> + * for a virtqueue to be laid out in config space.
> + *
> + * @address: Guest/MIC physical address of the virtio ring
> + * (avail and desc rings)
> + * @used_address: Guest/MIC physical address of the used ring
> + * @num: The number of entries in the virtio_ring
> + */
> +struct mic_vqconfig {
> +	__u64 address;
> +	__u64 used_address;
> +	__u16 num;
> +} __aligned(8);
> +
> +/* The alignment to use between consumer and producer parts of vring.
> + * This is pagesize for historical reasons. */
> +#define MIC_VIRTIO_RING_ALIGN		4096
> +
> +#define MIC_MAX_VRINGS			4
> +#define MIC_VRING_ENTRIES		128
> +
> +/*
> + * Max vring entries (power of 2) to ensure desc and avail rings
> + * fit in a single page
> + */
> +#define MIC_MAX_VRING_ENTRIES		128
> +
> +/**
> + * Max size of the desc block in bytes: includes:
> + *	- struct mic_device_desc
> + *	- struct mic_vqconfig (num_vq of these)
> + *	- host and guest features
> + *	- virtio device config space
> + */
> +#define MIC_MAX_DESC_BLK_SIZE		256
> +
> +/**
> + * struct _mic_vring_info - Host vring info exposed to userspace backend
> + *
> + * @avail_idx: host avail idx
> + * @used_idx: host used idx
> + * @magic: A magic debug cookie.
> + */
> +struct _mic_vring_info {
> +	__u16 avail_idx;
> +	__u16 used_idx;
> +	int magic;
> +};
> +
> +/**
> + * struct mic_vring - Vring information.
> + *
> + * @vr: The virtio ring.
> + * @info: Host vring information exposed to the card.
> + * @va: The va for the buffer allocated for vr and info.
> + * @len: The length of the buffer required for allocating vr and info.
> + */
> +struct mic_vring {
> +	struct vring vr;
> +	struct _mic_vring_info *info;
> +	void *va;
> +	int len;
> +};
> +
> +#define mic_aligned_desc_size(d) ALIGN(mic_desc_size(d), 8)
> +
> +#ifndef INTEL_MIC_CARD
> +static inline unsigned mic_desc_size(const struct mic_device_desc *desc)
> +{
> +	return mic_aligned_size(*desc)
> +		+ desc->num_vq * mic_aligned_size(struct mic_vqconfig)
> +		+ desc->feature_len * 2
> +		+ desc->config_len;
> +}
> +
> +static inline struct mic_vqconfig *
> +mic_vq_config(const struct mic_device_desc *desc)
> +{
> +	return (struct mic_vqconfig *)(desc + 1);
> +}
> +
> +static inline __u8 *mic_vq_features(const struct mic_device_desc *desc)
> +{
> +	return (__u8 *)(mic_vq_config(desc) + desc->num_vq);
> +}
> +
> +static inline __u8 *mic_vq_configspace(const struct mic_device_desc *desc)
> +{
> +	return mic_vq_features(desc) + desc->feature_len * 2;
> +}
> +static inline unsigned mic_total_desc_size(struct mic_device_desc *desc)
> +{
> +	return mic_aligned_desc_size(desc) +
> +		mic_aligned_size(struct mic_device_ctrl);
> +}
> +#endif
> +
>  /* Device page size */
>  #define MIC_DP_SIZE 4096
>  
> diff --git a/include/uapi/linux/mic_ioctl.h b/include/uapi/linux/mic_ioctl.h
> new file mode 100644
> index 0000000..02e1518
> --- /dev/null
> +++ b/include/uapi/linux/mic_ioctl.h
> @@ -0,0 +1,104 @@
> +/*
> + * Intel MIC Platform Software Stack (MPSS)
> + *
> + * Copyright(c) 2013 Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License, version 2, as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
> + * USA.
> + *
> + * The full GNU General Public License is included in this distribution in
> + * the file called "COPYING".
> + *
> + * Intel MIC Host driver.
> + *
> + */
> +#ifndef _MIC_IOCTL_H_
> +#define _MIC_IOCTL_H_
> +
> +#include <linux/mic_common.h>
> +
> +/*
> + * mic_copy - MIC virtio descriptor copy.
> + *
> + * @iov: An array of IOVEC structures containing user space buffers.
> + * @iovcnt: Number of IOVEC structures in iov.
> + * @vr_idx: The vring index.
> + * @desc_idx: The starting desc index.
> + * @out_cookie: A cookie returned by the driver to identify this copy.
> + * @out_len: The aggregate of the total length written to or read from
> + *	the virtio device.
> + */
> +struct mic_copy {
> +#ifdef __KERNEL__
> +	struct iovec __user *iov;
> +#else
> +	struct iovec *iov;
> +#endif
> +	int iovcnt;
> +	__u8 vr_idx;
> +	__u32 desc_idx;
> +	__u64 out_cookie;
> +	__u32 out_len;
> +};
> +
> +/*
> + * mic_copy_desc - MIC virtio copy.
> + *
> + * @copy - MIC virtio descriptor copy.
> + * @used_desc_idx - The desc index to update the used ring with.
> + *		The used index is not updated if the used_idx is -1.
> + * @used_len - The length to update the used ring with.
> + */
> +struct mic_copy_desc {
> +	struct mic_copy copy;
> +	__u32 used_desc_idx;
> +	__u32 used_len;
> +};
> +
> +/*
> + * Add a new virtio device
> + * The (struct mic_device_desc *) pointer points to a device page entry
> + *	for the virtio device consisting of:
> + *	- struct mic_device_desc
> + *	- struct mic_vqconfig (num_vq of these)
> + *	- host and guest features
> + *	- virtio device config space
> + * The total size referenced by the pointer should equal the size returned
> + * by desc_size() in mic_common.h
> + */
> +#define MIC_VIRTIO_ADD_DEVICE _IOWR('s', 1, struct mic_device_desc *)
> +
> +/*
> + * Copy the number of entries in the iovec and update the used index
> + * if requested by the user.
> + */
> +#define MIC_VIRTIO_COPY_DESC	_IOWR('s', 2, struct mic_copy_desc *)
> +
> +/*
> + * Copy iovec entries upto the length of the chain. The number of entries
> + * must be >= the length of the chain else -1 is returned and errno set
> + * to EINVAL.
> + */
> +#define MIC_VIRTIO_COPY_CHAIN _IOWR('s', 3, struct mic_copy *)
> +
> +/*
> + * Notify virtio device of a config change
> + * The (__u8 *) pointer points to config space values for the device
> + * as they should be written into the device page. The total size
> + * referenced by the pointer should equal the config_len field of struct
> + * mic_device_desc.
> + */
> +#define MIC_VIRTIO_CONFIG_CHANGE _IOWR('s', 5, __u8 *)
> +
> +#endif
> -- 
> 1.8.2.1
_______________________________________________
Virtualization mailing list
Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linuxfoundation.org/mailman/listinfo/virtualization




[Index of Archives]     [KVM Development]     [Libvirt Development]     [Libvirt Users]     [CentOS Virtualization]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux