Re: [RFCv2 00/12] Introduce host-side virtio queue and CAIF Virtio.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



"Michael S. Tsirkin" <mst@xxxxxxxxxx> writes:

> On Wed, Dec 05, 2012 at 03:36:58PM +0100, Sjur Brændeland wrote:
>> Feedback on this patch-set is appreciated, particularly on structure
>> and code-reuse between vhost.c and the host-side virtio-queue.
>> I'd also like some suggestions on how to handle the build configuration
>> better - currently there are some unnecessary build dependencies.
>
> Rusty seems to disagree but one of the concerns people have about vhost
> is security; so I value getting as much static checking as we can.  This
> discards __user annotations so this doesn't work for me.

Sometimes, when we generalize code, we lose some type safety.  Callbacks
take void *, for example.  And it happens *all the time* with const.  We
don't create a set of parallel const-safe routines.

Extracting common code where it can be shared provides better, not worse
security, because more people will read it.  I've never audited the
vhost code, for example.

We already have a 'struct vring', we should just use that.

I meant to do more work on this, but I've run out of time :( I was
thinking of a drivers/virtio/virtio_ring.c, and in that we'd put the
wrappers for vhost_net/blk, and for CAIF, etc.  We could at least have a
variant which did the __user etc thing, even if you have to pass in
getdesc().

getu16: get_user or assignment.
getdesc: copy (and check, translate) the descriptor.
getmem/putmem: memcpy or copy_to/from_user.

(Completely untested, of course...)

Thoughts?
Rusty.


/*
 * Helpers for the host side of a virtio ring.
 *
 * Since these may be in userspace, we use (inline) accessors.
 */
#include <uapi/linux/virtio_ring.h>

/* Returns vring->num if empty, -ve on error. */
static inline int __vringh_get_head(const struct vring *vring,
				  int (*getu16)(u16 *val, u16 *p))
{
	u16 last_avail_idx, avail_idx, i, head;
	int err;

	err = getu16(&avail_idx, &vring->avail->idx);
	if (err) {
		vringh_bad("Failed to access avail idx at %p", &vq->avail->idx);
		return err;
	}

	err = getu16(&last_avail_idx, &vring_avail_event(vring));
	if (err) {
		vringh_bad("Failed to access last avail idx at %p",
			   &vring_avail_event(vring));
		return err;
	}

	if (last_avail_idx == avail_idx)
		return vring->num;

	/* Only get avail ring entries after they have been exposed by guest. */
	smp_rmb();

	i = last_avail_idx & (vring->num - 1);

	err = getu16(&head, &vq->avail->ring[i]);
	if (err) {
		vringh_bad("Failed to read head: idx %d address %p",
			   last_avail_idx, &vq->avail->ring[i]);
		return err;
	}

	if (head >= vring->num) {
		vringh_bad("Guest says index %u > %u is available",
			   head, vring->num);
		return -EINVAL;
	}
	return head;
}

struct vringh_access {
	/* Start address. */
	struct vring_desc *start;
	/* Maximum number of entries. */
	u32 max;

	/* Cached descriptor. */
	struct vring_desc desc;
};

/*
 * Initialize the vringh_access structure for this head.
 *
 * For direct buffers, the range is simply the desc[] array in the vring.
 *
 * For indirect buffers, the range is the indirect entry; check() is called
 * to vlidate this range.
 *
 * -error otherwise.
 */
static inline int __vringh_get_access(const struct vring *vring, u16 head,
				    int (*getdesc)(struct vring_desc *dst,
						   const struct vring_desc *src,
						   void *arg),
				    void *arg,
				    struct vringh_access *acc)
{
	int err;

	err = getdesc(&acc->desc, &vring->desc[head], arg);
	if (unlikely(err))
		return err;

	if (acc->desc.flags & VRING_DESC_F_INDIRECT) {
		/* We don't support chained indirects. */
		if (acc->desc.flags & VRING_DESC_F_NEXT)
			return -EINVAL;
		if (unlikely(acc->desc.len % sizeof desc))
			return -EINVAL;

		acc->start = (void *)(long)acc->desc.addr;
		acc->max = acc->desc.len / sizeof(desc);

		if (acc->max > vring->num)
			return -EINVAL;

		/* Force us to read first desc next time. */
		acc->desc.len = 0;
		acc->desc.next = 0;
		acc->desc.flags = VRING_DESC_F_NEXT;
	} else {
		acc->addr = vring->desc;
		acc->max = vring->num;
		acc->head = head;
	}
	return 0;
}

/*
 * Copy some bytes from the vring descriptor.  Returns num copied.
 *
 * You are expected to exhaust the readable descriptors (ie. stuff output
 * from the other side) before looking for writable descriptors.
 */
static inline int __vringh_pull(struct vringh_access *acc, void *dst, u32 len,
			      int (*getdesc)(struct vring_desc *dst,
					     const struct vring_desc *src,
					     void *arg),
			      int (*getmem)(void *dst, const void *src, size_t,
					    void *arg),
			      void *arg)
{
	int err, done = 0;

	while (len) {
		void *src;
		u32 rlen;

		/* Exhausted this descriptor?  Read next. */
		if (acc->desc.len == 0) {
			if (!(acc->desc.flags & VRING_DESC_F_NEXT))
				return done;

			if (acc->desc.next >= acc->max) {
				vringh_bad("Guest chained index %u > %u",
					   desc.next, acc->max);
				return -EINVAL;
			}

			acc->head = acc->desc.next;
			err = getdesc(&acc->desc, acc->start + acc->head, arg);
			if (unlikely(err))
				return err;

			/* No more readable descriptors? */
			if (unlikely(acc->desc.flags & VRING_DESC_F_WRITE))
				return done;
		}

		if (len < acc->desc.len)
			rlen = len;
		else
			rlen = acc->desc.len;

		src = (void *)(long)acc->desc.addr;
		if (unlikely(getmem(dst, src, rlen, arg)))
			return -EFAULT;

		acc->desc.len -= rlen;
		acc->desc.addr += rlen;
		len -= rlen;
		done += rlen;
	}

	return done;
}

/* Fill in iovec containing remaining readable descriptors, return num
 * (can be no more than vring->num). */
static inline int __vringh_riov(struct vringh_access *acc,
			      struct iovec riov[],
			      int (*getdesc)(struct vring_desc *dst,
					     const struct vring_desc *src,
					     void *arg),
			      void *arg)
{
	int err, count = 0;

	while (!(acc->desc.flags & VRING_DESC_F_WRITE)) {
		if (acc->desc.len != 0) {
			if (count == acc->max)
				return -ELOOP;
			riov->iov_base = (void * __user)(long)acc->desc.addr;
			riov->iov_len = acc->desc.len;
			riov++;
			count++;
		}
		if (!(acc->desc.flags & VRING_DESC_F_NEXT))
			break;
		if (acc->desc.next >= acc->max)
			return -EINVAL;

		acc->head = acc->desc.next;
		err = getdesc(&acc->desc, acc->start + acc->head, arg);
		if (unlikely(err))
			return err;
	}
	return count;
}

static inline int __vringh_push(struct vringh_access *acc,
			      const void *src, u32 len,
			      int (*getdesc)(struct vring_desc *dst,
					     const struct vring_desc *src,
					     void *arg),
			      int (*putmem)(void *dst, const void *src, size_t,
					    void *arg),
			      void *arg)
{
	int err, done = 0;

	while (len) {
		void *dst;
		u32 wlen;

		/* Exhausted this descriptor?  Read next. */
		if (acc->desc.len == 0) {
			if (!(acc->desc.flags & VRING_DESC_F_NEXT))
				return done;

			if (acc->desc.next >= acc->max) {
				vringh_bad("Guest chained index %u > %u",
					   desc.next, acc->max);
				return -EINVAL;
			}

			acc->head = acc->desc.next;
			err = getdesc(&acc->desc, acc->start + acc->head, arg);
			if (unlikely(err))
				return err;

			/* Non-writable descriptor out of order? */
			if (unlikely(!(acc->desc.flags & VRING_DESC_F_WRITE)))
				return -EINVAL;
		}

		if (len < acc->desc.len)
			wlen = len;
		else
			wlen = acc->desc.len;

		dst = (void *)(long)acc->desc.addr;
		if (unlikely(putmem(dst, src, wlen, arg)))
			return -EFAULT;

		acc->desc.len -= wlen;
		acc->desc.addr += wlen;
		len -= wlen;
		done += wlen;
	}

	return done;
}

/* Fill in iovec containing remaining writable descriptors, return num
 * (can be no more than vring->num). */
static inline int __vringh_wiov(struct vringh_access *acc,
			      struct iovec wiov[],
			      int (*getdesc)(struct vring_desc *dst,
					     const struct vring_desc *src,
					     void *arg),
			      void *arg)
{
	int err, count = 0;

	while (!(acc->desc.flags & VRING_DESC_F_WRITE)) {
		if (acc->desc.len != 0) {
			if (count == acc->max)
				return -ELOOP;
			wiov->iov_base = (void * __user)(long)acc->desc.addr;
			wiov->iov_len = acc->desc.len;
			wiov++;
			count++;
		}
		if (!(acc->desc.flags & VRING_DESC_F_NEXT))
			break;
		if (acc->desc.next >= acc->max)
			return -EINVAL;

		acc->head = acc->desc.next;
		err = getdesc(&acc->desc, acc->start + acc->head, arg);
		if (unlikely(err))
			return err;
	}
	return count;
}
_______________________________________________
Virtualization mailing list
Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linuxfoundation.org/mailman/listinfo/virtualization



[Index of Archives]     [KVM Development]     [Libvirt Development]     [Libvirt Users]     [CentOS Virtualization]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux