[PATCH RFC 2/5] vringfd base/offset

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



It turns out that lguest (and possibly kvm) wants the addresses in the
ring buffer to cover only a certain part of memory, and to be offset.

It makes sense that this be an ioctl.

Signed-off-by: Rusty Russell <rusty@xxxxxxxxxxxxxxx>

diff -r 08fb00b8acab Documentation/ioctl-number.txt
--- a/Documentation/ioctl-number.txt	Sat Apr 05 21:31:40 2008 +1100
+++ b/Documentation/ioctl-number.txt	Sat Apr 05 22:00:10 2008 +1100
@@ -183,6 +183,7 @@ 0xAC	00-1F	linux/raw.h
 0xAC	00-1F	linux/raw.h
 0xAD	00	Netfilter device	in development:
 					<mailto:rusty@xxxxxxxxxxxxxxx>	
+0xAE	00-01	linux/vring.h
 0xB0	all	RATIO devices		in development:
 					<mailto:vgo@xxxxxxxx>
 0xB1	00-1F	PPPoX			<mailto:mostrows@xxxxxxxxxxxxxxxxx>
diff -r 08fb00b8acab fs/vring.c
--- a/fs/vring.c	Sat Apr 05 21:31:40 2008 +1100
+++ b/fs/vring.c	Sat Apr 05 22:00:10 2008 +1100
@@ -38,6 +38,8 @@ struct vring_info
 	u16 mask;
 	u16 __user *last_used;
 	u16 last_avail;
+
+	unsigned long base, limit;
 
 	const struct vring_ops *ops;
 	void *ops_data;
@@ -120,10 +122,30 @@ static int vring_release(struct inode *i
 	return 0;
 }
 
+static int vring_ioctl(struct inode *in, struct file *filp,
+		       unsigned int cmd, unsigned long arg)
+{
+	struct vring_info *vr = filp->private_data;
+	/* Set the ring's base offset or address limit; arg is the raw value. */
+	switch (cmd) {
+	case VRINGSETBASE:
+		vr->base = arg;	/* added to each desc addr in vring_get_buffer */
+		break;
+	case VRINGSETLIMIT:
+		vr->limit = arg; /* bound on desc addr + len, checked pre-offset */
+		break;
+	default:
+		return -ENOTTY;	/* not one of the two vring commands */
+	}
+	/* Both set commands store arg unvalidated and always succeed. */
+	return 0;
+}
+
 static const struct file_operations vring_fops = {
 	.release	= vring_release,
 	.write		= vring_write,
 	.poll		= vring_poll,
+	.ioctl		= vring_ioctl,	/* VRINGSETBASE / VRINGSETLIMIT */
 };
 
 asmlinkage long sys_vringfd(void __user *addr,
@@ -166,6 +188,8 @@ asmlinkage long sys_vringfd(void __user 
 	vr->mask = num_descs - 1;
 	vr->ops = NULL;
 	vr->used = NULL;
+	vr->limit = -1UL;
+	vr->base = 0;
 
 	err = get_user(vr->last_avail, &vr->ring.avail->idx);
 	if (err)
@@ -208,12 +232,15 @@ int vring_get_buffer(struct vring_info *
 		out_len = &dummy;
 
 	*in_len = *out_len = 0;
-	
-	if (unlikely(get_user(head, &vr->ring.avail->ring[head]) != 0))
+
+	if (unlikely(get_user(head, &vr->ring.avail->ring[vr->last_avail
+							  % vr->ring.num])))
 		return -EFAULT;
 
 	i = head;
 	do {
+		void __user *base;
+
 		if (unlikely(i >= vr->ring.num)) {
 			pr_debug("vring: bad index: %u\n", i);
 			return -EINVAL;
@@ -222,24 +249,38 @@ int vring_get_buffer(struct vring_info *
 		if (copy_from_user(&d, &vr->ring.desc[i], sizeof(d)) != 0)
 			return -EFAULT;
 
+		if (d.addr + d.len > vr->limit || (d.addr + d.len < d.addr)) {
+			pr_debug("vring: bad addr/len: %u@%p\n", 
+				 d.len, (void *)(unsigned long)d.addr);
+			return -EINVAL;
+		}
+
+		base = (void __user *)(unsigned long)d.addr + vr->base;
+
 		if (d.flags & VRING_DESC_F_WRITE) {
 			/* Check for length and iovec overflows */
-			if (!num_in)
+			if (!num_in) {
+				pr_debug("vring: writable desc %u in ring %p\n",
+				         i, vr->ring.desc);
 				return -EINVAL;
+			}
 			if (in == *num_in || *in_len + d.len < *in_len)
 				return -E2BIG;
 			in_iov[in].iov_len = d.len;
 			*in_len += d.len;
-			in_iov[in].iov_base = (void __user*)(long)d.addr;
+			in_iov[in].iov_base = base;
 			in++;
 		} else {
-			if (!num_out)
+			if (!num_out) {
+				pr_debug("vring: readable desc %u in ring %p\n",
+				         i, vr->ring.desc);
 				return -EINVAL;
+			}
 			if (out == *num_out || *out_len + d.len < *out_len)
 				return -E2BIG;
 			out_iov[out].iov_len = d.len;
 			*out_len += d.len;
-			out_iov[out].iov_base = (void __user*)(long)d.addr;
+			out_iov[out].iov_base = base;
 			out++;
 		}
 
diff -r 08fb00b8acab include/linux/vring.h
--- a/include/linux/vring.h	Sat Apr 05 21:31:40 2008 +1100
+++ b/include/linux/vring.h	Sat Apr 05 22:00:10 2008 +1100
@@ -18,7 +18,13 @@
  */
 #ifndef _LINUX_VRING_H
 #define _LINUX_VRING_H
+#include <linux/types.h>
 
+/* Ioctl defines, as in "ioctls are AEgly". */
+#define VRINGSETBASE	_IO(0xAE, 0)
+#define VRINGSETLIMIT	_IO(0xAE, 1)
+
+#ifdef __KERNEL__
 /* All members are optional */
 struct vring_ops
 {
@@ -51,4 +57,6 @@ void vring_used_buffer_atomic(struct vri
 void vring_used_buffer_atomic(struct vring_info *vr, int id, u32 len);
 
 void vring_wake(struct vring_info *vr);
+#endif /* __KERNEL__ */
+
 #endif /* _LINUX_VRING_H */
_______________________________________________
Virtualization mailing list
Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linux-foundation.org/mailman/listinfo/virtualization

[Index of Archives]     [KVM Development]     [Libvirt Development]     [Libvirt Users]     [CentOS Virtualization]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux