[RFC/PATCH LGUEST X86_64 09/13] lguest64 devices

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



plain text document attachment (lguest64-device.patch)
We started working a little bit on the devices for lguest64.
This is still very much a work-in-progress and needs much more work.

Signed-off-by: Steven Rostedt <srostedt@xxxxxxxxxx>
Signed-off-by: Glauber de Oliveira Costa <glommer@xxxxxxxxx>
Cc: Chris Wright <chrisw@xxxxxxxxxxxx>



Index: work-pv/include/asm-x86_64/lguest_device.h
===================================================================
--- /dev/null
+++ work-pv/include/asm-x86_64/lguest_device.h
@@ -0,0 +1,31 @@
+#ifndef _ASM_LGUEST_DEVICE_H
+#define _ASM_LGUEST_DEVICE_H
+/* Everything you need to know about lguest devices. */
+#include <linux/device.h>
+#include <asm/lguest.h>
+#include <asm/lguest_user.h>
+
+struct lguest_device {
+	/* Unique busid, and index into lguest_page->devices[] */
+	/* By convention, each device can use irq index+1 if it wants to. */
+	unsigned int index;
+
+	struct device dev;
+
+	/* Driver can hang data off here. */
+	void *private;
+};
+
+struct lguest_driver {
+	const char *name;
+	struct module *owner;
+	u16 device_type;
+	int (*probe)(struct lguest_device *dev);
+	void (*remove)(struct lguest_device *dev);
+
+	struct device_driver drv;
+};
+
+extern int register_lguest_driver(struct lguest_driver *drv);
+extern void unregister_lguest_driver(struct lguest_driver *drv);
+#endif /* _ASM_LGUEST_DEVICE_H */
Index: work-pv/arch/x86_64/lguest/lguest_bus.c
===================================================================
--- /dev/null
+++ work-pv/arch/x86_64/lguest/lguest_bus.c
@@ -0,0 +1,180 @@
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <asm/lguest_device.h>
+#include <asm/lguest.h>
+#include <asm/io.h>
+
+static ssize_t type_show(struct device *_dev,
+                         struct device_attribute *attr, char *buf)
+{
+	struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+	return sprintf(buf, "%hu", lguest_devices[dev->index].type);
+}
+static ssize_t features_show(struct device *_dev,
+                             struct device_attribute *attr, char *buf)
+{
+	struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+	return sprintf(buf, "%hx", lguest_devices[dev->index].features);
+}
+static ssize_t pfn_show(struct device *_dev,
+			 struct device_attribute *attr, char *buf)
+{
+	struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+	return sprintf(buf, "%llu", lguest_devices[dev->index].pfn);
+}
+static ssize_t status_show(struct device *_dev,
+                           struct device_attribute *attr, char *buf)
+{
+	struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+	return sprintf(buf, "%hx", lguest_devices[dev->index].status);
+}
+static ssize_t status_store(struct device *_dev, struct device_attribute *attr,
+                            const char *buf, size_t count)
+{
+	struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+	if (sscanf(buf, "%hi", &lguest_devices[dev->index].status) != 1)
+		return -EINVAL;
+	return count;
+}
+static struct device_attribute lguest_dev_attrs[] = {
+	__ATTR_RO(type),
+	__ATTR_RO(features),
+	__ATTR_RO(pfn),
+	__ATTR(status, 0644, status_show, status_store),
+	__ATTR_NULL
+};
+
+static int lguest_dev_match(struct device *_dev, struct device_driver *_drv)
+{
+	struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+	struct lguest_driver *drv = container_of(_drv,struct lguest_driver,drv);
+
+	return (drv->device_type == lguest_devices[dev->index].type);
+}
+
+struct lguest_bus {
+	struct bus_type bus;
+	struct device dev;
+};
+
+static struct lguest_bus lguest_bus = {
+	.bus = {
+		.name  = "lguest",
+		.match = lguest_dev_match,
+		.dev_attrs = lguest_dev_attrs,
+	},
+	.dev = {
+		.parent = NULL,
+		.bus_id = "lguest",
+	}
+};
+
+static int lguest_dev_probe(struct device *_dev)
+{
+	int ret;
+	struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+	struct lguest_driver *drv = container_of(dev->dev.driver,
+						struct lguest_driver, drv);
+
+	lguest_devices[dev->index].status |= LGUEST_DEVICE_S_DRIVER;
+	ret = drv->probe(dev);
+	if (ret == 0)
+		lguest_devices[dev->index].status |= LGUEST_DEVICE_S_DRIVER_OK;
+	return ret;
+}
+
+static int lguest_dev_remove(struct device *_dev)
+{
+	struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+	struct lguest_driver *drv = container_of(dev->dev.driver,
+						struct lguest_driver, drv);
+
+	if (dev->dev.driver && drv->remove)
+		drv->remove(dev);
+	put_device(&dev->dev);
+	return 0;
+}
+
+int register_lguest_driver(struct lguest_driver *drv)
+{
+	if (!lguest_devices)
+		return 0;
+
+	drv->drv.bus = &lguest_bus.bus;
+	drv->drv.name = drv->name;
+	drv->drv.owner = drv->owner;
+	drv->drv.probe = lguest_dev_probe;
+	drv->drv.remove = lguest_dev_remove;
+
+	return driver_register(&drv->drv);
+}
+EXPORT_SYMBOL_GPL(register_lguest_driver);
+
+void unregister_lguest_driver(struct lguest_driver *drv)
+{
+	if (!lguest_devices)
+		return;
+
+	driver_unregister(&drv->drv);
+}
+EXPORT_SYMBOL_GPL(unregister_lguest_driver);
+
+static void release_lguest_device(struct device *_dev)
+{
+	struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+
+	lguest_devices[dev->index].status |= LGUEST_DEVICE_S_REMOVED_ACK;
+	kfree(dev);
+}
+
+static void add_lguest_device(unsigned int index)
+{
+	struct lguest_device *new;
+
+	lguest_devices[index].status |= LGUEST_DEVICE_S_ACKNOWLEDGE;
+	new = kmalloc(sizeof(struct lguest_device), GFP_KERNEL);
+	if (!new) {
+		printk(KERN_EMERG "Cannot allocate lguest device %u\n", index);
+		lguest_devices[index].status |= LGUEST_DEVICE_S_FAILED;
+		return;
+	}
+
+	new->index = index;
+	new->private = NULL;
+	memset(&new->dev, 0, sizeof(new->dev));
+	new->dev.parent = &lguest_bus.dev;
+	new->dev.bus = &lguest_bus.bus;
+	new->dev.release = release_lguest_device;
+	sprintf(new->dev.bus_id, "%u", index);
+	if (device_register(&new->dev) != 0) {
+		printk(KERN_EMERG "Cannot register lguest device %u\n", index);
+		lguest_devices[index].status |= LGUEST_DEVICE_S_FAILED;
+		kfree(new);
+	}
+}
+
+static void scan_devices(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < LGUEST_MAX_DEVICES; i++)
+		if (lguest_devices[i].type)
+			add_lguest_device(i);
+}
+
+static int __init lguest_bus_init(void)
+{
+	if (strcmp(paravirt_ops.name, "lguest") != 0)
+		return 0;
+
+	/* Devices are in page above top of "normal" mem. */
+	lguest_devices = ioremap(max_pfn << PAGE_SHIFT, PAGE_SIZE);
+
+	if (bus_register(&lguest_bus.bus) != 0
+	    || device_register(&lguest_bus.dev) != 0)
+		panic("lguest bus registration failed");
+
+	scan_devices();
+	return 0;
+}
+postcore_initcall(lguest_bus_init);
Index: work-pv/arch/x86_64/lguest/io.c
===================================================================
--- /dev/null
+++ work-pv/arch/x86_64/lguest/io.c
@@ -0,0 +1,425 @@
+/* Simple I/O model for guests, based on shared memory.
+ * Copyright (C) 2006 Rusty Russell IBM Corporation
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ */
+#include <linux/types.h>
+#include <linux/futex.h>
+#include <linux/jhash.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/uaccess.h>
+#include <asm/lguest.h>
+#include <asm/lguest_user.h>
+#include "lguest.h"
+
+static struct list_head dma_hash[64];
+
+/* FIXME: allow multi-page lengths. */
+static int check_dma_list(struct lguest_guest_info *linfo,
+				const struct lguest_dma *dma)
+{
+	unsigned int i;
+
+	for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) {
+		if (!dma->len[i])
+			return 1;
+		if (!lguest_address_ok(linfo, dma->addr[i]))
+			goto kill;
+		if (dma->len[i] > PAGE_SIZE)
+			goto kill;
+		/* We could do over a page, but is it worth it? */
+		if ((dma->addr[i] % PAGE_SIZE) + dma->len[i] > PAGE_SIZE)
+			goto kill;
+	}
+	return 1;
+
+kill:
+	kill_guest(linfo, "bad DMA entry: %u@%#llx", dma->len[i], dma->addr[i]);
+	return 0;
+}
+
+static unsigned int hash(const union futex_key *key)
+{
+	return jhash2((u32*)&key->both.word,
+		      (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
+		      key->both.offset)
+		% ARRAY_SIZE(dma_hash);
+}
+
+/* Must hold read lock on dmainfo owner's current->mm->mmap_sem */
+static void unlink_dma(struct lguest_dma_info *dmainfo)
+{
+	BUG_ON(!mutex_is_locked(&lguest_lock));
+	dmainfo->interrupt = 0;
+	list_del(&dmainfo->list);
+	drop_futex_key_refs(&dmainfo->key);
+}
+
+static inline int key_eq(const union futex_key *a, const union futex_key *b)
+{
+	return (a->both.word == b->both.word
+		&& a->both.ptr == b->both.ptr
+		&& a->both.offset == b->both.offset);
+}
+
+static u32 unbind_dma(struct lguest_guest_info *linfo,
+		      const union futex_key *key,
+		      unsigned long dmas)
+{
+	int i, ret = 0;
+
+	for (i = 0; i < LGUEST_MAX_DMA; i++) {
+		if (key_eq(key, &linfo->dma[i].key) && dmas == linfo->dma[i].dmas) {
+			unlink_dma(&linfo->dma[i]);
+			ret = 1;
+			break;
+		}
+	}
+	return ret;
+}
+
+u32 bind_dma(struct lguest_guest_info *linfo, unsigned long addr,
+				unsigned long dmas, u16 numdmas, u8 interrupt)
+{
+	unsigned int i;
+	u32 ret = 0;
+	union futex_key key;
+
+	printk("inside the handler, with args: %lx, %lx, %x, %x\n",addr,dmas,numdmas,interrupt);
+	if (interrupt >= LGUEST_IRQS)
+		return 0;
+
+	mutex_lock(&lguest_lock);
+	down_read(&current->mm->mmap_sem);
+	printk("Trying to get futex key...  ");
+	if (get_futex_key((u32 __user *)addr, &key) != 0) {
+		kill_guest(linfo, "bad dma address %#lx", addr);
+		goto unlock;
+	}
+	printk("Got it.\n");
+	get_futex_key_refs(&key);
+
+	if (interrupt == 0)
+		ret = unbind_dma(linfo, &key, dmas);
+	else {
+		for (i = 0; i < LGUEST_MAX_DMA; i++) {
+			if (linfo->dma[i].interrupt == 0) {
+				linfo->dma[i].dmas = dmas;
+				linfo->dma[i].num_dmas = numdmas;
+				linfo->dma[i].next_dma = 0;
+				linfo->dma[i].key = key;
+				linfo->dma[i].guest_id = linfo->guest_id;
+				linfo->dma[i].interrupt = interrupt;
+				list_add(&linfo->dma[i].list,
+					 &dma_hash[hash(&key)]);
+				ret = 1;
+				printk("Will return, holding a reference\n");
+				goto unlock;
+			}
+		}
+	}
+	printk("Will return, _without_ a reference\n");
+	drop_futex_key_refs(&key);
+unlock:
+ 	up_read(&current->mm->mmap_sem);
+	mutex_unlock(&lguest_lock);
+	return ret;
+}
+/* lhread from another guest */
+static int lhread_other(struct lguest_guest_info *linfo,
+			void *buf, u32 addr, unsigned bytes)
+{
+	if (addr + bytes < addr
+	    || !lguest_address_ok(linfo, addr+bytes)
+	    || access_process_vm(linfo->tsk, addr, buf, bytes, 0) != bytes) {
+		memset(buf, 0, bytes);
+		kill_guest(linfo, "bad address in registered DMA struct");
+		return 0;
+	}
+	return 1;
+}
+
+/* lhwrite to another guest */
+static int lhwrite_other(struct lguest_guest_info *linfo, u32 addr,
+			 const void *buf, unsigned bytes)
+{
+	if (addr + bytes < addr
+	    || !lguest_address_ok(linfo, addr+bytes)
+	    || (access_process_vm(linfo->tsk, addr, (void *)buf, bytes, 1)
+		!= bytes)) {
+		kill_guest(linfo, "bad address writing to registered DMA");
+		return 0;
+	}
+	return 1;
+}
+
+static u32 copy_data(const struct lguest_dma *src,
+		     const struct lguest_dma *dst,
+		     struct page *pages[])
+{
+	unsigned int totlen, si, di, srcoff, dstoff;
+	void *maddr = NULL;
+
+	totlen = 0;
+	si = di = 0;
+	srcoff = dstoff = 0;
+	while (si < LGUEST_MAX_DMA_SECTIONS && src->len[si]
+	       && di < LGUEST_MAX_DMA_SECTIONS && dst->len[di]) {
+		u32 len = min(src->len[si] - srcoff, dst->len[di] - dstoff);
+
+		if (!maddr)
+			maddr = kmap(pages[di]);
+
+		/* FIXME: This is not completely portable, since
+		   archs do different things for copy_to_user_page. */
+		if (copy_from_user(maddr + (dst->addr[di] + dstoff)%PAGE_SIZE,
+				   (void *__user)src->addr[si], len) != 0) {
+			totlen = 0;
+			break;
+		}
+
+		totlen += len;
+		srcoff += len;
+		dstoff += len;
+		if (srcoff == src->len[si]) {
+			si++;
+			srcoff = 0;
+		}
+		if (dstoff == dst->len[di]) {
+			kunmap(pages[di]);
+			maddr = NULL;
+			di++;
+			dstoff = 0;
+		}
+	}
+
+	if (maddr)
+		kunmap(pages[di]);
+
+	return totlen;
+}
+
+/* Src is us, ie. current. */
+static u32 do_dma(struct lguest_guest_info *srclg, const struct lguest_dma *src,
+		  struct lguest_guest_info *dstlg, const struct lguest_dma *dst)
+{
+	int i;
+	u32 ret;
+	struct page *pages[LGUEST_MAX_DMA_SECTIONS];
+
+	if (!check_dma_list(dstlg, dst) || !check_dma_list(srclg, src))
+		return 0;
+
+	/* First get the destination pages */
+	for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) {
+		if (dst->len[i] == 0)
+			break;
+		if (get_user_pages(dstlg->tsk, dstlg->mm,
+				   dst->addr[i], 1, 1, 1, pages+i, NULL)
+		    != 1) {
+			ret = 0;
+			goto drop_pages;
+		}
+	}
+
+	/* Now copy until we run out of src or dst. */
+	ret = copy_data(src, dst, pages);
+
+drop_pages:
+	while (--i >= 0)
+		put_page(pages[i]);
+	return ret;
+}
+
+/* We cache one process to wakeup: helps for batching & wakes outside locks. */
+void set_wakeup_process(struct lguest_guest_info *linfo,
+						struct task_struct *p)
+{
+	if (p == linfo->wake)
+		return;
+
+	if (linfo->wake) {
+		wake_up_process(linfo->wake);
+		put_task_struct(linfo->wake);
+	}
+	linfo->wake = p;
+	if (linfo->wake)
+		get_task_struct(linfo->wake);
+}
+
+static int dma_transfer(struct lguest_guest_info *srclg,
+			unsigned long udma,
+			struct lguest_dma_info *dst)
+{
+#if 0
+	struct lguest_dma dst_dma, src_dma;
+	struct lguest_guest_info *dstlg;
+	u32 i, dma = 0;
+
+	dstlg = &lguests[dst->guest_id];
+	/* Get our dma list. */
+	lhread(srclg, &src_dma, udma, sizeof(src_dma));
+
+	/* We can't deadlock against them dmaing to us, because this
+	 * is all under the lguest_lock. */
+	down_read(&dstlg->mm->mmap_sem);
+
+	for (i = 0; i < dst->num_dmas; i++) {
+		dma = (dst->next_dma + i) % dst->num_dmas;
+		if (!lhread_other(dstlg, &dst_dma,
+				  dst->dmas + dma * sizeof(struct lguest_dma),
+				  sizeof(dst_dma))) {
+			goto fail;
+		}
+		if (!dst_dma.used_len)
+			break;
+	}
+	if (i != dst->num_dmas) {
+		unsigned long used_lenp;
+		unsigned int ret;
+
+		ret = do_dma(srclg, &src_dma, dstlg, &dst_dma);
+		/* Put used length in src. */
+		lhwrite_u32(srclg,
+			    udma+offsetof(struct lguest_dma, used_len), ret);
+		if (ret == 0 && src_dma.len[0] != 0)
+			goto fail;
+
+		/* Make sure destination sees contents before length. */
+		mb();
+		used_lenp = dst->dmas
+			+ dma * sizeof(struct lguest_dma)
+			+ offsetof(struct lguest_dma, used_len);
+		lhwrite_other(dstlg, used_lenp, &ret, sizeof(ret));
+		dst->next_dma++;
+	}
+ 	up_read(&dstlg->mm->mmap_sem);
+
+	/* Do this last so dst doesn't simply sleep on lock. */
+	set_bit(dst->interrupt, dstlg->irqs_pending);
+	set_wakeup_process(srclg, dstlg->tsk);
+	return i == dst->num_dmas;
+
+fail:
+	up_read(&dstlg->mm->mmap_sem);
+#endif
+	return 0;
+}
+
+int send_dma(struct lguest_guest_info *linfo, unsigned long addr,
+							unsigned long udma)
+{
+	union futex_key key;
+	int pending = 0, empty = 0;
+
+	printk("inside send_dma, with args: %lx, %lx\n",addr,udma);
+again:
+	mutex_lock(&lguest_lock);
+	down_read(&current->mm->mmap_sem);
+	if (get_futex_key((u32 __user *)addr, &key) != 0) {
+		kill_guest(linfo, "bad sending DMA address");
+		goto unlock;
+	}
+	/* Shared mapping?  Look for other guests... */
+	if (key.shared.offset & 1) {
+		struct lguest_dma_info *i, *n;
+		list_for_each_entry_safe(i, n, &dma_hash[hash(&key)], list) {
+			if (i->guest_id == linfo->guest_id)
+				continue;
+			if (!key_eq(&key, &i->key))
+				continue;
+
+			empty += dma_transfer(linfo, udma, i);
+			break;
+		}
+		if (empty == 1) {
+			/* Give any recipients one chance to restock. */
+			up_read(&current->mm->mmap_sem);
+			mutex_unlock(&lguest_lock);
+			yield();
+			empty++;
+			goto again;
+		}
+		pending = 0;
+	} else {
+		/* Private mapping: tell our userspace. */
+		linfo->dma_is_pending = 1;
+		linfo->pending_dma = udma;
+		linfo->pending_addr = addr;
+		pending = 1;
+	}
+unlock:
+	up_read(&current->mm->mmap_sem);
+	mutex_unlock(&lguest_lock);
+	printk("Returning send_dma with pending: %x\n",pending);
+	return pending;
+}
+void release_all_dma(struct lguest_guest_info *linfo)
+{
+	unsigned int i;
+
+	BUG_ON(!mutex_is_locked(&lguest_lock));
+
+	down_read(&linfo->mm->mmap_sem);
+	for (i = 0; i < LGUEST_MAX_DMA; i++) {
+		if (linfo->dma[i].interrupt)
+			unlink_dma(&linfo->dma[i]);
+	}
+	up_read(&linfo->mm->mmap_sem);
+}
+
+/* Userspace wants a dma buffer from this guest. */
+unsigned long get_dma_buffer(struct lguest_guest_info *linfo,
+			     unsigned long addr, unsigned long *interrupt)
+{
+	unsigned long ret = 0;
+	union futex_key key;
+	struct lguest_dma_info *i;
+
+	mutex_lock(&lguest_lock);
+	down_read(&current->mm->mmap_sem);
+	if (get_futex_key((u32 __user *)addr, &key) != 0) {
+		kill_guest(linfo, "bad registered DMA buffer");
+		goto unlock;
+	}
+	list_for_each_entry(i, &dma_hash[hash(&key)], list) {
+		if (key_eq(&key, &i->key) && i->guest_id == linfo->guest_id) {
+			unsigned int j;
+			for (j = 0; j < i->num_dmas; j++) {
+				struct lguest_dma dma;
+
+				ret = i->dmas + j * sizeof(struct lguest_dma);
+				lhread(linfo, &dma, ret, sizeof(dma));
+				if (dma.used_len == 0)
+					break;
+			}
+			*interrupt = i->interrupt;
+			break;
+		}
+	}
+unlock:
+	up_read(&current->mm->mmap_sem);
+	mutex_unlock(&lguest_lock);
+	return ret;
+}
+
+void lguest_io_init(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(dma_hash); i++)
+		INIT_LIST_HEAD(&dma_hash[i]);
+}

--

_______________________________________________
Virtualization mailing list
Virtualization@xxxxxxxxxxxxxx
https://lists.osdl.org/mailman/listinfo/virtualization


[Index of Archives]     [KVM Development]     [Libvirt Development]     [Libvirt Users]     [CentOS Virtualization]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux