Add support for shimming a userspace PCI object in qemu-kvm to represent the VBUS object located in the kernel. This presents a PCI bridge to the system using standard PCI mechanisms such as MSI interrupt routing. A guest may then optionally install a PCI driver for this bridge to gain access to the virtual devices behind it. This is the userspace support for the kernel VBUS project, release v3, which you can find here: http://lkml.org/lkml/2009/4/21/408 Note that this patch uses MSIs, which have several implications: 1) This patch has a prerequisite on: http://git.kernel.org/?p=linux/kernel/git/ghaskins/vbus/kvm-userspace.git;a=commitdiff;h=2e0a71f5b234b288bad1904e8f9e1d1e6504d4b9 At the time of this writing, this patch has been accepted upstream into Avi's tree, but has not yet synced out to kernel.org. 2) We need to surface 8 IRQ vectors on the bridge to fully represent 8 levels of priority. However, we utilize multi-MSI, which may have spotty support in the guests (in fact, x86 Linux doesn't support multi-MSI). In these cases, the guest will only see one queue and therefore only have one flat priority. We will move to surfacing these GSIs as MSI-X in a future release to work around this issue.
Signed-off-by: Gregory Haskins <ghaskins@xxxxxxxxxx> --- libkvm/libkvm.c | 14 ++++ libkvm/libkvm.h | 2 + qemu/Makefile.target | 1 qemu/hw/pc.c | 5 ++ qemu/hw/pci.h | 3 + qemu/hw/vbus.c | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++ qemu/hw/vbus.h | 2 + 7 files changed, 183 insertions(+), 0 deletions(-) create mode 100644 qemu/hw/vbus.c create mode 100644 qemu/hw/vbus.h diff --git a/libkvm/libkvm.c b/libkvm/libkvm.c index 0610e3f..8b49096 100644 --- a/libkvm/libkvm.c +++ b/libkvm/libkvm.c @@ -1440,3 +1440,17 @@ int kvm_assign_set_msix_entry(kvm_context_t kvm, return ret; } #endif + +int kvm_vbus_assign_gsi(kvm_context_t kvm, int queue, int gsi) +{ + int r; + struct kvm_vbus_gsi data = { + .queue = queue, + .gsi = gsi, + }; + + r = ioctl(kvm->vm_fd, KVM_VBUS_ASSIGN_GSI, &data); + if (r == -1) + r = -errno; + return r; +} diff --git a/libkvm/libkvm.h b/libkvm/libkvm.h index ce6f054..c4320bd 100644 --- a/libkvm/libkvm.h +++ b/libkvm/libkvm.h @@ -856,6 +856,8 @@ int kvm_commit_irq_routes(kvm_context_t kvm); */ int kvm_get_irq_route_gsi(kvm_context_t kvm); +int kvm_vbus_assign_gsi(kvm_context_t kvm, int queue, int gsi); + #ifdef KVM_CAP_DEVICE_MSIX int kvm_assign_set_msix_nr(kvm_context_t kvm, struct kvm_assigned_msix_nr *msix_nr); diff --git a/qemu/Makefile.target b/qemu/Makefile.target index f89a9ec..5a6cc7a 100644 --- a/qemu/Makefile.target +++ b/qemu/Makefile.target @@ -547,6 +547,7 @@ OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o dma-helpers.o # virtio has to be here due to weird dependency between PCI and virtio-net. 
# need to fix this properly OBJS+=virtio.o virtio-blk.o virtio-balloon.o virtio-net.o virtio-console.o +OBJS+=vbus.o OBJS+=fw_cfg.o ifdef CONFIG_KVM OBJS+=kvm.o kvm-all.o diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c index 19d75b9..e90310a 100644 --- a/qemu/hw/pc.c +++ b/qemu/hw/pc.c @@ -38,6 +38,7 @@ #include "virtio-console.h" #include "hpet_emul.h" #include "device-assignment.h" +#include "vbus.h" #include "qemu-kvm.h" @@ -1076,6 +1077,10 @@ vga_bios_error: } } + if (pci_enabled) { + pci_vbus_init(pci_bus); + } + for(i = 0; i < nb_nics; i++) { NICInfo *nd = &nd_table[i]; diff --git a/qemu/hw/pci.h b/qemu/hw/pci.h index 890a41b..2274df3 100644 --- a/qemu/hw/pci.h +++ b/qemu/hw/pci.h @@ -71,6 +71,9 @@ extern target_phys_addr_t pci_mem_base; #define PCI_DEVICE_ID_VIRTIO_BALLOON 0x1002 #define PCI_DEVICE_ID_VIRTIO_CONSOLE 0x1003 +#define PCI_VENDOR_ID_NOVELL 0x11da +#define PCI_DEVICE_ID_VIRTUAL_BUS 0x2000 + typedef void PCIConfigWriteFunc(PCIDevice *pci_dev, uint32_t address, uint32_t data, int len); typedef uint32_t PCIConfigReadFunc(PCIDevice *pci_dev, diff --git a/qemu/hw/vbus.c b/qemu/hw/vbus.c new file mode 100644 index 0000000..62cfdfd --- /dev/null +++ b/qemu/hw/vbus.c @@ -0,0 +1,156 @@ +/* + * Add in-kernel "vbus" device support by surfacing a PCI->OTHER bridge + * + * Copyright (c) 2009, Novell Inc, Gregory Haskins <ghaskins@xxxxxxxxxx> + * + */ + +#include <stdio.h> +#include <unistd.h> +#include <sys/io.h> +#include <sys/types.h> +#include <sys/stat.h> +#include "qemu-kvm.h" +#include "hw.h" +#include "pci.h" + +#include "vbus.h" + +#define PCI_CAP_ID_MSI 0x05 + +#define PCI_MSI_FLAGS 2 /* Various flags */ +#define PCI_MSI_FLAGS_64BIT 0x80 /* 64-bit addresses allowed */ +#define PCI_MSI_FLAGS_QSIZE 0x70 /* Message queue size configured */ +#define PCI_MSI_FLAGS_QMASK 0x0e /* Maximum queue size available */ +#define PCI_MSI_FLAGS_ENABLE 0x01 /* MSI feature enabled */ +#define PCI_MSI_FLAGS_MASKBIT 0x100 /* 64-bit mask bits allowed */ +#define PCI_MSI_RFU 3 /* 
Rest of capability flags */ +#define PCI_MSI_ADDRESS_LO 4 /* Lower 32 bits */ +#define PCI_MSI_ADDRESS_HI 8 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */ +#define PCI_MSI_DATA_32 8 /* 16 bits of data for 32-bit devices */ +#define PCI_MSI_DATA_64 12 /* 16 bits of data for 64-bit devices */ +#define PCI_MSI_MASK_BIT 16 /* Mask bits register */ + +struct VirtualBus { + PCIDevice dev; + struct kvm_irq_routing_entry irq[8]; + int enabled:1; +}; + +static struct VirtualBus *to_bus(PCIDevice *dev) +{ + return (struct VirtualBus*)dev; +} + +static int +vbus_pci_cap_init(PCIDevice *dev) +{ + int offset = dev->cap.start; + + dev->cap.length = 0; + + memset(&dev->config[offset], 0, PCI_CAPABILITY_CONFIG_MSI_LENGTH); + dev->config[offset] = PCI_CAP_ID_MSI; + dev->config[offset+PCI_MSI_FLAGS] = 0x06; /* request 8 vectors */ + dev->cap.length += PCI_CAPABILITY_CONFIG_MSI_LENGTH; + + return 0; +} + +static void +vbus_pci_cap_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len) +{ + struct VirtualBus *bus = to_bus(dev); + unsigned int pos = dev->cap.start; + unsigned int ctrl = pos + PCI_MSI_FLAGS; + + pci_default_cap_write_config(dev, addr, val, len); + + /* Check if this is not a write to the control register. */ + if (!(addr <= ctrl && (addr + len) > ctrl)) + return; + + /* + * We only get here if this is a write to the control register, + * but we only emulate the PIO side-effects if this is the first + * time we have seen an MSI_ENABLE operation. I.e. 
all MSI_DISABLE + * and subsequent MSI_ENABLE operations are ignored + */ + if (!bus->enabled && val & 1) { + int i, total; + uint8_t flags = dev->config[pos+PCI_MSI_FLAGS]; + + total = 1 << ((flags & PCI_MSI_FLAGS_QSIZE) >> 4); + + if (total > 8) + total = 8; + + /* We need to register a GSI for each vector returned */ + for (i = 0; i < total; i++) { + struct kvm_irq_routing_entry *irq = &bus->irq[i]; + uint32_t addr; + uint16_t data; + + addr = *(uint32_t *)&dev->config[pos + PCI_MSI_ADDRESS_LO]; + + data = *(uint16_t *)&dev->config[pos + PCI_MSI_DATA_32]; + data += i; + + irq->u.msi.address_lo = addr; + irq->u.msi.address_hi = 0; + irq->u.msi.data = data; + + irq->type = KVM_IRQ_ROUTING_MSI; + + irq->gsi = kvm_get_irq_route_gsi(kvm_context); + if (irq->gsi < 0) { + perror("vbus: kvm_get_irq_route_gsi"); + return; + } + + kvm_add_routing_entry(kvm_context, irq); + if (kvm_commit_irq_routes(kvm_context) < 0) { + perror("vbus: kvm_commit_irq_routes"); + return; + } + + kvm_vbus_assign_gsi(kvm_context, i, irq->gsi); + } + + bus->enabled = 1; + } +} + +void +pci_vbus_init(PCIBus *bus) +{ + struct VirtualBus *_bus; + PCIDevice *dev; + uint8_t *config; + + if (!kvm_check_extension(kvm_context, KVM_CAP_VBUS)) + return; + + dev = pci_register_device(bus, "vbus", sizeof(*_bus), + -1, NULL, NULL); + if (!dev) { + perror("vbus present but PCI allocation failed"); + return; + } + + config = dev->config; + pci_config_set_vendor_id(config, PCI_VENDOR_ID_NOVELL); + pci_config_set_device_id(config, PCI_DEVICE_ID_VIRTUAL_BUS); + pci_config_set_class(config, PCI_CLASS_BRIDGE_OTHER); + + pci_enable_capability_support(dev, 0, + NULL, + vbus_pci_cap_write_config, + vbus_pci_cap_init); + + _bus = to_bus(dev); + + memset(&_bus->irq[0], 0, sizeof(_bus->irq)); + + _bus->enabled = 0; +} diff --git a/qemu/hw/vbus.h b/qemu/hw/vbus.h new file mode 100644 index 0000000..8abc3e6 --- /dev/null +++ b/qemu/hw/vbus.h @@ -0,0 +1,2 @@ + +void pci_vbus_init(PCIBus *bus); -- To unsubscribe from this 
list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html