Hi Alex, On 2011-01-28 01:45, Alex Williamson wrote: > On Thu, 2011-01-27 at 12:56 +0100, André Weidemann wrote: >> Hi Alex, >> >> On 26.01.2011 06:12, Alex Williamson wrote: >> >>> So while your initial results are promising, my guess is that you're >>> using card specific drivers and still need to consider some of the >>> harder problems with generic support for vga assignment. I hacked on >>> this for a bit trying to see if I could get vga assignment working >>> with the vfio driver. Setting up the legacy access and preventing >>> qemu from stealing it back should get you basic vga modes and might >>> even allow the option rom to run to initialize the card for pre-boot. >>> I was able to get this far on a similar ATI card. I never had much >>> luck with other cards though, and I was never able to get the vesa >>> extensions working. Thanks, >> >> Do you mind sharing these patches? > > Attached. > We are about to try some pass-through with an NVIDIA card. So I already hacked on your vfio patch to make it build against current device assignment code. Some questions arose while studying the code: ... > --- /dev/null > +++ b/hw/vfio-vga.c > @@ -0,0 +1,291 @@ > +/* > + * vfio VGA device assignment support > + * > + * Copyright Red Hat, Inc. 2010 > + * > + * Authors: > + * Alex Williamson <alex.williamson@xxxxxxxxxx> > + * > + * This work is licensed under the terms of the GNU GPL, version 2. See > + * the COPYING file in the top-level directory. > + * > + * Based on qemu-kvm device-assignment: > + * Adapted for KVM by Qumranet. 
> + * Copyright (c) 2007, Neocleus, Alex Novik (alex@xxxxxxxxxxxx) > + * Copyright (c) 2007, Neocleus, Guy Zana (guy@xxxxxxxxxxxx) > + * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@xxxxxxxxxxxx) > + * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@xxxxxxxxxx) > + * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@xxxxxxxxxx) > + */ > + > +#include <stdio.h> > +#include <unistd.h> > +#include <sys/io.h> > +#include <sys/mman.h> > +#include <sys/types.h> > +#include <sys/stat.h> > +#include "event_notifier.h" > +#include "hw.h" > +#include "memory.h" > +#include "monitor.h" > +#include "pc.h" > +#include "qemu-error.h" > +#include "sysemu.h" > +#include "vfio.h" > +#include <pci/header.h> > +#include <pci/types.h> > +#include <linux/types.h> > +#include "linux-vfio.h" > + > +//#define DEBUG_VFIO_VGA > +#ifdef DEBUG_VFIO_VGA > +#define DPRINTF(fmt, ...) \ > + do { printf("vfio-vga: " fmt, ## __VA_ARGS__); } while (0) > +#else > +#define DPRINTF(fmt, ...) \ > + do { } while (0) > +#endif > + > +/* > + * VGA setup > + */ > +static void vfio_vga_write(VFIODevice *vdev, uint32_t addr, > + uint32_t val, int len) > +{ > + DPRINTF("%s 0x%x %d - 0x%x\n", __func__, 0xa0000 + addr, len, val); > + switch (len) { > + case 1: > + *(uint8_t *)(vdev->vga_mmio + addr) = (uint8_t)val; > + break; > + case 2: > + *(uint16_t *)(vdev->vga_mmio + addr) = (uint16_t)val; > + break; > + case 4: > + *(uint32_t *)(vdev->vga_mmio + addr) = val; > + break; > + } > +} > + > +static void vfio_vga_writeb(void *opaque, target_phys_addr_t addr, uint32_t val) > +{ > + vfio_vga_write(opaque, addr, val, 1); > +} > + > +static void vfio_vga_writew(void *opaque, target_phys_addr_t addr, uint32_t val) > +{ > + vfio_vga_write(opaque, addr, val, 2); > +} > + > +static void vfio_vga_writel(void *opaque, target_phys_addr_t addr, uint32_t val) > +{ > + vfio_vga_write(opaque, addr, val, 4); > +} > + > +static CPUWriteMemoryFunc * const vfio_vga_writes[] = { > + &vfio_vga_writeb, > + &vfio_vga_writew, > + 
&vfio_vga_writel > +}; > + > +static uint32_t vfio_vga_read(VFIODevice *vdev, uint32_t addr, int len) > +{ > + uint32_t val = 0xffffffff; > + switch (len) { > + case 1: > + val = (uint32_t)*(uint8_t *)(vdev->vga_mmio + addr); > + break; > + case 2: > + val = (uint32_t)*(uint16_t *)(vdev->vga_mmio + addr); > + break; > + case 4: > + val = *(uint32_t *)(vdev->vga_mmio + addr); > + break; > + } > + DPRINTF("%s 0x%x %d = 0x%x\n", __func__, 0xa0000 + addr, len, val); > + return val; > +} > + > +static uint32_t vfio_vga_readb(void *opaque, target_phys_addr_t addr) > +{ > + return vfio_vga_read(opaque, addr, 1); > +} > + > +static uint32_t vfio_vga_readw(void *opaque, target_phys_addr_t addr) > +{ > + return vfio_vga_read(opaque, addr, 2); > +} > + > +static uint32_t vfio_vga_readl(void *opaque, target_phys_addr_t addr) > +{ > + return vfio_vga_read(opaque, addr, 4); > +} > + > +static CPUReadMemoryFunc * const vfio_vga_reads[] = { > + &vfio_vga_readb, > + &vfio_vga_readw, > + &vfio_vga_readl > +}; > + > +static void vfio_vga_out(VFIODevice *vdev, uint32_t addr, uint32_t val, int len) > +{ > + DPRINTF("%s 0x%x %d - 0x%x\n", __func__, addr, len, val); > + ioperm(0x3b0, 0x30, 1); /* XXX fix me */ Why do you have to re-establish the ioperms here on each access? Are we just lacking the use of generic kvm ioperm management? 
> + switch (len) { > + case 1: > + outb(val, addr); > + break; > + case 2: > + outw(val, addr); > + break; > + case 4: > + outl(val, addr); > + break; > + } > +} > + > +static void vfio_vga_outb(void *opaque, uint32_t addr, uint32_t val) > +{ > + vfio_vga_out(opaque, addr, val, 1); > +} > + > +static void vfio_vga_outw(void *opaque, uint32_t addr, uint32_t val) > +{ > + vfio_vga_out(opaque, addr, val, 2); > +} > + > +static void vfio_vga_outl(void *opaque, uint32_t addr, uint32_t val) > +{ > + vfio_vga_out(opaque, addr, val, 4); > +} > + > +static uint32_t vfio_vga_in(VFIODevice *vdev, uint32_t addr, int len) > +{ > + uint32_t val = 0xffffffff; > + ioperm(0x3b0, 0x30, 1); /* XXX fix me */ > + switch (len) { > + case 1: > + val = inb(addr); > + break; > + case 2: > + val = inw(addr); > + break; > + case 4: > + val = inl(addr); > + break; > + } > + DPRINTF("%s 0x%x, %d = 0x%x\n", __func__, addr, len, val); > + return val; > +} > + > +static uint32_t vfio_vga_inb(void *opaque, uint32_t addr) > +{ > + return vfio_vga_in(opaque, addr, 1); > +} > + > +static uint32_t vfio_vga_inw(void *opaque, uint32_t addr) > +{ > + return vfio_vga_in(opaque, addr, 2); > +} > + > +static uint32_t vfio_vga_inl(void *opaque, uint32_t addr) > +{ > + return vfio_vga_in(opaque, addr, 4); > +} > + > +int vfio_vga_setup(VFIODevice *vdev) > +{ > + char buf[256]; > + int ret; > + > + if (vga_interface_type != VGA_NONE) { > + fprintf(stderr, > + "VGA devie assigned without -vga none param, no ISA VGA\n"); > + return -1; > + } > + > + vdev->vga_fd = open("/dev/vga_arbiter", O_RDWR); > + if (vdev->vga_fd < 0) { > + fprintf(stderr, "%s - Failed to open vga arbiter (%s)\n", > + __func__, strerror(errno)); > + return -1; > + } > + ret = read(vdev->vga_fd, buf, sizeof(buf)); > + if (ret <= 0) { > + fprintf(stderr, "%s - Failed to read from vga arbiter (%s)\n", > + __func__, strerror(errno)); > + close(vdev->vga_fd); > + return -1; > + } > + buf[ret - 1] = 0; > + vdev->vga_orig = qemu_strdup(buf); > + > 
+ snprintf(buf, sizeof(buf), "target PCI:%04x:%02x:%02x.%x", > + vdev->host.seg, vdev->host.bus, vdev->host.dev, vdev->host.func); > + ret = write(vdev->vga_fd, buf, strlen(buf)); > + if (ret != strlen(buf)) { > + fprintf(stderr, "%s - Failed to write to vga arbiter (%s)\n", > + __func__, strerror(errno)); > + close(vdev->vga_fd); > + return -1; > + } > + snprintf(buf, sizeof(buf), "decodes io+mem"); > + ret = write(vdev->vga_fd, buf, strlen(buf)); > + if (ret != strlen(buf)) { > + fprintf(stderr, "%s - Failed to write to vga arbiter (%s)\n", > + __func__, strerror(errno)); > + close(vdev->vga_fd); > + return -1; > + } OK, so we grab the assigned adapter and make it handle legacy io+mem. I guess this approach only works with a single guest with an assigned adapter. Would it be possible and not extremely costly to do some on-demand grabbing of the range to share it with multiple VMs? And what about the host? When does Linux release the legacy range? Always or only when a specific (!=vga/vesa) framebuffer driver is loaded? Is there some other way to pass the legacy accesses from the guest to a specific adapter without going via the host's legacy area? I.e. do some adapters allow remapping? 
> + > + vdev->vga_mmio_fd = open("/dev/mem", O_RDWR); > + if (vdev->vga_mmio_fd < 0) { > + fprintf(stderr, "%s - Failed to open /dev/mem (%s)\n", > + __func__, strerror(errno)); > + return -1; > + } > + vdev->vga_mmio = mmap(NULL, 0x40000, PROT_READ | PROT_WRITE, > + MAP_SHARED, vdev->vga_mmio_fd, 0xa0000); > + if (vdev->vga_mmio == MAP_FAILED) { > + fprintf(stderr, "%s - mmap failed (%s)\n", __func__, strerror(errno)); > + return -1; > + } > + > +#if 1 > + vdev->vga_io = cpu_register_io_memory(vfio_vga_reads, > + vfio_vga_writes, vdev); > + cpu_register_physical_memory(0xa0000, 0x20000, vdev->vga_io); > + qemu_register_coalesced_mmio(0xa0000, 0x20000); > +#else > + cpu_register_physical_memory(0xa0000, 0x20000, > + qemu_ram_map(&vdev->pdev.qdev, "VGA", 0x20000, vdev->vga_mmio)); > + qemu_register_coalesced_mmio(0xa0000, 0x20000); > +#endif To make the second case work, we would have to track the mode switches of the guest via legacy VGA interfaces and switch the mapping on the fly, right? 
> + > + register_ioport_write(0x3b0, 0x30, 1, vfio_vga_outb, vdev); > + register_ioport_write(0x3b0, 0x30, 2, vfio_vga_outw, vdev); > + register_ioport_write(0x3b0, 0x30, 4, vfio_vga_outl, vdev); > + register_ioport_read(0x3b0, 0x30, 1, vfio_vga_inb, vdev); > + register_ioport_read(0x3b0, 0x30, 2, vfio_vga_inw, vdev); > + register_ioport_read(0x3b0, 0x30, 4, vfio_vga_inl, vdev); > + if (ioperm(0x3b0, 0x30, 1)) { > + fprintf(stderr, "%s - ioperm failed (%s)\n", __func__, strerror(errno)); > + return -1; > + } > + return 0; > +} > + > +void vfio_vga_exit(VFIODevice *vdev) > +{ > + if (!vdev->vga_io) > + return; > + > + isa_unassign_ioport(0x3b0, 0x30); > + qemu_unregister_coalesced_mmio(0xa0000, 0x20000); > + cpu_register_physical_memory(0xa0000, 0x20000, IO_MEM_UNASSIGNED); > + cpu_unregister_io_memory(vdev->vga_io); > + munmap(vdev->vga_mmio, 0x40000); > + close(vdev->vga_mmio_fd); > + qemu_free(vdev->vga_orig); > + close(vdev->vga_fd); > +} > + Thanks, Jan -- Siemens AG, Corporate Technology, CT T DE IT 1 Corporate Competence Center Embedded Linux -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html