Hi Kim,
1) I agree with Alex on the fact I would prefer to have the qemu-side platform device code separated from PCI device one, both using a separate generic helper code. Indeed calling functions referencing BAR in the platform device does not look natural to me; although I understand the code reuse rationale. This is also how the kernel side code is laid out.
2) about nr_regions. What is the number of nr_regions do you expect for platform_devices? bars[] possible overshoot as already mentionned if nr_regions > 6.
3) I understand you build nr_regions MemoryRegion (each mmaped on the hpa). However I understand you only attach a single one to the SysBusDevice in vfio_platform_realize:
sysbus_init_mmio(sbdev, &vdev->bars[0].mmap_mem); this MemRegion is assigned a gpa in virt.c? is it the correct understanding? What is the mem hierarchy that is targetted. In PCI there are subregions. In this device, there would be a single layer or hierarchy?
4) vfio_unmap_bars should destroy the platform device mmaped regions (vdev->mmap_mem and munmap the correspond hva (currently on slow regions are munmapped and destroyed)
Best Regards1) I agree with Alex on the fact I would prefer to have the qemu-side platform device code separated from PCI device one, both using a separate generic helper code. Indeed calling functions referencing BAR in the platform device does not look natural to me; although I understand the code reuse rationale. This is also how the kernel side code is laid out.
2) about nr_regions. What is the number of nr_regions do you expect for platform_devices? bars[] possible overshoot as already mentionned if nr_regions > 6.
3) I understand you build nr_regions MemoryRegion (each mmaped on the hpa). However I understand you only attach a single one to the SysBusDevice in vfio_platform_realize:
sysbus_init_mmio(sbdev, &vdev->bars[0].mmap_mem); this MemRegion is assigned a gpa in virt.c? is it the correct understanding? What is the mem hierarchy that is targetted. In PCI there are subregions. In this device, there would be a single layer or hierarchy?
4) vfio_unmap_bars should destroy the platform device mmaped regions (vdev->mmap_mem and munmap the correspond hva (currently on slow regions are munmapped and destroyed)
On 26 February 2014 03:37, Kim Phillips <kim.phillips@xxxxxxxxxx> wrote:
We basically add support for the SysBusDevice type in addition to the
existing PCIDevice support. This involves taking common code from the
existing vfio_initfn(), and putting it under a new vfio_find_get_group(),
that both vfio_initfn() and the new vfio_platform_realize() call.
Since realize() returns void, unlike PCIDevice's initfn(),
error codes are moved into the error message text with %m.
Some exceptions to the PCI path are added for platform devices,
mostly in the form of early returns, since less setup is needed.
I chose to reuse VFIODevice's config_size to determine whether
the device was a PCI device or a platform device, which might
need to change.
Currently only MMIO access is supported at this time. It works
because of qemu's stage 1 translation, but needs to be in stage 2
in case other entities than the guest OS want to access it.
A KVM patch to address this is in the works.
The perceived path for future QEMU development is:
- support allocating a variable number of regions
- VFIODevice's bars[] become dynamically allocated *regions
- VFIOBAR's device fd replaced with parent VFIODevice ptr,
to facilitate BAR r/w ops calling vfio_eoi()
- add support for interrupts
- verify and test platform dev unmap path
- test existing PCI path for any regressions
- add support for creating platform devices on the qemu command line
- currently device address specification is hardcoded for test
development on Calxeda Midway's fff51000.ethernet device
- reset is not supported and registration of reset functions is
bypassed for platform devices.
- there is no standard means of resetting a platform device,
unsure if it suffices to be handled at device--VFIO binding time
Signed-off-by: Kim Phillips <kim.phillips@xxxxxxxxxx>
---
hw/misc/vfio.c | 180 ++++++++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 145 insertions(+), 35 deletions(-)
diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index 8db182f..eed24db 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -32,6 +32,7 @@
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "hw/pci/pci.h"
+#include "hw/sysbus.h"
#include "qemu-common.h"
#include "qemu/error-report.h"
#include "qemu/event_notifier.h"
@@ -166,7 +167,10 @@ typedef struct VFIOMSIXInfo {
} VFIOMSIXInfo;
typedef struct VFIODevice {
- PCIDevice pdev;
+ union {
+ PCIDevice pdev;
+ SysBusDevice sbdev;
+ };
int fd;
VFIOINTx intx;
unsigned int config_size;
@@ -180,6 +184,8 @@ typedef struct VFIODevice {
VFIOMSIXInfo *msix;
int nr_vectors; /* Number of MSI/MSIX vectors currently in use */
int interrupt; /* Current interrupt type */
+ char *name; /* platform device name, e.g., fff51000.ethernet */
+ int nr_regions; /* platform devices' number of regions */
VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
VFIOVGA vga; /* 0xa0000, 0x3b0, 0x3c0 */
PCIHostDeviceAddress host;
@@ -2497,8 +2503,6 @@ empty_region:
memory_region_init(submem, OBJECT(vdev), name, 0);
}
- memory_region_add_subregion(mem, offset, submem);
-
return ret;
}
@@ -2552,6 +2556,7 @@ static void vfio_map_bar(VFIODevice *vdev, int nr)
&bar->mmap_mem, &bar->mmap, size, 0, name)) {
error_report("%s unsupported. Performance may be slow", name);
}
+ memory_region_add_subregion(&bar->mem, 0, &bar->mmap_mem);
if (vdev->msix && vdev->msix->table_bar == nr) {
unsigned start;
@@ -2566,15 +2571,38 @@ static void vfio_map_bar(VFIODevice *vdev, int nr)
&vdev->msix->mmap, size, start, name)) {
error_report("%s unsupported. Performance may be slow", name);
}
+ memory_region_add_subregion(&bar->mem, start, &vdev->msix->mmap_mem);
}
vfio_bar_quirk_setup(vdev, nr);
}
+static void vfio_map_region(VFIODevice *vdev, int nr)
+{
+ VFIOBAR *bar = &vdev->bars[nr];
+ unsigned size = bar->size;
+ char name[64];
+
+ snprintf(name, sizeof(name), "VFIO %s region %d mmap", vdev->name, nr);
+
+ if (vfio_mmap_bar(vdev, bar, &bar->mem,
+ &bar->mmap_mem, &bar->mmap, size, 0, name)) {
+ error_report("error mmapping %s: %m", name);
+ }
+}
+
static void vfio_map_bars(VFIODevice *vdev)
{
int i;
+ if (!vdev->config_size) {
+ /* platform device */
+ for (i = 0; i < vdev->nr_regions; i++) {
+ vfio_map_region(vdev, i);
+ }
+ return;
+ }
+
for (i = 0; i < PCI_ROM_SLOT; i++) {
vfio_map_bar(vdev, i);
}
@@ -3144,7 +3172,8 @@ static void vfio_pci_reset_handler(void *opaque)
QLIST_FOREACH(group, &group_list, next) {
QLIST_FOREACH(vdev, &group->device_list, next) {
- if (vdev->needs_reset) {
+ /* HACK: restrict reset to PCI devices (have config_size) for now */
+ if (vdev->config_size && vdev->needs_reset) {
vfio_pci_hot_reset_multi(vdev);
}
}
@@ -3418,25 +3447,18 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev)
DPRINTF("Device %s flags: %u, regions: %u, irgs: %u\n", name,
dev_info.flags, dev_info.num_regions, dev_info.num_irqs);
- if (!(dev_info.flags & VFIO_DEVICE_FLAGS_PCI)) {
- error_report("vfio: Um, this isn't a PCI device");
- goto error;
- }
-
+ vdev->nr_regions = dev_info.num_regions;
vdev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET);
- if (dev_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1) {
+ if (dev_info.num_regions > PCI_NUM_REGIONS) ||
+ ((dev_info.flags & VFIO_DEVICE_FLAGS_PCI) &&
+ (dev_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1)) {
error_report("vfio: unexpected number of io regions %u",
dev_info.num_regions);
goto error;
}
- if (dev_info.num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1) {
- error_report("vfio: unexpected number of irqs %u", dev_info.num_irqs);
- goto error;
- }
-
- for (i = VFIO_PCI_BAR0_REGION_INDEX; i < VFIO_PCI_ROM_REGION_INDEX; i++) {
+ for (i = 0; i < dev_info.num_regions; i++) {
reg_info.index = i;
ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, ®_info);
@@ -3458,6 +3480,15 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev)
QLIST_INIT(&vdev->bars[i].quirks);
}
+ /* following is for PCI devices, at least for now */
+ if (!(dev_info.flags & VFIO_DEVICE_FLAGS_PCI))
+ return 0;
+
+ if (dev_info.num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1) {
+ error_report("vfio: unexpected number of irqs %u", dev_info.num_irqs);
+ goto error;
+ }
+
reg_info.index = VFIO_PCI_CONFIG_REGION_INDEX;
ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, ®_info);
@@ -3659,32 +3690,25 @@ static void vfio_unregister_err_notifier(VFIODevice *vdev)
event_notifier_cleanup(&vdev->err_notifier);
}
-static int vfio_initfn(PCIDevice *pdev)
+static VFIOGroup *vfio_find_get_group(char *path)
{
- VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
VFIOGroup *group;
- char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name;
+ char iommu_group_path[PATH_MAX], *group_name;
ssize_t len;
struct stat st;
int groupid;
- int ret;
- /* Check that the host device exists */
- snprintf(path, sizeof(path),
- "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
- vdev->host.domain, vdev->host.bus, vdev->host.slot,
- vdev->host.function);
if (stat(path, &st) < 0) {
- error_report("vfio: error: no such host device: %s", path);
- return -errno;
+ error_report("vfio: error: no such host device: %s: %m", path);
+ return NULL;
}
strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1);
len = readlink(path, iommu_group_path, PATH_MAX);
if (len <= 0) {
- error_report("vfio: error no iommu_group for device");
- return -errno;
+ error_report("vfio: error no iommu_group for device: %m");
+ return NULL;
}
iommu_group_path[len] = 0;
@@ -3692,18 +3716,37 @@ static int vfio_initfn(PCIDevice *pdev)
if (sscanf(group_name, "%d", &groupid) != 1) {
error_report("vfio: error reading %s: %m", path);
- return -errno;
+ return NULL;
}
- DPRINTF("%s(%04x:%02x:%02x.%x) group %d\n", __func__, vdev->host.domain,
- vdev->host.bus, vdev->host.slot, vdev->host.function, groupid);
-
group = vfio_get_group(groupid);
if (!group) {
- error_report("vfio: failed to get group %d", groupid);
- return -ENOENT;
+ error_report("vfio: failed to get group %d: %m", groupid);
+ return NULL;
}
+ return group;
+}
+
+static int vfio_initfn(PCIDevice *pdev)
+{
+ VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
+ VFIOGroup *group;
+ char path[PATH_MAX];
+ int ret;
+
+ /* Check that the host device exists */
+ snprintf(path, sizeof(path),
+ "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
+ vdev->host.domain, vdev->host.bus, vdev->host.slot,
+ vdev->host.function);
+
+ group = vfio_find_get_group(path);
+
+ DPRINTF("%s(%04x:%02x:%02x.%x) group %d\n", __func__, vdev->host.domain,
+ vdev->host.bus, vdev->host.slot, vdev->host.function,
+ group->groupid);
+
snprintf(path, sizeof(path), "%04x:%02x:%02x.%01x",
vdev->host.domain, vdev->host.bus, vdev->host.slot,
vdev->host.function);
@@ -3916,3 +3959,70 @@ static void register_vfio_pci_dev_type(void)
}
type_init(register_vfio_pci_dev_type)
+
+
+/*
+ * VFIO platform devices
+ */
+static void vfio_platform_realize(DeviceState *dev, Error **errp)
+{
+ SysBusDevice *sbdev = SYS_BUS_DEVICE(dev);
+ VFIODevice *vdev = DO_UPCAST(VFIODevice, sbdev, sbdev);
+ VFIOGroup *group;
+ char path[PATH_MAX];
+ int ret;
+
+ vdev->name = malloc(PATH_MAX);
+ strcpy(vdev->name, "fff51000.ethernet");
+
+ /* Check that the host device exists */
+ snprintf(path, sizeof(path),
+ "/sys/bus/platform/devices/%s/", vdev->name);
+
+ group = vfio_find_get_group(path);
+ if (!group) {
+ error_report("vfio: failed to get group for device %s", path);
+ return;
+ }
+
+ strcpy(path, vdev->name);
+ ret = vfio_get_device(group, path, vdev);
+ if (ret) {
+ error_report("vfio: failed to get device %s", path);
+ vfio_put_group(group);
+ return;
+ }
+
+ vfio_map_bars(vdev);
+
+ sysbus_init_mmio(sbdev, &vdev->bars[0].mmap_mem);
+}
+
+static const VMStateDescription vfio_platform_vmstate = {
+ .name = "vfio-platform",
+ .unmigratable = 1,
+};
+
+static void vfio_platform_dev_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->realize = vfio_platform_realize;
+ dc->vmsd = &vfio_platform_vmstate;
+ dc->desc = "VFIO-based platform device assignment";
+ set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+}
+
+static const TypeInfo vfio_platform_dev_info = {
+ .name = "vfio-platform",
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(VFIODevice),
+ .class_init = vfio_platform_dev_class_init,
+};
+
+static void register_vfio_platform_dev_type(void)
+{
+ type_register_static(&vfio_platform_dev_info);
+}
+
+type_init(register_vfio_platform_dev_type)
--
1.9.0
_______________________________________________ kvmarm mailing list kvmarm@xxxxxxxxxxxxxxxxxxxxx https://lists.cs.columbia.edu/cucslists/listinfo/kvmarm