On Fri, Sep 20, 2013 at 11:07:00AM +0200, Peter Krempa wrote: > Prefer using VFIO (if available) to the legacy KVM device passthrough. > > With this patch a PCI passthrough device without the driver configured > will be started with VFIO if it's available on the host. If not legacy > KVM passthrough is checked and error is reported if it's not available. > --- > docs/formatdomain.html.in | 9 ++++---- > src/conf/domain_conf.h | 2 +- > src/qemu/qemu_command.c | 2 +- > src/qemu/qemu_hotplug.c | 26 +++++++++++++++++++++- > src/qemu/qemu_process.c | 18 ++++++++++++++- > .../qemuxml2argv-hostdev-pci-address-device.args | 2 +- > .../qemuxml2argvdata/qemuxml2argv-net-hostdev.args | 2 +- > tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args | 4 ++-- > 8 files changed, 52 insertions(+), 13 deletions(-) > > diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in > index 3689399..6f3f7cf 100644 > --- a/docs/formatdomain.html.in > +++ b/docs/formatdomain.html.in > @@ -2755,11 +2755,10 @@ > backend, which is compatible with UEFI SecureBoot) or "kvm" > (for the legacy device assignment handled directly by the KVM > kernel module)<span class="since">Since 1.0.5 (QEMU and KVM > - only, requires kernel 3.6 or newer)</span>. Currently, "kvm" > - is the default used by libvirt when not explicitly provided, > - but since the two are functionally equivalent, this default > - could be changed in the future with no impact to domains that > - don't specify anything. > + only, requires kernel 3.6 or newer)</span>. The default, when > + the driver name is not explicitly specified, is to check wether > + VFIO is available and use it if it's the case. If VFIO is not > + available, the legacy "kvm" assignment is attempted. > </dd> > <dt><code>readonly</code></dt> > <dd>Indicates that the device is readonly, only supported by SCSI host > diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h > index 9414ebf..43d8746 100644 > --- a/src/conf/domain_conf.h > +++ b/src/conf/domain_conf.h > @@ -399,7 +399,7 @@ enum virDomainHostdevSubsysType { > > /* the backend driver used for PCI hostdev devices */ > typedef enum { > - VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT, /* currently kvm, could change */ > + VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT, /* detect automaticaly, prefer VFIO */ > VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM, /* force legacy kvm style */ > VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO, /* force vfio */ > > diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c > index 1411533..0d7c02e 100644 > --- a/src/qemu/qemu_command.c > +++ b/src/qemu/qemu_command.c > @@ -5395,13 +5395,13 @@ qemuBuildPCIHostdevDevStr(virDomainDefPtr def, > > switch ((virDomainHostdevSubsysPciBackendType) > dev->source.subsys.u.pci.backend) { > - case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: > case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: > virBufferAddLit(&buf, "pci-assign"); > if (configfd && *configfd) > virBufferAsprintf(&buf, ",configfd=%s", configfd); > break; > > + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: > case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO: > virBufferAddLit(&buf, "vfio-pci"); > break; The code calling this function should have translated 'DEFAULT' into either 'KVM' or 'VFIO', so if we see 'DEFAULT' here shouldn't we raise an error. > diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c > index 9320ab9..5a8fa44 100644 > --- a/src/qemu/qemu_hotplug.c > +++ b/src/qemu/qemu_hotplug.c > @@ -1144,6 +1144,31 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver, > return -1; > > switch ((virDomainHostdevSubsysPciBackendType) *backend) { > + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: > + if (qemuHostdevHostSupportsPassthroughVFIO() && > + virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) { > + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO; > + /* VFIO requires all of the guest's memory to be locked resident. > + * In this case, the guest's memory may already be locked, but it > + * doesn't hurt to "change" the limit to the same value. > + */ > + if (vm->def->mem.hard_limit) > + virProcessSetMaxMemLock(vm->pid, vm->def->mem.hard_limit); > + else > + virProcessSetMaxMemLock(vm->pid, > + vm->def->mem.max_balloon + (1024 * 1024)); > + } else if (qemuHostdevHostSupportsPassthroughLegacy() && > + (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_PCIDEVICE) || > + virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE))) { > + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM; > + } else { > + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", > + _("host doesn't support passthrough of " > + "host PCI devices")); > + } > + > + break; > + > case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO: > if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) { > virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", > @@ -1170,7 +1195,6 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver, > > break; > > - case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: > case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: > if (!qemuHostdevHostSupportsPassthroughLegacy()) { > virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", > diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c > index e36ab99..9648aa0 100644 > --- a/src/qemu/qemu_process.c > +++ b/src/qemu/qemu_process.c > @@ -3730,6 +3730,23 @@ int qemuProcessStart(virConnectPtr conn, > } > > switch ((virDomainHostdevSubsysPciBackendType) *backend) { > + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: > + if (supportsPassthroughVFIO && > + virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) { > + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO; > + } else if (supportsPassthroughKVM && > + (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_PCIDEVICE) || > + virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE))) { > + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM; > + } else { > + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", > + _("host doesn't support passthrough of " > + "host PCI devices")); > + goto cleanup; > + } > + > + break; There's alot of duplication between here & the hotplug code - can we get this logic shared in a helper function ? > + > case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO: > if (!supportsPassthroughVFIO) { > virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", > @@ -3738,7 +3755,6 @@ int qemuProcessStart(virConnectPtr conn, > } > break; > > - case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT: > case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM: > if (!supportsPassthroughKVM) { > virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", > diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args b/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args > index 214b246..557b733 100644 > --- a/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args > +++ b/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args > @@ -2,5 +2,5 @@ LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \ > /usr/bin/qemu -S -M \ > pc -m 214 -smp 1 -nographic -nodefconfig -nodefaults -monitor \ > unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -usb -hda \ > -/dev/HostVG/QEMUGuest2 -device pci-assign,host=06:12.5,id=hostdev0,\ > +/dev/HostVG/QEMUGuest2 -device vfio-pci,host=06:12.5,id=hostdev0,\ > bus=pci.0,addr=0x3 -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4 > diff --git a/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args b/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args > index 184811b..3a3963c 100644 > --- a/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args > +++ b/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args > @@ -3,5 +3,5 @@ LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \ > -M pc -m 214 -smp 1 -nographic -nodefconfig -nodefaults -monitor \ > unix:/tmp/test-monitor,server,nowait -no-acpi -boot c \ > -usb -hda /dev/HostVG/QEMUGuest1 \ > --device pci-assign,host=03:07.1,id=hostdev0,bus=pci.0,addr=0x3 \ > +-device vfio-pci,host=03:07.1,id=hostdev0,bus=pci.0,addr=0x3 \ > -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4 > diff --git a/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args b/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args > index c850613..615047a 100644 > --- a/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args > +++ b/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args > @@ -7,7 +7,7 @@ unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -usb -hda \ > -net user,vlan=0,name=hostnet0 \ > -device virtio-net-pci,vlan=1,id=net1,mac=52:54:00:24:a5:9e,bus=pci.0,addr=0x4,\ > romfile=/etc/fake/bootrom.bin -net user,vlan=1,name=hostnet1 \ > --device pci-assign,host=06:12.5,id=hostdev0,bus=pci.0,addr=0x5,rombar=0 \ > --device pci-assign,host=06:12.6,id=hostdev1,bus=pci.0,addr=0x6,rombar=1,\ > +-device vfio-pci,host=06:12.5,id=hostdev0,bus=pci.0,addr=0x5,rombar=0 \ > +-device vfio-pci,host=06:12.6,id=hostdev1,bus=pci.0,addr=0x6,rombar=1,\ > romfile=/etc/fake/bootrom.bin \ > -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x7 Why is the XML here changing ? Shouldn't the test suite be stable in what it generates regardless of what the host happens to support ? Danielx -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :| -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list