For virtio disks and interfaces, qemu allows users to enable or disable the ioeventfd feature. This means qemu can execute domain code while another thread waits for I/O events. Depending on the workload, this is a win in some cases and a loss in others. The feature is available via the 'ioeventfd' attribute of the disk and interface <driver> elements. It accepts 'on' and 'off'. Leaving this attribute out leaves the decision to the hypervisor. --- diff to v1: -reverted to 'ioeventfd' attribute: https://www.redhat.com/archives/libvir-list/2011-June/msg00712.html docs/formatdomain.html.in | 34 +++++++++++++- docs/schemas/domain.rng | 14 ++++++ src/conf/domain_conf.c | 49 +++++++++++++++++++- src/conf/domain_conf.h | 11 ++++ src/libvirt_private.syms | 2 + src/qemu/qemu_capabilities.c | 3 + src/qemu/qemu_capabilities.h | 1 + src/qemu/qemu_command.c | 13 +++++ tests/qemuhelptest.c | 3 +- .../qemuxml2argv-disk-ioeventfd.args | 11 ++++ .../qemuxml2argv-disk-ioeventfd.xml | 50 ++++++++++++++++++++ tests/qemuxml2argvtest.c | 4 ++ 12 files changed, 191 insertions(+), 4 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-disk-ioeventfd.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-disk-ioeventfd.xml diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index ab39417..39e1a85 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -785,7 +785,7 @@ </disk> ... <disk type='network'> - <driver name="qemu" type="raw" io="threads"/> + <driver name="qemu" type="raw" io="threads" ioeventfd="on"/> <source protocol="sheepdog" name="image_name"> <host name="hostname" port="7000"/> </source> @@ -869,6 +869,20 @@ policies on I/O; qemu guests support "threads" and "native". <span class="since">Since 0.8.8</span> </li> + <li> + The optional <code>ioeventfd</code> attribute allows users to + set <a href='https://patchwork.kernel.org/patch/43390/'> + domain I/O asynchronous handling</a> for disk device. + The default is left to the discretion of the hypervisor. 
+ Accepted values are "on" and "off". Enabling this allows + qemu to execute VM while a separate thread handles I/O. + Typically guests experiencing high system CPU utilization + during I/O will benefit from this. On the other hand, + on overloaded host it could increase guest I/O latency. + <span class="since">Since 0.9.3 (QEMU and KVM only)</span> + <b>In general you should leave this option alone, unless you + are very certain you know what you are doing.</b> + </li> </ul> </dd> <dt><code>boot</code></dt> @@ -1649,7 +1663,7 @@ qemu-kvm -net nic,model=? /dev/null <source network='default'/> <target dev='vnet1'/> <model type='virtio'/> - <b><driver name='vhost' txmode='iothread'/></b> + <b><driver name='vhost' txmode='iothread' ioeventfd='on'/></b> </interface> </devices> ...</pre> @@ -1700,6 +1714,22 @@ qemu-kvm -net nic,model=? /dev/null <b>In general you should leave this option alone, unless you are very certain you know what you are doing.</b> </dd> + <dt><code>ioeventfd</code></dt> + <dd> + This optional attribute allows users to set + <a href='https://patchwork.kernel.org/patch/43390/'> + domain I/O asynchronous handling</a> for interface device. + The default is left to the discretion of the hypervisor. + Accepted values are "on" and "off". Enabling this allows + qemu to execute VM while a separate thread handles I/O. + Typically guests experiencing high system CPU utilization + during I/O will benefit from this. On the other hand, + on overloaded host it could increase guest I/O latency. 
+ <span class="since">Since 0.9.3 (QEMU and KVM only)</span><br/><br/> + + <b>In general you should leave this option alone, unless you + are very certain you know what you are doing.</b> + </dd> </dl> <h5><a name="elementsNICSTargetOverride">Overriding the target element</a></h5> diff --git a/docs/schemas/domain.rng b/docs/schemas/domain.rng index 6de024e..891662d 100644 --- a/docs/schemas/domain.rng +++ b/docs/schemas/domain.rng @@ -778,6 +778,9 @@ <optional> <ref name="driverIO"/> </optional> + <optional> + <ref name="ioeventfd"/> + </optional> <empty/> </element> </define> @@ -817,6 +820,14 @@ </choice> </attribute> </define> + <define name="ioeventfd"> + <attribute name="ioeventfd"> + <choice> + <value>on</value> + <value>off</value> + </choice> + </attribute> + </define> <define name="controller"> <element name="controller"> <choice> @@ -1117,6 +1128,9 @@ </choice> </attribute> </optional> + <optional> + <ref name="ioeventfd"/> + </optional> <empty/> </element> </optional> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 5360863..2234857 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -163,6 +163,11 @@ VIR_ENUM_IMPL(virDomainDiskIo, VIR_DOMAIN_DISK_IO_LAST, "default", "native", "threads") +VIR_ENUM_IMPL(virDomainIoEventFd, VIR_DOMAIN_IO_EVENT_FD_LAST, + "default", + "on", + "off") + VIR_ENUM_IMPL(virDomainController, VIR_DOMAIN_CONTROLLER_TYPE_LAST, "ide", @@ -2013,6 +2018,7 @@ virDomainDiskDefParseXML(virCapsPtr caps, char *cachetag = NULL; char *error_policy = NULL; char *iotag = NULL; + char *ioeventfd = NULL; char *devaddr = NULL; virStorageEncryptionPtr encryption = NULL; char *serial = NULL; @@ -2128,6 +2134,7 @@ virDomainDiskDefParseXML(virCapsPtr caps, cachetag = virXMLPropString(cur, "cache"); error_policy = virXMLPropString(cur, "error_policy"); iotag = virXMLPropString(cur, "io"); + ioeventfd = virXMLPropString(cur, "ioeventfd"); } else if (xmlStrEqual(cur->name, BAD_CAST "readonly")) { def->readonly = 1; } 
else if (xmlStrEqual(cur->name, BAD_CAST "shareable")) { @@ -2264,6 +2271,24 @@ virDomainDiskDefParseXML(virCapsPtr caps, } } + if (ioeventfd) { + if (def->bus != VIR_DOMAIN_DISK_BUS_VIRTIO) { + virDomainReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("disk ioeventfd mode supported " + "only for virtio bus")); + goto error; + } + + int i; + if ((i = virDomainIoEventFdTypeFromString(ioeventfd)) <= 0) { + virDomainReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("unknown disk ioeventfd mode '%s'"), + ioeventfd); + goto error; + } + def->ioeventfd=i; + } + if (devaddr) { if (virDomainParseLegacyDeviceAddress(devaddr, &def->info.addr.pci) < 0) { @@ -2326,6 +2351,7 @@ cleanup: VIR_FREE(cachetag); VIR_FREE(error_policy); VIR_FREE(iotag); + VIR_FREE(ioeventfd); VIR_FREE(devaddr); VIR_FREE(serial); virStorageEncryptionFree(encryption); @@ -2713,6 +2739,7 @@ virDomainNetDefParseXML(virCapsPtr caps, char *model = NULL; char *backend = NULL; char *txmode = NULL; + char *ioeventfd = NULL; char *filter = NULL; char *internal = NULL; char *devaddr = NULL; @@ -2802,6 +2829,7 @@ virDomainNetDefParseXML(virCapsPtr caps, } else if (xmlStrEqual (cur->name, BAD_CAST "driver")) { backend = virXMLPropString(cur, "name"); txmode = virXMLPropString(cur, "txmode"); + ioeventfd = virXMLPropString(cur, "ioeventfd"); } else if (xmlStrEqual (cur->name, BAD_CAST "filterref")) { filter = virXMLPropString(cur, "filter"); VIR_FREE(filterparams); @@ -3018,6 +3046,16 @@ virDomainNetDefParseXML(virCapsPtr caps, } def->driver.virtio.txmode = m; } + if (ioeventfd) { + int i; + if ((i = virDomainIoEventFdTypeFromString(ioeventfd)) <= 0) { + virDomainReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("unknown interface ioeventfd mode '%s'"), + ioeventfd); + goto error; + } + def->driver.virtio.ioeventfd = i; + } } if (filter != NULL) { @@ -3057,6 +3095,7 @@ cleanup: VIR_FREE(model); VIR_FREE(backend); VIR_FREE(txmode); + VIR_FREE(ioeventfd); VIR_FREE(filter); VIR_FREE(type); VIR_FREE(internal); @@ -8291,6 +8330,7 @@ 
virDomainDiskDefFormat(virBufferPtr buf, const char *cachemode = virDomainDiskCacheTypeToString(def->cachemode); const char *error_policy = virDomainDiskErrorPolicyTypeToString(def->error_policy); const char *iomode = virDomainDiskIoTypeToString(def->iomode); + const char *ioeventfd = virDomainIoEventFdTypeToString(def->ioeventfd); if (!type) { virDomainReportError(VIR_ERR_INTERNAL_ERROR, @@ -8322,7 +8362,8 @@ virDomainDiskDefFormat(virBufferPtr buf, " <disk type='%s' device='%s'>\n", type, device); - if (def->driverName || def->driverType || def->cachemode) { + if (def->driverName || def->driverType || def->cachemode || + def->ioeventfd) { virBufferAsprintf(buf, " <driver"); if (def->driverName) virBufferAsprintf(buf, " name='%s'", def->driverName); @@ -8334,6 +8375,8 @@ virDomainDiskDefFormat(virBufferPtr buf, virBufferAsprintf(buf, " error_policy='%s'", error_policy); if (def->iomode) virBufferAsprintf(buf, " io='%s'", iomode); + if (def->ioeventfd) + virBufferAsprintf(buf, " ioeventfd='%s'", ioeventfd); virBufferAsprintf(buf, "/>\n"); } @@ -8624,6 +8667,10 @@ virDomainNetDefFormat(virBufferPtr buf, virBufferAsprintf(buf, " txmode='%s'", virDomainNetVirtioTxModeTypeToString(def->driver.virtio.txmode)); } + if (def->driver.virtio.ioeventfd) { + virBufferAsprintf(buf, " ioeventfd='%s'", + virDomainIoEventFdTypeToString(def->driver.virtio.ioeventfd)); + } virBufferAddLit(buf, "/>\n"); } } diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index ff5c28d..994ff91 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -206,6 +206,14 @@ enum virDomainDiskIo { VIR_DOMAIN_DISK_IO_LAST }; +enum virDomainIoEventFd { + VIR_DOMAIN_IO_EVENT_FD_DEFAULT = 0, + VIR_DOMAIN_IO_EVENT_FD_ON, + VIR_DOMAIN_IO_EVENT_FD_OFF, + + VIR_DOMAIN_IO_EVENT_FD_LAST +}; + /* Stores the virtual disk configuration */ typedef struct _virDomainDiskDef virDomainDiskDef; typedef virDomainDiskDef *virDomainDiskDefPtr; @@ -225,6 +233,7 @@ struct _virDomainDiskDef { int 
error_policy; int bootIndex; int iomode; + int ioeventfd; unsigned int readonly : 1; unsigned int shared : 1; virDomainDeviceInfo info; @@ -361,6 +370,7 @@ struct _virDomainNetDef { struct { enum virDomainNetBackendType name; /* which driver backend to use */ enum virDomainNetVirtioTxModeType txmode; + enum virDomainIoEventFd ioeventfd; } virtio; } driver; union { @@ -1554,6 +1564,7 @@ VIR_ENUM_DECL(virDomainDiskCache) VIR_ENUM_DECL(virDomainDiskErrorPolicy) VIR_ENUM_DECL(virDomainDiskProtocol) VIR_ENUM_DECL(virDomainDiskIo) +VIR_ENUM_DECL(virDomainIoEventFd) VIR_ENUM_DECL(virDomainController) VIR_ENUM_DECL(virDomainControllerModel) VIR_ENUM_DECL(virDomainFS) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 03d2ddb..5c8d272 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -292,6 +292,8 @@ virDomainHostdevDefFree; virDomainHostdevModeTypeToString; virDomainHostdevSubsysTypeToString; virDomainInputDefFree; +virDomainIoEventFdTypeFromString; +virDomainIoEventFdTypeToString; virDomainLeaseIndex; virDomainLeaseInsert; virDomainLeaseInsertPreAlloc; diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 28c89b5..ad62a07 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -121,6 +121,7 @@ VIR_ENUM_IMPL(qemuCaps, QEMU_CAPS_LAST, "device-qxl-vga", "pci-multifunction", /* 60 */ + "virtio-blk-pci.ioeventfd", ); struct qemu_feature_flags { @@ -1207,6 +1208,8 @@ qemuCapsParseDeviceStr(const char *str, virBitmapPtr flags) qemuCapsSet(flags, QEMU_CAPS_VIRTIO_TX_ALG); if (strstr(str, "name \"qxl-vga\"")) qemuCapsSet(flags, QEMU_CAPS_DEVICE_QXL_VGA); + if (strstr(str, "virtio-blk-pci.ioeventfd")) + qemuCapsSet(flags, QEMU_CAPS_VIRTIO_IOEVENTFD); return 0; } diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h index e6d2fa3..0b9c8be 100644 --- a/src/qemu/qemu_capabilities.h +++ b/src/qemu/qemu_capabilities.h @@ -96,6 +96,7 @@ enum qemuCapsFlags { 
QEMU_CAPS_VIRTIO_TX_ALG = 58, /* -device virtio-net-pci,tx=string */ QEMU_CAPS_DEVICE_QXL_VGA = 59, /* Is the primary and vga campatible qxl device named qxl-vga? */ QEMU_CAPS_PCI_MULTIFUNCTION = 60, /* -device multifunction=on|off */ + QEMU_CAPS_VIRTIO_IOEVENTFD = 61, /* IOeventFD feature: virtio-{net|blk}-pci.ioeventfd=on/off */ QEMU_CAPS_LAST, /* this must always be the last item */ }; diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 6346243..f7c06f8 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -1287,6 +1287,16 @@ qemuBuildDeviceAddressStr(virBufferPtr buf, return 0; } +static int +qemuBuildIoEventFdStr(virBufferPtr buf, + enum virDomainIoEventFd use, + virBitmapPtr qemuCaps) +{ + if (use && qemuCapsGet(qemuCaps, QEMU_CAPS_VIRTIO_IOEVENTFD)) + virBufferAsprintf(buf, ",ioeventfd=%s", + virDomainIoEventFdTypeToString(use)); + return 0; +} #define QEMU_SERIAL_PARAM_ACCEPTED_CHARS \ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_" @@ -1554,6 +1564,7 @@ qemuBuildDriveDevStr(virDomainDiskDefPtr disk, break; case VIR_DOMAIN_DISK_BUS_VIRTIO: virBufferAddLit(&opt, "virtio-blk-pci"); + qemuBuildIoEventFdStr(&opt, disk->ioeventfd, qemuCaps); qemuBuildDeviceAddressStr(&opt, &disk->info, qemuCaps); break; case VIR_DOMAIN_DISK_BUS_USB: @@ -1777,6 +1788,8 @@ qemuBuildNicDevStr(virDomainNetDefPtr net, goto error; } } + if (usingVirtio) + qemuBuildIoEventFdStr(&buf, net->driver.virtio.ioeventfd, qemuCaps); if (vlan == -1) virBufferAsprintf(&buf, ",netdev=host%s", net->info.alias); else diff --git a/tests/qemuhelptest.c b/tests/qemuhelptest.c index 327a0c7..119e771 100644 --- a/tests/qemuhelptest.c +++ b/tests/qemuhelptest.c @@ -475,7 +475,8 @@ mymain(void) QEMU_CAPS_CCID_PASSTHRU, QEMU_CAPS_CHARDEV_SPICEVMC, QEMU_CAPS_DEVICE_QXL_VGA, - QEMU_CAPS_VIRTIO_TX_ALG); + QEMU_CAPS_VIRTIO_TX_ALG, + QEMU_CAPS_VIRTIO_IOEVENTFD); return ret == 0 ? 
EXIT_SUCCESS : EXIT_FAILURE; } diff --git a/tests/qemuxml2argvdata/qemuxml2argv-disk-ioeventfd.args b/tests/qemuxml2argvdata/qemuxml2argv-disk-ioeventfd.args new file mode 100644 index 0000000..c512f15 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-disk-ioeventfd.args @@ -0,0 +1,11 @@ +LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \ +/usr/bin/qemu -S -M pc-0.13 -m 1024 -smp 1 -nodefaults \ +-monitor unix:/tmp/test-monitor,server,nowait -no-acpi \ +-boot dc -device virtio-serial-pci,id=virtio-serial0,bus=pci.0,addr=0x6 \ +-drive file=/var/lib/libvirt/images/f14.img,if=none,id=drive-virtio-disk0 \ +-device virtio-blk-pci,ioeventfd=on,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 \ +-drive file=/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso,if=none,media=cdrom,id=drive-ide0-1-0 \ +-device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0 \ +-device virtio-net-pci,tx=bh,ioeventfd=off,vlan=0,id=net0,mac=52:54:00:e5:48:58,bus=pci.0,addr=0x3 \ +-net user,vlan=0,name=hostnet0 -serial pty -usb -vnc 127.0.0.1:-809 -std-vga \ +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5 diff --git a/tests/qemuxml2argvdata/qemuxml2argv-disk-ioeventfd.xml b/tests/qemuxml2argvdata/qemuxml2argv-disk-ioeventfd.xml new file mode 100644 index 0000000..c565c9f --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-disk-ioeventfd.xml @@ -0,0 +1,50 @@ +<domain type='qemu'> + <name>test</name> + <memory>1048576</memory> + <vcpu>1</vcpu> + <os> + <type arch='x86_64' machine='pc-0.13'>hvm</type> + <boot dev='cdrom'/> + <boot dev='hd'/> + <bootmenu enable='yes'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>restart</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' ioeventfd='on'/> + <source file='/var/lib/libvirt/images/f14.img'/> + <target dev='vda' bus='virtio'/> + <address 
type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/> + </disk> + <disk type='file' device='cdrom'> + <driver name='qemu' type='raw'/> + <source file='/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso'/> + <target dev='hdc' bus='ide'/> + <readonly/> + <address type='drive' controller='0' bus='1' unit='0'/> + </disk> + <interface type='user'> + <mac address='52:54:00:e5:48:58'/> + <model type='virtio'/> + <driver name='vhost' txmode='iothread' ioeventfd='off'/> + </interface> + <controller type='virtio-serial' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x06' function='0x0'/> + </controller> + <serial type='pty'> + <target port='0'/> + </serial> + <console type='pty'> + <target type='serial' port='0'/> + </console> + <graphics type='vnc' port='5091' autoport='no' listen='127.0.0.1'/> + <video> + <model type='vga' vram='9216' heads='1'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/> + </video> + </devices> +</domain> diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index bd07efa..782664a 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -350,6 +350,10 @@ mymain(void) DO_TEST("disk-aio", false, QEMU_CAPS_DRIVE, QEMU_CAPS_DRIVE_AIO, QEMU_CAPS_DRIVE_CACHE_V2, QEMU_CAPS_DRIVE_FORMAT); + DO_TEST("disk-ioeventfd", false, + QEMU_CAPS_DRIVE, QEMU_CAPS_VIRTIO_IOEVENTFD, + QEMU_CAPS_VIRTIO_TX_ALG, QEMU_CAPS_DEVICE); + DO_TEST("graphics-vnc", false, NONE); DO_TEST("graphics-vnc-socket", false, NONE); -- 1.7.5.rc3 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list