For virtio disks and interfaces, qemu allows users to enable or disable ioeventfd feature. This means, qemu can execute domain code, while another thread waits for I/O event. Basically, in some cases it is win, in some loss. This feature is available via 'asyncio' attribute in disk and interface <driver> element. It accepts 'on' and 'off'. Leaving this attribute out defaults to hypervisor decision. --- this is rework as suggested: https://www.redhat.com/archives/libvir-list/2011-May/msg01269.html docs/formatdomain.html.in | 34 ++++++++++++- docs/schemas/domain.rng | 14 +++++ src/conf/domain_conf.c | 49 ++++++++++++++++++- src/conf/domain_conf.h | 11 ++++ src/libvirt_private.syms | 2 + src/qemu/qemu_capabilities.c | 3 + src/qemu/qemu_capabilities.h | 1 + src/qemu/qemu_command.c | 23 +++++++++ tests/qemuhelptest.c | 3 +- .../qemuxml2argv-disk-asyncio.args | 11 ++++ .../qemuxml2argvdata/qemuxml2argv-disk-asyncio.xml | 51 ++++++++++++++++++++ tests/qemuxml2argvtest.c | 3 + 12 files changed, 201 insertions(+), 4 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-disk-asyncio.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-disk-asyncio.xml diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 98fb2b4..8d23740 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -767,7 +767,7 @@ </disk> ... <disk type='network'> - <driver name="qemu" type="raw" io="threads"/> + <driver name="qemu" type="raw" io="threads" asyncio="on"/> <source protocol="sheepdog" name="image_name"> <host name="hostname" port="7000"/> </source> @@ -851,6 +851,20 @@ policies on I/O; qemu guests support "threads" and "native". <span class="since">Since 0.8.8</span> </li> + <li> + The optional <code>asyncio</code> attribute allows users to + set <a href='https://patchwork.kernel.org/patch/43390/'> + domain I/O asynchronous handling</a> for disk device. + The default is left to the discretion of the hypervisor. + Accepted values are "on" and "off". Enabling this allows + qemu to execute VM while a separate thread handles I/O. + Typically guests experiencing high system CPU utilization + during I/O will benefit from this. On the other hand, + on overloaded host it could increase guest I/O latency. + <span class="since">Since 0.9.3 (QEMU and KVM only)</span> + <b>In general you should leave this option alone, unless you + are very certain you know what you are doing.</b> + </li> </ul> </dd> <dt><code>boot</code></dt> @@ -1631,7 +1645,7 @@ qemu-kvm -net nic,model=? /dev/null <source network='default'/> <target dev='vnet1'/> <model type='virtio'/> - <b><driver name='vhost' txmode='iothread'/></b> + <b><driver name='vhost' txmode='iothread' asyncio='on'/></b> </interface> </devices> ...</pre> @@ -1682,6 +1696,22 @@ qemu-kvm -net nic,model=? /dev/null <b>In general you should leave this option alone, unless you are very certain you know what you are doing.</b> </dd> + <dt><code>asyncio</code></dt> + <dd> + This optional attribute allows users to set + <a href='https://patchwork.kernel.org/patch/43390/'> + domain I/O asynchronous handling</a> for interface device. + The default is left to the discretion of the hypervisor. + Accepted values are "on" and "off". Enabling this allows + qemu to execute VM while a separate thread handles I/O. + Typically guests experiencing high system CPU utilization + during I/O will benefit from this. On the other hand, + on overloaded host it could increase guest I/O latency. + <span class="since">Since 0.9.3 (QEMU and KVM only)</span><br/><br/> + + <b>In general you should leave this option alone, unless you + are very certain you know what you are doing.</b> + </dd> </dl> <h5><a name="elementsNICSTargetOverride">Overriding the target element</a></h5> diff --git a/docs/schemas/domain.rng b/docs/schemas/domain.rng index 0be0371..08b92ed 100644 --- a/docs/schemas/domain.rng +++ b/docs/schemas/domain.rng @@ -758,6 +758,9 @@ <optional> <ref name="driverIO"/> </optional> + <optional> + <ref name="asyncIO"/> + </optional> <empty/> </element> </define> @@ -797,6 +800,14 @@ </choice> </attribute> </define> + <define name="asyncIO"> + <attribute name="asyncio"> + <choice> + <value>on</value> + <value>off</value> + </choice> + </attribute> + </define> <define name="controller"> <element name="controller"> <choice> @@ -1097,6 +1108,9 @@ </choice> </attribute> </optional> + <optional> + <ref name="asyncIO"/> + </optional> <empty/> </element> </optional> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 65d4f89..2b81f2b 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -163,6 +163,11 @@ VIR_ENUM_IMPL(virDomainDiskIo, VIR_DOMAIN_DISK_IO_LAST, "native", "threads") +VIR_ENUM_IMPL(virDomainAsyncIo, VIR_DOMAIN_ASYNC_IO_LAST, + "default", + "on", + "off") + VIR_ENUM_IMPL(virDomainController, VIR_DOMAIN_CONTROLLER_TYPE_LAST, "ide", "fdc", @@ -2001,6 +2006,7 @@ virDomainDiskDefParseXML(virCapsPtr caps, char *cachetag = NULL; char *error_policy = NULL; char *iotag = NULL; + char *asyncio = NULL; char *devaddr = NULL; virStorageEncryptionPtr encryption = NULL; char *serial = NULL; @@ -2116,6 +2122,7 @@ virDomainDiskDefParseXML(virCapsPtr caps, cachetag = virXMLPropString(cur, "cache"); error_policy = virXMLPropString(cur, "error_policy"); iotag = virXMLPropString(cur, "io"); + asyncio = virXMLPropString(cur, "asyncio"); } else if (xmlStrEqual(cur->name, BAD_CAST "readonly")) { def->readonly = 1; } else if (xmlStrEqual(cur->name, BAD_CAST "shareable")) { @@ -2252,6 +2259,24 @@ virDomainDiskDefParseXML(virCapsPtr caps, } } + if (asyncio) { + if (def->bus != VIR_DOMAIN_DISK_BUS_VIRTIO) { + virDomainReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("disk asyncio mode supported " + "only for virtio bus")); + goto error; + } + + int i; + if ((i = virDomainAsyncIoTypeFromString(asyncio)) <= 0) { + virDomainReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("unknown disk asyncio mode '%s'"), + asyncio); + goto error; + } + def->asyncio=i; + } + if (devaddr) { if (virDomainParseLegacyDeviceAddress(devaddr, &def->info.addr.pci) < 0) { @@ -2314,6 +2339,7 @@ cleanup: VIR_FREE(cachetag); VIR_FREE(error_policy); VIR_FREE(iotag); + VIR_FREE(asyncio); VIR_FREE(devaddr); VIR_FREE(serial); virStorageEncryptionFree(encryption); @@ -2701,6 +2727,7 @@ virDomainNetDefParseXML(virCapsPtr caps, char *model = NULL; char *backend = NULL; char *txmode = NULL; + char *asyncio = NULL; char *filter = NULL; char *internal = NULL; char *devaddr = NULL; @@ -2790,6 +2817,7 @@ virDomainNetDefParseXML(virCapsPtr caps, } else if (xmlStrEqual (cur->name, BAD_CAST "driver")) { backend = virXMLPropString(cur, "name"); txmode = virXMLPropString(cur, "txmode"); + asyncio = virXMLPropString(cur, "asyncio"); } else if (xmlStrEqual (cur->name, BAD_CAST "filterref")) { filter = virXMLPropString(cur, "filter"); VIR_FREE(filterparams); @@ -3006,6 +3034,16 @@ virDomainNetDefParseXML(virCapsPtr caps, } def->driver.virtio.txmode = m; } + if (asyncio) { + int i; + if ((i = virDomainAsyncIoTypeFromString(asyncio)) <= 0) { + virDomainReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("unknown interface asyncio mode '%s'"), + asyncio); + goto error; + } + def->driver.virtio.asyncio = i; + } } if (filter != NULL) { @@ -3045,6 +3083,7 @@ cleanup: VIR_FREE(model); VIR_FREE(backend); VIR_FREE(txmode); + VIR_FREE(asyncio); VIR_FREE(filter); VIR_FREE(type); VIR_FREE(internal); @@ -8175,6 +8214,7 @@ virDomainDiskDefFormat(virBufferPtr buf, const char *cachemode = virDomainDiskCacheTypeToString(def->cachemode); const char *error_policy = virDomainDiskErrorPolicyTypeToString(def->error_policy); const char *iomode = virDomainDiskIoTypeToString(def->iomode); + const char *asyncio = virDomainAsyncIoTypeToString(def->asyncio); if (!type) { virDomainReportError(VIR_ERR_INTERNAL_ERROR, @@ -8206,7 +8246,8 @@ virDomainDiskDefFormat(virBufferPtr buf, " <disk type='%s' device='%s'>\n", type, device); - if (def->driverName || def->driverType || def->cachemode) { + if (def->driverName || def->driverType || def->cachemode || + def->asyncio) { virBufferAsprintf(buf, " <driver"); if (def->driverName) virBufferAsprintf(buf, " name='%s'", def->driverName); @@ -8218,6 +8259,8 @@ virDomainDiskDefFormat(virBufferPtr buf, virBufferAsprintf(buf, " error_policy='%s'", error_policy); if (def->iomode) virBufferAsprintf(buf, " io='%s'", iomode); + if (def->asyncio) + virBufferAsprintf(buf, " asyncio='%s'", asyncio); virBufferAsprintf(buf, "/>\n"); } @@ -8508,6 +8551,10 @@ virDomainNetDefFormat(virBufferPtr buf, virBufferAsprintf(buf, " txmode='%s'", virDomainNetVirtioTxModeTypeToString(def->driver.virtio.txmode)); } + if (def->driver.virtio.asyncio) { + virBufferAsprintf(buf, " asyncio='%s'", + virDomainAsyncIoTypeToString(def->driver.virtio.asyncio)); + } virBufferAddLit(buf, "/>\n"); } } diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 41c8136..01a98c9 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -206,6 +206,14 @@ enum virDomainDiskIo { VIR_DOMAIN_DISK_IO_LAST }; +enum virDomainAsyncIo { + VIR_DOMAIN_ASYNC_IO_DEFAULT = 0, + VIR_DOMAIN_ASYNC_IO_ON, + VIR_DOMAIN_ASYNC_IO_OFF, + + VIR_DOMAIN_ASYNC_IO_LAST +}; + /* Stores the virtual disk configuration */ typedef struct _virDomainDiskDef virDomainDiskDef; typedef virDomainDiskDef *virDomainDiskDefPtr; @@ -225,6 +233,7 @@ struct _virDomainDiskDef { int error_policy; int bootIndex; int iomode; + int asyncio; unsigned int readonly : 1; unsigned int shared : 1; virDomainDeviceInfo info; @@ -361,6 +370,7 @@ struct _virDomainNetDef { struct { enum virDomainNetBackendType name; /* which driver backend to use */ enum virDomainNetVirtioTxModeType txmode; + enum virDomainAsyncIo asyncio; } virtio; } driver; union { @@ -1521,6 +1531,7 @@ VIR_ENUM_DECL(virDomainDiskCache) VIR_ENUM_DECL(virDomainDiskErrorPolicy) VIR_ENUM_DECL(virDomainDiskProtocol) VIR_ENUM_DECL(virDomainDiskIo) +VIR_ENUM_DECL(virDomainAsyncIo) VIR_ENUM_DECL(virDomainController) VIR_ENUM_DECL(virDomainControllerModel) VIR_ENUM_DECL(virDomainFS) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 737cd31..f3a44f6 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -205,6 +205,8 @@ dnsmasqSave; virDiskNameToBusDeviceIndex; virDiskNameToIndex; virDomainAssignDef; +virDomainAsyncIoTypeFromString; +virDomainAsyncIoTypeToString; virDomainChrConsoleTargetTypeFromString; virDomainChrConsoleTargetTypeToString; virDomainChrDefForeach; diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 28c89b5..ad62a07 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -121,6 +121,7 @@ VIR_ENUM_IMPL(qemuCaps, QEMU_CAPS_LAST, "device-qxl-vga", "pci-multifunction", /* 60 */ + "virtio-blk-pci.ioeventfd", ); struct qemu_feature_flags { @@ -1207,6 +1208,8 @@ qemuCapsParseDeviceStr(const char *str, virBitmapPtr flags) qemuCapsSet(flags, QEMU_CAPS_VIRTIO_TX_ALG); if (strstr(str, "name \"qxl-vga\"")) qemuCapsSet(flags, QEMU_CAPS_DEVICE_QXL_VGA); + if (strstr(str, "virtio-blk-pci.ioeventfd")) + qemuCapsSet(flags, QEMU_CAPS_VIRTIO_IOEVENTFD); return 0; } diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h index e6d2fa3..0b9c8be 100644 --- a/src/qemu/qemu_capabilities.h +++ b/src/qemu/qemu_capabilities.h @@ -96,6 +96,7 @@ enum qemuCapsFlags { QEMU_CAPS_VIRTIO_TX_ALG = 58, /* -device virtio-net-pci,tx=string */ QEMU_CAPS_DEVICE_QXL_VGA = 59, /* Is the primary and vga campatible qxl device named qxl-vga? */ QEMU_CAPS_PCI_MULTIFUNCTION = 60, /* -device multifunction=on|off */ + QEMU_CAPS_VIRTIO_IOEVENTFD = 61, /* IOeventFD feature: virtio-{net|blk}-pci.ioeventfd=on/off */ QEMU_CAPS_LAST, /* this must always be the last item */ }; diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index cb81354..8273e7e 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -1287,6 +1287,26 @@ qemuBuildDeviceAddressStr(virBufferPtr buf, return 0; } +static int +qemuBuildAsyncIoStr(virBufferPtr buf, + enum virDomainAsyncIo use, + virBitmapPtr qemuCaps) +{ + if (qemuCapsGet(qemuCaps, QEMU_CAPS_VIRTIO_IOEVENTFD)) { + switch (use) { + case VIR_DOMAIN_ASYNC_IO_ON: + case VIR_DOMAIN_ASYNC_IO_OFF: + virBufferAsprintf(buf, ",ioeventfd=%s", + virDomainAsyncIoTypeToString(use)); + break; + default: + /* In other cases (_DEFAULT, _LAST) we don't + * want to add anything */ + break; + } + } + return 0; +} #define QEMU_SERIAL_PARAM_ACCEPTED_CHARS \ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_" @@ -1552,6 +1572,7 @@ qemuBuildDriveDevStr(virDomainDiskDefPtr disk, break; case VIR_DOMAIN_DISK_BUS_VIRTIO: virBufferAddLit(&opt, "virtio-blk-pci"); + qemuBuildAsyncIoStr(&opt, disk->asyncio, qemuCaps); qemuBuildDeviceAddressStr(&opt, &disk->info, qemuCaps); break; case VIR_DOMAIN_DISK_BUS_USB: @@ -1774,6 +1795,8 @@ qemuBuildNicDevStr(virDomainNetDefPtr net, goto error; } } + if (usingVirtio) + qemuBuildAsyncIoStr(&buf, net->driver.virtio.asyncio, qemuCaps); if (vlan == -1) virBufferAsprintf(&buf, ",netdev=host%s", net->info.alias); else diff --git a/tests/qemuhelptest.c b/tests/qemuhelptest.c index 327a0c7..119e771 100644 --- a/tests/qemuhelptest.c +++ b/tests/qemuhelptest.c @@ -475,7 +475,8 @@ mymain(void) QEMU_CAPS_CCID_PASSTHRU, QEMU_CAPS_CHARDEV_SPICEVMC, QEMU_CAPS_DEVICE_QXL_VGA, - QEMU_CAPS_VIRTIO_TX_ALG); + QEMU_CAPS_VIRTIO_TX_ALG, + QEMU_CAPS_VIRTIO_IOEVENTFD); return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE; } diff --git a/tests/qemuxml2argvdata/qemuxml2argv-disk-asyncio.args b/tests/qemuxml2argvdata/qemuxml2argv-disk-asyncio.args new file mode 100644 index 0000000..c512f15 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-disk-asyncio.args @@ -0,0 +1,11 @@ +LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \ +/usr/bin/qemu -S -M pc-0.13 -m 1024 -smp 1 -nodefaults \ +-monitor unix:/tmp/test-monitor,server,nowait -no-acpi \ +-boot dc -device virtio-serial-pci,id=virtio-serial0,bus=pci.0,addr=0x6 \ +-drive file=/var/lib/libvirt/images/f14.img,if=none,id=drive-virtio-disk0 \ +-device virtio-blk-pci,ioeventfd=on,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0 \ +-drive file=/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso,if=none,media=cdrom,id=drive-ide0-1-0 \ +-device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0 \ +-device virtio-net-pci,tx=bh,ioeventfd=off,vlan=0,id=net0,mac=52:54:00:e5:48:58,bus=pci.0,addr=0x3 \ +-net user,vlan=0,name=hostnet0 -serial pty -usb -vnc 127.0.0.1:-809 -std-vga \ +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5 diff --git a/tests/qemuxml2argvdata/qemuxml2argv-disk-asyncio.xml b/tests/qemuxml2argvdata/qemuxml2argv-disk-asyncio.xml new file mode 100644 index 0000000..5a16bd1 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-disk-asyncio.xml @@ -0,0 +1,51 @@ +<domain type='qemu'> + <name>test</name> + <memory>1048576</memory> + <vcpu>1</vcpu> + <os> + <type arch='x86_64' machine='pc-0.13'>hvm</type> + <boot dev='cdrom'/> + <boot dev='hd'/> + <bootmenu enable='yes'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>restart</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' asyncio='on'/> + <source file='/var/lib/libvirt/images/f14.img'/> + <target dev='vda' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/> + </disk> + <disk type='file' device='cdrom'> + <driver name='qemu' type='raw'/> + <source file='/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso'/> + <target dev='hdc' bus='ide'/> + <readonly/> + <address type='drive' controller='0' bus='1' unit='0'/> + </disk> + <interface type='user'> + <mac address='52:54:00:e5:48:58'/> + <model type='virtio'/> + <driver name='vhost' txmode='iothread' asyncio='off'/> + </interface> + <controller type='virtio-serial' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x06' function='0x0'/> + </controller> + <serial type='pty'> + <target port='0'/> + </serial> + <console type='pty'> + <target type='serial' port='0'/> + </console> + <graphics type='vnc' port='5091' autoport='no' listen='127.0.0.1'/> + <video> + <model type='vga' vram='9216' heads='1'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/> + </video> + </devices> +</domain> + diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index b8fd468..489025f 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -333,6 +333,9 @@ mymain(void) DO_TEST("disk-aio", false, QEMU_CAPS_DRIVE, QEMU_CAPS_DRIVE_AIO, QEMU_CAPS_DRIVE_CACHE_V2, QEMU_CAPS_DRIVE_FORMAT); + DO_TEST("disk-asyncio", false, + QEMU_CAPS_DRIVE, QEMU_CAPS_VIRTIO_IOEVENTFD, + QEMU_CAPS_VIRTIO_TX_ALG, QEMU_CAPS_DEVICE); DO_TEST("graphics-vnc", false, NONE); DO_TEST("graphics-vnc-socket", false, NONE); -- 1.7.5.rc3 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list