In order for QEMU vCPU (and other) threads to run with RT scheduler, libvirt needs to take care of that so QEMU doesn't have to run privileged. Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1178986 Signed-off-by: Martin Kletzander <mkletzan@xxxxxxxxxx> --- docs/formatdomain.html.in | 16 ++ docs/schemas/domaincommon.rng | 44 +++++ src/conf/domain_conf.c | 183 ++++++++++++++++++++- src/conf/domain_conf.h | 24 +++ .../qemuxml2argv-cputune-iothreadsched-toomuch.xml | 38 +++++ .../qemuxml2argv-cputune-iothreadsched.xml | 39 +++++ .../qemuxml2argv-cputune-vcpusched-overlap.xml | 38 +++++ tests/qemuxml2argvtest.c | 2 + .../qemuxml2xmlout-cputune-iothreadsched.xml | 39 +++++ tests/qemuxml2xmltest.c | 1 + 10 files changed, 423 insertions(+), 1 deletion(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-cputune-iothreadsched-toomuch.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-cputune-iothreadsched.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-cputune-vcpusched-overlap.xml create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-cputune-iothreadsched.xml diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index d8144ea..3381dfe 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -550,6 +550,8 @@ <quota>-1</quota> <emulator_period>1000000</emulator_period> <emulator_quota>-1</emulator_quota> + <vcpusched vcpus='0-4,^3' scheduler='fifo' priority='1'/> + <iothreadsched iothreads='2' scheduler='batch'/> </cputune> ... </domain> @@ -652,6 +654,20 @@ <span class="since">Only QEMU driver support since 0.10.0</span> </dd> + <dt><code>vcpusched</code> and <code>iothreadsched</code></dt> + <dd> + The optional <code>vcpusched</code> elements specifie the scheduler + (values <code>batch</code>, <code>idle</code>, <code>fifo</code>, + <code>rr</code>) for particular vCPU/IOThread threads (based on + <code>vcpus</code> and <code>iothreads</code>, leaving out + <code>vcpus</code>/<code>iothreads</code> sets the default). + For real-time schedulers (<code>fifo</code>, <code>rr</code>), + priority must be specified as well (and is ignored for + non-real-time ones). The value range for the priority depends + on the host kernel (usually 1-99). + <span class="since">Since 1.2.12</span> + </dd> + </dl> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index d467dce..98766dc 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -815,10 +815,54 @@ </attribute> </element> </zeroOrMore> + <zeroOrMore> + <element name="vcpusched"> + <optional> + <attribute name="vcpus"> + <ref name='cpuset'/> + </attribute> + </optional> + <ref name="schedparam"/> + </element> + </zeroOrMore> + <zeroOrMore> + <element name="iothreadsched"> + <optional> + <attribute name="iothreads"> + <ref name='cpuset'/> + </attribute> + </optional> + <ref name="schedparam"/> + </element> + </zeroOrMore> </interleave> </element> </define> + <define name="schedparam"> + <choice> + <group> + <attribute name="scheduler"> + <choice> + <value>batch</value> + <value>idle</value> + </choice> + </attribute> + </group> + <group> + <attribute name="scheduler"> + <choice> + <value>fifo</value> + <value>rr</value> + </choice> + </attribute> + <attribute name="priority"> + <ref name="unsignedShort"/> + </attribute> + </group> + </choice> + </define> + <!-- All the NUMA related tunables would go in the numatune --> <define name="numatune"> <element name="numatune"> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 77319dc..3fb68b3 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -772,6 +772,13 @@ VIR_ENUM_IMPL(virDomainLoader, "rom", "pflash") +VIR_ENUM_IMPL(virDomainThreadSched, VIR_DOMAIN_THREAD_SCHED_LAST, + "other", /* default */ + "batch", + "idle", + "fifo", + "rr") + /* Internal mapping: subset of block job types that can be present in * <mirror> XML (remaining types are not two-phase). */ VIR_ENUM_DECL(virDomainBlockJob) @@ -2234,6 +2241,14 @@ void virDomainDefFree(virDomainDefPtr def) virDomainVcpuPinDefArrayFree(def->cputune.iothreadspin, def->cputune.niothreadspin); + for (i = 0; i < def->cputune.nvcpusched; i++) + virBitmapFree(def->cputune.vcpusched[i].ids); + VIR_FREE(def->cputune.vcpusched); + + for (i = 0; i < def->cputune.niothreadsched; i++) + virBitmapFree(def->cputune.iothreadsched[i].ids); + VIR_FREE(def->cputune.iothreadsched); + virDomainNumatuneFree(def->numatune); virSysinfoDefFree(def->sysinfo); @@ -12595,6 +12610,70 @@ virDomainLoaderDefParseXML(xmlNodePtr node, return ret; } +static int +virDomainThreadSchedParse(xmlNodePtr node, + unsigned int minid, + unsigned int maxid, + const char *name, + virDomainThreadSchedParamPtr sp) +{ + char *tmp = NULL; + int sched = 0; + + tmp = virXMLPropString(node, name); + if (!tmp) { + virReportError(VIR_ERR_XML_ERROR, + _("Missing attribute '%s' in element '%sched'"), + name, name); + goto error; + } + + if (!virBitmapParse(tmp, 0, &sp->ids, + VIR_DOMAIN_CPUMASK_LEN) || + virBitmapIsAllClear(sp->ids) || + virBitmapNextSetBit(sp->ids, -1) < minid || + virBitmapLastSetBit(sp->ids) > maxid) { + + virReportError(VIR_ERR_XML_ERROR, + _("Invalid value of '%s': %s"), + name, tmp); + goto error; + } + VIR_FREE(tmp); + + tmp = virXMLPropString(node, "scheduler"); + if (tmp) { + if ((sched = virDomainThreadSchedTypeFromString(tmp)) <= 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Invalid scheduler attribute: '%s'"), + tmp); + goto error; + } + sp->scheduler = sched; + + VIR_FREE(tmp); + if (sp->scheduler >= VIR_DOMAIN_THREAD_SCHED_FIFO) { + tmp = virXMLPropString(node, "priority"); + if (!tmp) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Missing scheduler priority")); + goto error; + } + if (virStrToLong_i(tmp, NULL, 10, &sp->priority) < 0) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Invalid value for element priority")); + goto error; + } + VIR_FREE(tmp); + } + } + + return 0; + + error: + VIR_FREE(tmp); + return -1; +} static virDomainDefPtr virDomainDefParseXML(xmlDocPtr xml, @@ -13139,6 +13218,77 @@ virDomainDefParseXML(xmlDocPtr xml, } VIR_FREE(nodes); + if ((n = virXPathNodeSet("./cputune/vcpusched", ctxt, &nodes)) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("cannot extract vcpusched nodes")); + goto error; + } + if (n) { + if (n > def->maxvcpus) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("too many vcpusched nodes in cputune")); + goto error; + } + + if (VIR_ALLOC_N(def->cputune.vcpusched, n) < 0) + goto error; + def->cputune.nvcpusched = n; + + for (i = 0; i < def->cputune.nvcpusched; i++) { + if (virDomainThreadSchedParse(nodes[i], + 0, def->maxvcpus - 1, + "vcpus", + &def->cputune.vcpusched[i]) < 0) + goto error; + + for (j = 0; j < i; j++) { + if (virBitmapOverlaps(def->cputune.vcpusched[i].ids, + def->cputune.vcpusched[j].ids)) { + virReportError(VIR_ERR_XML_DETAIL, "%s", + _("vcpusched attributes 'vcpus' " + "must not overlap")); + goto error; + } + } + } + } + VIR_FREE(nodes); + + if ((n = virXPathNodeSet("./cputune/iothreadsched", ctxt, &nodes)) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("cannot extract iothreadsched nodes")); + goto error; + } + if (n) { + if (n > def->iothreads) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("too many iothreadsched nodes in cputune")); + goto error; + } + + if (VIR_ALLOC_N(def->cputune.iothreadsched, n) < 0) + goto error; + def->cputune.niothreadsched = n; + + for (i = 0; i < def->cputune.niothreadsched; i++) { + if (virDomainThreadSchedParse(nodes[i], + 1, def->iothreads, + "iothreads", + &def->cputune.iothreadsched[i]) < 0) + goto error; + + for (j = 0; j < i; j++) { + if (virBitmapOverlaps(def->cputune.iothreadsched[i].ids, + def->cputune.iothreadsched[j].ids)) { + virReportError(VIR_ERR_XML_DETAIL, "%s", + _("iothreadsched attributes 'iothreads' " + "must not overlap")); + goto error; + } + } + } + } + VIR_FREE(nodes); /* analysis of cpu handling */ if ((node = virXPathNode("./cpu[1]", ctxt)) != NULL) { @@ -19434,7 +19584,8 @@ virDomainDefFormatInternal(virDomainDefPtr def, def->cputune.period || def->cputune.quota || def->cputune.emulatorpin || def->cputune.emulator_period || def->cputune.emulator_quota || - def->cputune.niothreadspin) { + def->cputune.niothreadspin || + def->cputune.vcpusched || def->cputune.iothreadsched) { virBufferAddLit(buf, "<cputune>\n"); cputune = true; } @@ -19503,6 +19654,36 @@ virDomainDefFormatInternal(virDomainDefPtr def, VIR_FREE(cpumask); } + for (i = 0; i < def->cputune.nvcpusched; i++) { + virDomainThreadSchedParamPtr sp = &def->cputune.vcpusched[i]; + char *ids = NULL; + + if (!(ids = virBitmapFormat(sp->ids))) + goto error; + virBufferAsprintf(buf, "<vcpusched vcpus='%s' scheduler='%s'", + ids, virDomainThreadSchedTypeToString(sp->scheduler)); + VIR_FREE(ids); + + if (sp->priority) + virBufferAsprintf(buf, " priority='%d'", sp->priority); + virBufferAddLit(buf, "/>\n"); + } + + for (i = 0; i < def->cputune.niothreadsched; i++) { + virDomainThreadSchedParamPtr sp = &def->cputune.iothreadsched[i]; + char *ids = NULL; + + if (!(ids = virBitmapFormat(sp->ids))) + goto error; + virBufferAsprintf(buf, "<iothreadsched iothreads='%s' scheduler='%s'", + ids, virDomainThreadSchedTypeToString(sp->scheduler)); + VIR_FREE(ids); + + if (sp->priority) + virBufferAsprintf(buf, " priority='%d'", sp->priority); + virBufferAddLit(buf, "/>\n"); + } + virBufferAdjustIndent(buf, -2); if (cputune) virBufferAddLit(buf, "</cputune>\n"); diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 93f2314..6be70bf 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1810,6 +1810,24 @@ typedef enum { VIR_DOMAIN_CPU_PLACEMENT_MODE_LAST } virDomainCpuPlacementMode; +typedef enum { + VIR_DOMAIN_THREAD_SCHED_OTHER = 0, + VIR_DOMAIN_THREAD_SCHED_BATCH, + VIR_DOMAIN_THREAD_SCHED_IDLE, + VIR_DOMAIN_THREAD_SCHED_FIFO, + VIR_DOMAIN_THREAD_SCHED_RR, + + VIR_DOMAIN_THREAD_SCHED_LAST +} virDomainThreadSched; + +typedef struct _virDomainThreadSchedParam virDomainThreadSchedParam; +typedef virDomainThreadSchedParam *virDomainThreadSchedParamPtr; +struct _virDomainThreadSchedParam { + virBitmapPtr ids; + virDomainThreadSched scheduler; + int priority; +}; + typedef struct _virDomainTimerCatchupDef virDomainTimerCatchupDef; typedef virDomainTimerCatchupDef *virDomainTimerCatchupDefPtr; struct _virDomainTimerCatchupDef { @@ -1997,6 +2015,11 @@ struct _virDomainCputune { virDomainVcpuPinDefPtr emulatorpin; size_t niothreadspin; virDomainVcpuPinDefPtr *iothreadspin; + + size_t nvcpusched; + virDomainThreadSchedParamPtr vcpusched; + size_t niothreadsched; + virDomainThreadSchedParamPtr iothreadsched; }; typedef struct _virDomainBlkiotune virDomainBlkiotune; @@ -2844,6 +2867,7 @@ VIR_ENUM_DECL(virDomainRNGModel) VIR_ENUM_DECL(virDomainRNGBackend) VIR_ENUM_DECL(virDomainTPMModel) VIR_ENUM_DECL(virDomainTPMBackend) +VIR_ENUM_DECL(virDomainThreadSched) /* from libvirt.h */ VIR_ENUM_DECL(virDomainState) VIR_ENUM_DECL(virDomainNostateReason) diff --git a/tests/qemuxml2argvdata/qemuxml2argv-cputune-iothreadsched-toomuch.xml b/tests/qemuxml2argvdata/qemuxml2argv-cputune-iothreadsched-toomuch.xml new file mode 100644 index 0000000..1540969 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-cputune-iothreadsched-toomuch.xml @@ -0,0 +1,38 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <memory unit='KiB'>219136</memory> + <currentMemory unit='KiB'>219136</currentMemory> + <vcpu placement='static'>2</vcpu> + <iothreads>1</iothreads> + <cputune> + <shares>2048</shares> + <period>1000000</period> + <quota>-1</quota> + <vcpupin vcpu='0' cpuset='0'/> + <vcpupin vcpu='1' cpuset='1'/> + <emulatorpin cpuset='1'/> + <vcpusched vcpus='0-1' scheduler='fifo' priority='1'/> + <iothreadsched iothreads='2' scheduler='batch'/> + </cputune> + <os> + <type arch='i686' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <disk type='block' device='disk'> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='hda' bus='ide'/> + <address type='drive' controller='0' bus='0' target='0' unit='0'/> + </disk> + <controller type='usb' index='0'/> + <controller type='ide' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2argvdata/qemuxml2argv-cputune-iothreadsched.xml b/tests/qemuxml2argvdata/qemuxml2argv-cputune-iothreadsched.xml new file mode 100644 index 0000000..0a3ffc7 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-cputune-iothreadsched.xml @@ -0,0 +1,39 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <memory unit='KiB'>219136</memory> + <currentMemory unit='KiB'>219136</currentMemory> + <vcpu placement='static'>2</vcpu> + <iothreads>4</iothreads> + <cputune> + <shares>2048</shares> + <period>1000000</period> + <quota>-1</quota> + <vcpupin vcpu='0' cpuset='0'/> + <vcpupin vcpu='1' cpuset='1'/> + <emulatorpin cpuset='1'/> + <vcpusched vcpus='0-1' scheduler='fifo' priority='1'/> + <iothreadsched iothreads='1-3,^2' scheduler='batch'/> + <iothreadsched iothreads='2' scheduler='batch'/> + </cputune> + <os> + <type arch='i686' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <disk type='block' device='disk'> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='hda' bus='ide'/> + <address type='drive' controller='0' bus='0' target='0' unit='0'/> + </disk> + <controller type='usb' index='0'/> + <controller type='ide' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2argvdata/qemuxml2argv-cputune-vcpusched-overlap.xml b/tests/qemuxml2argvdata/qemuxml2argv-cputune-vcpusched-overlap.xml new file mode 100644 index 0000000..cbbe7dc --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-cputune-vcpusched-overlap.xml @@ -0,0 +1,38 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <memory unit='KiB'>219136</memory> + <currentMemory unit='KiB'>219136</currentMemory> + <vcpu placement='static'>4</vcpu> + <iothreads>1</iothreads> + <cputune> + <shares>2048</shares> + <period>1000000</period> + <quota>-1</quota> + <vcpupin vcpu='0' cpuset='0'/> + <vcpupin vcpu='1' cpuset='1'/> + <emulatorpin cpuset='1'/> + <vcpusched vcpus='0-1' scheduler='fifo' priority='1'/> + <vcpusched vcpus='1-2' scheduler='fifo' priority='1'/> + </cputune> + <os> + <type arch='i686' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <disk type='block' device='disk'> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='hda' bus='ide'/> + <address type='drive' controller='0' bus='0' target='0' unit='0'/> + </disk> + <controller type='usb' index='0'/> + <controller type='ide' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index f864c2a..253428a 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -1253,6 +1253,8 @@ mymain(void) DO_TEST("blkiotune-device", QEMU_CAPS_NAME); DO_TEST("cputune", QEMU_CAPS_NAME); DO_TEST("cputune-zero-shares", QEMU_CAPS_NAME); + DO_TEST_PARSE_ERROR("cputune-iothreadsched-toomuch", QEMU_CAPS_NAME); + DO_TEST_PARSE_ERROR("cputune-vcpusched-overlap", QEMU_CAPS_NAME); DO_TEST("numatune-memory", NONE); DO_TEST_PARSE_ERROR("numatune-memory-invalid-nodeset", NONE); diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-cputune-iothreadsched.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-cputune-iothreadsched.xml new file mode 100644 index 0000000..9f61336 --- /dev/null +++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-cputune-iothreadsched.xml @@ -0,0 +1,39 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <memory unit='KiB'>219136</memory> + <currentMemory unit='KiB'>219136</currentMemory> + <vcpu placement='static'>2</vcpu> + <iothreads>4</iothreads> + <cputune> + <shares>2048</shares> + <period>1000000</period> + <quota>-1</quota> + <vcpupin vcpu='0' cpuset='0'/> + <vcpupin vcpu='1' cpuset='1'/> + <emulatorpin cpuset='1'/> + <vcpusched vcpus='0-1' scheduler='fifo' priority='1'/> + <iothreadsched iothreads='1,3' scheduler='batch'/> + <iothreadsched iothreads='2' scheduler='batch'/> + </cputune> + <os> + <type arch='i686' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <disk type='block' device='disk'> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='hda' bus='ide'/> + <address type='drive' controller='0' bus='0' target='0' unit='0'/> + </disk> + <controller type='usb' index='0'/> + <controller type='ide' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index d3dfd9e..915ef54 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -310,6 +310,7 @@ mymain(void) DO_TEST("blkiotune-device"); DO_TEST("cputune"); DO_TEST("cputune-zero-shares"); + DO_TEST_DIFFERENT("cputune-iothreadsched"); DO_TEST("cputune-numatune"); DO_TEST("vcpu-placement-static"); -- 2.3.0 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list