In order for QEMU vCPU (and other) threads to run with RT scheduler, libvirt needs to take care of that so QEMU doesn't have to run privileged. Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1178986 Signed-off-by: Martin Kletzander <mkletzan@xxxxxxxxxx> --- docs/formatdomain.html.in | 13 +++ docs/schemas/domaincommon.rng | 33 ++++++ src/conf/domain_conf.c | 137 +++++++++++++++++++++++- src/conf/domain_conf.h | 24 +++++ tests/qemuxml2argvdata/qemuxml2argv-cputune.xml | 4 + 5 files changed, 210 insertions(+), 1 deletion(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index d8c31e0..60dad76 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -550,6 +550,8 @@ <quota>-1</quota> <emulator_period>1000000</emulator_period> <emulator_quota>-1</emulator_quota> + <vcpusched scheduler='fifo' priority='1'/> + <iothreadsched scheduler='idle'/> </cputune> ... </domain> @@ -656,6 +658,17 @@ <span class="since">Only QEMU driver support since 0.10.0</span> </dd> + <dt><code>vcpusched</code> and <code>iothreadsched</code></dt> + <dd> + The optional <code>vcpusched</code> element specifies the scheduler + (values <code>batch</code>, <code>idle</code>, <code>fifo</code>, + <code>rr</code> and <code>other</code>, which is default and ignored) + for all vCPU/IOThread threads. For real-time schedulers + (<code>fifo</code>, <code>rr</code>), priority can be specified as + well. The value range for the priority depends on the host kernel. + <span class="since">Since 1.2.12</span> + </dd> + </dl> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index d250e6f..6653737 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -815,10 +815,43 @@ </attribute> </element> </zeroOrMore> + <zeroOrMore> + <element name="vcpusched"> + <attribute name="vcpu"> + <ref name="vcpuid"/> + </attribute> + <ref name="schedparam"/> + </element> + </zeroOrMore> + <zeroOrMore> + <element name="iothreadsched"> + <attribute name="iothread"> + <ref name="unsignedInt"/> + </attribute> + <ref name="schedparam"/> + </element> + </zeroOrMore> </interleave> </element> </define> + <define name="schedparam"> + <attribute name="scheduler"> + <choice> + <value>other</value> + <value>batch</value> + <value>idle</value> + <value>fifo</value> + <value>rr</value> + </choice> + </attribute> + <optional> + <attribute name="priority"> + <ref name="unsignedShort"/> + </attribute> + </optional> + </define> + <!-- All the NUMA related tunables would go in the numatune --> <define name="numatune"> <element name="numatune"> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 96d80a9..16adbf1 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -795,6 +795,13 @@ VIR_ENUM_IMPL(virDomainLoader, "rom", "pflash") +VIR_ENUM_IMPL(virDomainThreadSched, VIR_DOMAIN_THREAD_SCHED_LAST, + "other", /* default */ + "batch", + "idle", + "fifo", + "rr") + /* Internal mapping: subset of block job types that can be present in * <mirror> XML (remaining types are not two-phase). */ VIR_ENUM_DECL(virDomainBlockJob) @@ -2260,6 +2267,9 @@ void virDomainDefFree(virDomainDefPtr def) virDomainVcpuPinDefArrayFree(def->cputune.iothreadspin, def->cputune.niothreadspin); + VIR_FREE(def->cputune.vcpusched); + VIR_FREE(def->cputune.iothreadsched); + virDomainNumatuneFree(def->numatune); virSysinfoDefFree(def->sysinfo); @@ -12653,6 +12663,65 @@ virDomainLoaderDefParseXML(xmlNodePtr node, return ret; } +static int +virDomainThreadSchedParse(xmlNodePtr node, + unsigned int maxid, + const char *name, + virDomainThreadSchedParamPtr sp) +{ + char *tmp = NULL; + int sched = 0; + + tmp = virXMLPropString(node, name); + if (!tmp) { + virReportError(VIR_ERR_XML_ERROR, + _("invalid value for attribute %s"), name); + goto error; + } + + if (virStrToLong_uip(tmp, NULL, 10, &sp->id) < 0 || + sp->id >= maxid) { + virReportError(VIR_ERR_XML_ERROR, + _("vcpu must be positive integer smaller than " + "maximum of vcpus, not '%s'"), + tmp); + goto error; + } + VIR_FREE(tmp); + + tmp = virXMLPropString(node, "scheduler"); + if (tmp) { + if ((sched = virDomainThreadSchedTypeFromString(tmp)) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("invalid vcpu scheduler: '%s'"), tmp); + goto error; + } + sp->scheduler = sched; + + VIR_FREE(tmp); + if (sp->scheduler >= VIR_DOMAIN_THREAD_SCHED_FIFO) { + tmp = virXMLPropString(node, "priority"); + if (!tmp) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("missing scheduler priority")); + goto error; + } + if (virStrToLong_i(tmp, NULL, 10, &sp->priority) < 0) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("invalid value for element priority")); + goto error; + } + VIR_FREE(tmp); + } + } + + return 0; + + error: + VIR_FREE(tmp); + return -1; +} + static virDomainDefPtr virDomainDefParseXML(xmlDocPtr xml, xmlNodePtr root, @@ -13201,6 +13270,51 @@ virDomainDefParseXML(xmlDocPtr xml, } VIR_FREE(nodes); + if ((n = virXPathNodeSet("./cputune/vcpusched", ctxt, &nodes)) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("cannot extract vcpusched nodes")); + goto error; + } + if (n) { + if (n > def->maxvcpus) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("too many vcpusched nodes in cputune")); + goto error; + } + + if (VIR_ALLOC_N(def->cputune.vcpusched, n) < 0) + goto error; + def->cputune.nvcpusched = n; + + for (i = 0; i < def->cputune.nvcpusched; i++) { + if (virDomainThreadSchedParse(nodes[i], def->maxvcpus, "vcpu", + &def->cputune.vcpusched[i]) < 0) + goto error; + } + } + + if ((n = virXPathNodeSet("./cputune/iothreadsched", ctxt, &nodes)) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("cannot extract iothreadsched nodes")); + goto error; + } + if (n) { + if (n > def->iothreads) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("too many iothreadsched nodes in cputune")); + goto error; + } + + if (VIR_ALLOC_N(def->cputune.iothreadsched, n) < 0) + goto error; + def->cputune.niothreadsched = n; + + for (i = 0; i < def->cputune.niothreadsched; i++) { + if (virDomainThreadSchedParse(nodes[i], def->iothreads + 1, "iothread", + &def->cputune.iothreadsched[i]) < 0) + goto error; + } + } /* analysis of cpu handling */ if ((node = virXPathNode("./cpu[1]", ctxt)) != NULL) { @@ -19519,7 +19633,8 @@ virDomainDefFormatInternal(virDomainDefPtr def, def->cputune.period || def->cputune.quota || def->cputune.emulatorpin || def->cputune.emulator_period || def->cputune.emulator_quota || - def->cputune.niothreadspin) { + def->cputune.niothreadspin || + def->cputune.vcpusched || def->cputune.iothreadsched) { virBufferAddLit(buf, "<cputune>\n"); cputune = true; } @@ -19592,6 +19707,26 @@ virDomainDefFormatInternal(virDomainDefPtr def, VIR_FREE(cpumask); } + for (i = 0; i < def->cputune.nvcpusched; i++) { + virDomainThreadSchedParamPtr sp = &def->cputune.vcpusched[i]; + virBufferAsprintf(buf, "<vcpusched vcpu='%d' scheduler='%s'", + sp->id, + virDomainThreadSchedTypeToString(sp->scheduler)); + if (sp->priority) + virBufferAsprintf(buf, " priority='%d'", sp->priority); + virBufferAddLit(buf, "/>\n"); + } + + for (i = 0; i < def->cputune.niothreadsched; i++) { + virDomainThreadSchedParamPtr sp = &def->cputune.iothreadsched[i]; + virBufferAsprintf(buf, "<iothreadsched iothread='%d' scheduler='%s'", + sp->id, + virDomainThreadSchedTypeToString(sp->scheduler)); + if (sp->priority) + virBufferAsprintf(buf, " priority='%d'", sp->priority); + virBufferAddLit(buf, "/>\n"); + } + virBufferAdjustIndent(buf, -2); if (cputune) virBufferAddLit(buf, "</cputune>\n"); diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 5cc42d1..636e2d1 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1815,6 +1815,24 @@ typedef enum { VIR_DOMAIN_CPU_PLACEMENT_MODE_LAST } virDomainCpuPlacementMode; +typedef enum { + VIR_DOMAIN_THREAD_SCHED_OTHER = 0, + VIR_DOMAIN_THREAD_SCHED_BATCH, + VIR_DOMAIN_THREAD_SCHED_IDLE, + VIR_DOMAIN_THREAD_SCHED_FIFO, + VIR_DOMAIN_THREAD_SCHED_RR, + + VIR_DOMAIN_THREAD_SCHED_LAST +} virDomainThreadSched; + +typedef struct _virDomainThreadSchedParam virDomainThreadSchedParam; +typedef virDomainThreadSchedParam *virDomainThreadSchedParamPtr; +struct _virDomainThreadSchedParam { + unsigned int id; + virDomainThreadSched scheduler; + int priority; +}; + typedef struct _virDomainTimerCatchupDef virDomainTimerCatchupDef; typedef virDomainTimerCatchupDef *virDomainTimerCatchupDefPtr; struct _virDomainTimerCatchupDef { @@ -2002,6 +2020,11 @@ struct _virDomainCputune { virDomainVcpuPinDefPtr emulatorpin; size_t niothreadspin; virDomainVcpuPinDefPtr *iothreadspin; + + size_t nvcpusched; + virDomainThreadSchedParamPtr vcpusched; + size_t niothreadsched; + virDomainThreadSchedParamPtr iothreadsched; }; typedef struct _virDomainBlkiotune virDomainBlkiotune; @@ -2807,6 +2830,7 @@ VIR_ENUM_DECL(virDomainRNGModel) VIR_ENUM_DECL(virDomainRNGBackend) VIR_ENUM_DECL(virDomainTPMModel) VIR_ENUM_DECL(virDomainTPMBackend) +VIR_ENUM_DECL(virDomainThreadSched) /* from libvirt.h */ VIR_ENUM_DECL(virDomainState) VIR_ENUM_DECL(virDomainNostateReason) diff --git a/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml b/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml index 813d201..243092b 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml +++ b/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml @@ -4,6 +4,7 @@ <memory unit='KiB'>219136</memory> <currentMemory unit='KiB'>219136</currentMemory> <vcpu placement='static'>2</vcpu> + <iothreads>1</iothreads> <cputune> <shares>2048</shares> <period>1000000</period> @@ -11,6 +12,9 @@ <vcpupin vcpu='0' cpuset='0'/> <vcpupin vcpu='1' cpuset='1'/> <emulatorpin cpuset='1'/> + <vcpusched vcpu='0' scheduler='fifo' priority='1'/> + <vcpusched vcpu='1' scheduler='batch'/> + <iothreadsched iothread='0' scheduler='idle'/> </cputune> <os> <type arch='i686' machine='pc'>hvm</type> -- 2.2.1 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list