On 10.02.2015 16:35, Martin Kletzander wrote: > In order for QEMU vCPU (and other) threads to run with RT scheduler, > libvirt needs to take care of that so QEMU doesn't have to run privileged. > > Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1178986 > > Signed-off-by: Martin Kletzander <mkletzan@xxxxxxxxxx> > --- > docs/formatdomain.html.in | 16 ++ > docs/schemas/domaincommon.rng | 44 +++++ > src/conf/domain_conf.c | 183 ++++++++++++++++++++- > src/conf/domain_conf.h | 24 +++ > .../qemuxml2argv-cputune-iothreadsched-toomuch.xml | 38 +++++ > .../qemuxml2argv-cputune-iothreadsched.xml | 39 +++++ > .../qemuxml2argv-cputune-vcpusched-overlap.xml | 38 +++++ > tests/qemuxml2argvtest.c | 2 + > .../qemuxml2xmlout-cputune-iothreadsched.xml | 39 +++++ > tests/qemuxml2xmltest.c | 1 + > 10 files changed, 423 insertions(+), 1 deletion(-) > create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-cputune-iothreadsched-toomuch.xml > create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-cputune-iothreadsched.xml > create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-cputune-vcpusched-overlap.xml > create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-cputune-iothreadsched.xml > > diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in > index d8144ea..3381dfe 100644 > --- a/docs/formatdomain.html.in > +++ b/docs/formatdomain.html.in > @@ -550,6 +550,8 @@ > <quota>-1</quota> > <emulator_period>1000000</emulator_period> > <emulator_quota>-1</emulator_quota> > + <vcpusched vcpus='0-4,^3' scheduler='fifo' priority='1'/> > + <iothreadsched iothreads='2' scheduler='batch'/> > </cputune> > ... > </domain> > @@ -652,6 +654,20 @@ > <span class="since">Only QEMU driver support since 0.10.0</span> > </dd> > > + <dt><code>vcpusched</code> and <code>iothreadsched</code></dt> > + <dd> > + The optional <code>vcpusched</code> elements specifie the scheduler s/specifie/specifies/ And maybe s/scheduler/scheduler type/? > + (values <code>batch</code>, <code>idle</code>, <code>fifo</code>, > + <code>rr</code>) for particular vCPU/IOThread threads (based on > + <code>vcpus</code> and <code>iothreads</code>, leaving out > + <code>vcpus</code>/<code>iothreads</code> sets the default). > + For real-time schedulers (<code>fifo</code>, <code>rr</code>), > + priority must be specified as well (and is ignored for > + non-real-time ones). The value range for the priority depends > + on the host kernel (usually 1-99). > + <span class="since">Since 1.2.12</span> > + </dd> > + > </dl> > > > diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng > index d467dce..98766dc 100644 > --- a/docs/schemas/domaincommon.rng > +++ b/docs/schemas/domaincommon.rng > @@ -815,10 +815,54 @@ > </attribute> > </element> > </zeroOrMore> > + <zeroOrMore> > + <element name="vcpusched"> > + <optional> > + <attribute name="vcpus"> > + <ref name='cpuset'/> > + </attribute> > + </optional> > + <ref name="schedparam"/> > + </element> > + </zeroOrMore> > + <zeroOrMore> > + <element name="iothreadsched"> > + <optional> > + <attribute name="iothreads"> > + <ref name='cpuset'/> > + </attribute> > + </optional> > + <ref name="schedparam"/> > + </element> > + </zeroOrMore> > </interleave> > </element> > </define> > > + <define name="schedparam"> > + <choice> > + <group> > + <attribute name="scheduler"> > + <choice> > + <value>batch</value> > + <value>idle</value> > + </choice> > + </attribute> > + </group> > + <group> > + <attribute name="scheduler"> > + <choice> > + <value>fifo</value> > + <value>rr</value> > + </choice> > + </attribute> > + <attribute name="priority"> > + <ref name="unsignedShort"/> > + </attribute> > + </group> > + </choice> > + </define> > + Have we returned to the rule where each new XML extension requires ACK from at least one of Daniels? > <!-- All the NUMA related tunables would go in the numatune --> > <define name="numatune"> > <element name="numatune"> > diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c > index 77319dc..3fb68b3 100644 > --- a/src/conf/domain_conf.c > +++ b/src/conf/domain_conf.c > @@ -772,6 +772,13 @@ VIR_ENUM_IMPL(virDomainLoader, > "rom", > "pflash") > > +VIR_ENUM_IMPL(virDomainThreadSched, VIR_DOMAIN_THREAD_SCHED_LAST, > + "other", /* default */ > + "batch", > + "idle", > + "fifo", > + "rr") > + > /* Internal mapping: subset of block job types that can be present in > * <mirror> XML (remaining types are not two-phase). */ > VIR_ENUM_DECL(virDomainBlockJob) > @@ -2234,6 +2241,14 @@ void virDomainDefFree(virDomainDefPtr def) > virDomainVcpuPinDefArrayFree(def->cputune.iothreadspin, > def->cputune.niothreadspin); > > + for (i = 0; i < def->cputune.nvcpusched; i++) > + virBitmapFree(def->cputune.vcpusched[i].ids); > + VIR_FREE(def->cputune.vcpusched); > + > + for (i = 0; i < def->cputune.niothreadsched; i++) > + virBitmapFree(def->cputune.iothreadsched[i].ids); > + VIR_FREE(def->cputune.iothreadsched); > + > virDomainNumatuneFree(def->numatune); > > virSysinfoDefFree(def->sysinfo); > @@ -12595,6 +12610,70 @@ virDomainLoaderDefParseXML(xmlNodePtr node, > return ret; > } > > +static int > +virDomainThreadSchedParse(xmlNodePtr node, > + unsigned int minid, > + unsigned int maxid, > + const char *name, > + virDomainThreadSchedParamPtr sp) > +{ > + char *tmp = NULL; > + int sched = 0; > + > + tmp = virXMLPropString(node, name); > + if (!tmp) { > + virReportError(VIR_ERR_XML_ERROR, > + _("Missing attribute '%s' in element '%sched'"), > + name, name); > + goto error; > + } > + > + if (!virBitmapParse(tmp, 0, &sp->ids, > + VIR_DOMAIN_CPUMASK_LEN) || > + virBitmapIsAllClear(sp->ids) || > + virBitmapNextSetBit(sp->ids, -1) < minid || > + virBitmapLastSetBit(sp->ids) > maxid) { > + > + virReportError(VIR_ERR_XML_ERROR, > + _("Invalid value of '%s': %s"), > + name, tmp); > + goto error; > + } > + VIR_FREE(tmp); > + > + tmp = virXMLPropString(node, "scheduler"); > + if (tmp) { > + if ((sched = virDomainThreadSchedTypeFromString(tmp)) <= 0) { > + virReportError(VIR_ERR_INTERNAL_ERROR, > + _("Invalid scheduler attribute: '%s'"), > + tmp); > + goto error; > + } > + sp->scheduler = sched; > + > + VIR_FREE(tmp); > + if (sp->scheduler >= VIR_DOMAIN_THREAD_SCHED_FIFO) { > + tmp = virXMLPropString(node, "priority"); > + if (!tmp) { > + virReportError(VIR_ERR_XML_ERROR, "%s", > + _("Missing scheduler priority")); > + goto error; > + } > + if (virStrToLong_i(tmp, NULL, 10, &sp->priority) < 0) { > + virReportError(VIR_ERR_XML_ERROR, "%s", > + _("Invalid value for element priority")); > + goto error; > + } > + VIR_FREE(tmp); > + } > + } > + > + return 0; > + > + error: > + VIR_FREE(tmp); > + return -1; > +} > > static virDomainDefPtr > virDomainDefParseXML(xmlDocPtr xml, > @@ -13139,6 +13218,77 @@ virDomainDefParseXML(xmlDocPtr xml, > } > VIR_FREE(nodes); > > + if ((n = virXPathNodeSet("./cputune/vcpusched", ctxt, &nodes)) < 0) { > + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > + _("cannot extract vcpusched nodes")); > + goto error; > + } > + if (n) { > + if (n > def->maxvcpus) { > + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > + _("too many vcpusched nodes in cputune")); > + goto error; > + } > + > + if (VIR_ALLOC_N(def->cputune.vcpusched, n) < 0) > + goto error; > + def->cputune.nvcpusched = n; > + > + for (i = 0; i < def->cputune.nvcpusched; i++) { > + if (virDomainThreadSchedParse(nodes[i], > + 0, def->maxvcpus - 1, > + "vcpus", > + &def->cputune.vcpusched[i]) < 0) > + goto error; > + > + for (j = 0; j < i; j++) { > + if (virBitmapOverlaps(def->cputune.vcpusched[i].ids, > + def->cputune.vcpusched[j].ids)) { > + virReportError(VIR_ERR_XML_DETAIL, "%s", > + _("vcpusched attributes 'vcpus' " > + "must not overlap")); > + goto error; > + } > + } > + } > + } > + VIR_FREE(nodes); > + > + if ((n = virXPathNodeSet("./cputune/iothreadsched", ctxt, &nodes)) < 0) { > + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > + _("cannot extract iothreadsched nodes")); > + goto error; > + } > + if (n) { > + if (n > def->iothreads) { > + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > + _("too many iothreadsched nodes in cputune")); > + goto error; > + } > + > + if (VIR_ALLOC_N(def->cputune.iothreadsched, n) < 0) > + goto error; > + def->cputune.niothreadsched = n; > + > + for (i = 0; i < def->cputune.niothreadsched; i++) { > + if (virDomainThreadSchedParse(nodes[i], > + 1, def->iothreads, > + "iothreads", > + &def->cputune.iothreadsched[i]) < 0) > + goto error; > + > + for (j = 0; j < i; j++) { > + if (virBitmapOverlaps(def->cputune.iothreadsched[i].ids, > + def->cputune.iothreadsched[j].ids)) { > + virReportError(VIR_ERR_XML_DETAIL, "%s", > + _("iothreadsched attributes 'iothreads' " > + "must not overlap")); > + goto error; > + } > + } > + } > + } > + VIR_FREE(nodes); Once parsed, do we also need a check in virDomainDefCheckABIStability()? Or can the sched parameters change on migration? I guess they can, but seems like you dig more into this area than me recently. > > /* analysis of cpu handling */ > if ((node = virXPathNode("./cpu[1]", ctxt)) != NULL) { > @@ -19434,7 +19584,8 @@ virDomainDefFormatInternal(virDomainDefPtr def, > def->cputune.period || def->cputune.quota || > def->cputune.emulatorpin || > def->cputune.emulator_period || def->cputune.emulator_quota || > - def->cputune.niothreadspin) { > + def->cputune.niothreadspin || > + def->cputune.vcpusched || def->cputune.iothreadsched) { > virBufferAddLit(buf, "<cputune>\n"); > cputune = true; > } > @@ -19503,6 +19654,36 @@ virDomainDefFormatInternal(virDomainDefPtr def, > VIR_FREE(cpumask); > } > > + for (i = 0; i < def->cputune.nvcpusched; i++) { > + virDomainThreadSchedParamPtr sp = &def->cputune.vcpusched[i]; > + char *ids = NULL; > + > + if (!(ids = virBitmapFormat(sp->ids))) > + goto error; > + virBufferAsprintf(buf, "<vcpusched vcpus='%s' scheduler='%s'", > + ids, virDomainThreadSchedTypeToString(sp->scheduler)); > + VIR_FREE(ids); > + > + if (sp->priority) > + virBufferAsprintf(buf, " priority='%d'", sp->priority); > + virBufferAddLit(buf, "/>\n"); > + } > + > + for (i = 0; i < def->cputune.niothreadsched; i++) { > + virDomainThreadSchedParamPtr sp = &def->cputune.iothreadsched[i]; > + char *ids = NULL; > + > + if (!(ids = virBitmapFormat(sp->ids))) > + goto error; > + virBufferAsprintf(buf, "<iothreadsched iothreads='%s' scheduler='%s'", > + ids, virDomainThreadSchedTypeToString(sp->scheduler)); > + VIR_FREE(ids); > + > + if (sp->priority) > + virBufferAsprintf(buf, " priority='%d'", sp->priority); > + virBufferAddLit(buf, "/>\n"); > + } > + > virBufferAdjustIndent(buf, -2); > if (cputune) > virBufferAddLit(buf, "</cputune>\n"); > diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h > index 93f2314..6be70bf 100644 > --- a/src/conf/domain_conf.h > +++ b/src/conf/domain_conf.h > @@ -1810,6 +1810,24 @@ typedef enum { > VIR_DOMAIN_CPU_PLACEMENT_MODE_LAST > } virDomainCpuPlacementMode; > > +typedef enum { > + VIR_DOMAIN_THREAD_SCHED_OTHER = 0, > + VIR_DOMAIN_THREAD_SCHED_BATCH, > + VIR_DOMAIN_THREAD_SCHED_IDLE, > + VIR_DOMAIN_THREAD_SCHED_FIFO, > + VIR_DOMAIN_THREAD_SCHED_RR, > + > + VIR_DOMAIN_THREAD_SCHED_LAST > +} virDomainThreadSched; > + > +typedef struct _virDomainThreadSchedParam virDomainThreadSchedParam; > +typedef virDomainThreadSchedParam *virDomainThreadSchedParamPtr; > +struct _virDomainThreadSchedParam { > + virBitmapPtr ids; > + virDomainThreadSched scheduler; > + int priority; > +}; > + > typedef struct _virDomainTimerCatchupDef virDomainTimerCatchupDef; > typedef virDomainTimerCatchupDef *virDomainTimerCatchupDefPtr; > struct _virDomainTimerCatchupDef { > @@ -1997,6 +2015,11 @@ struct _virDomainCputune { > virDomainVcpuPinDefPtr emulatorpin; > size_t niothreadspin; > virDomainVcpuPinDefPtr *iothreadspin; > + > + size_t nvcpusched; > + virDomainThreadSchedParamPtr vcpusched; > + size_t niothreadsched; > + virDomainThreadSchedParamPtr iothreadsched; > }; > > typedef struct _virDomainBlkiotune virDomainBlkiotune; > @@ -2844,6 +2867,7 @@ VIR_ENUM_DECL(virDomainRNGModel) > VIR_ENUM_DECL(virDomainRNGBackend) > VIR_ENUM_DECL(virDomainTPMModel) > VIR_ENUM_DECL(virDomainTPMBackend) > +VIR_ENUM_DECL(virDomainThreadSched) Yet another ENUM_DECL without corresponding libvirt_private.syms change. > /* from libvirt.h */ > VIR_ENUM_DECL(virDomainState) > VIR_ENUM_DECL(virDomainNostateReason) ACK with all of that fixed. Michal -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list