This adds per-device weights to <blkiotune>. Note that the cgroups implementation only supports weights per block device, and not per-file within the device; hence this option must be global to the domain definition rather than tied to individual <device>/<disk> entries: <domain ...> <blkiotune> <device> <path>/path/to/block</path> <weight>1000</weight> </device> </blkiotune> ... This patch also adds a parameter --device-weight to virsh command blkiotune for setting/getting blkiotune.weight_device for any hypervisor that supports it. All <device> entries under <blkiotune> are concatenated into a single string attribute under virDomain{Get,Set}BlkioParameters, named "device_weight". Signed-off-by: Hu Tao <hutao@xxxxxxxxxxxxxx> Signed-off-by: Eric Blake <eblake@xxxxxxxxxx> --- docs/formatdomain.html.in | 29 ++++++++- docs/schemas/domaincommon.rng | 12 ++++ include/libvirt/libvirt.h.in | 10 +++ src/conf/domain_conf.c | 138 ++++++++++++++++++++++++++++++++++++++++- src/conf/domain_conf.h | 17 +++++ src/libvirt_private.syms | 2 + tools/virsh.c | 44 ++++++++++++-- tools/virsh.pod | 6 ++- 8 files changed, 246 insertions(+), 12 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index cbad196..99c5add 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -505,6 +505,14 @@ ... <blkiotune> <weight>800</weight> + <device> + <path>/dev/sda</path> + <weight>1000</weight> + </device> + <device> + <path>/dev/sdb</path> + <weight>500</weight> + </device> </blkiotune> ... </domain> @@ -514,10 +522,25 @@ <dt><code>blkiotune</code></dt> <dd> The optional <code>blkiotune</code> element provides the ability to tune Blkio cgroup tunable parameters for the domain. If this is - omitted, it defaults to the OS provided defaults.</dd> + omitted, it defaults to the OS provided + defaults. <span class="since">Since 0.8.8</span></dd> <dt><code>weight</code></dt> - <dd> The optional <code>weight</code> element is the I/O weight of the - guest. The value should be in range [100, 1000].</dd> + <dd> The optional <code>weight</code> element is the overall I/O + weight of the guest. The value should be in the range [100, + 1000].</dd> + <dt><code>device</code></dt> + <dd>The domain may have multiple <code>device</code> elements + that further tune the weights for each host block device in + use by the domain. Note that + multiple <a href="#elementsDisks">guest disks</a> can share a + single host block device, if they are backed by files within + the same host file system, which is why this tuning parameter + is at the global domain level rather than associated with each + guest disk device. Each <code>device</code> element has two + mandatory sub-elements, <code>path</code> describing the + absolute path of the device, and <code>weight</code> giving + the relative weight of that device, in the range [100, + 1000]. <span class="since">Since 0.9.8</span></dd> </dl> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index b6f858e..347af1a 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -328,6 +328,18 @@ <ref name="weight"/> </element> </optional> + <zeroOrMore> + <element name="device"> + <interleave> + <element name="path"> + <ref name="absFilePath"/> + </element> + <element name="weight"> + <ref name="weight"/> + </element> + </interleave> + </element> + </zeroOrMore> </element> </optional> diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in index 2ab89f5..0fd9302 100644 --- a/include/libvirt/libvirt.h.in +++ b/include/libvirt/libvirt.h.in @@ -1236,6 +1236,16 @@ char * virDomainGetSchedulerType(virDomainPtr domain, #define VIR_DOMAIN_BLKIO_WEIGHT "weight" +/** + * VIR_DOMAIN_BLKIO_DEVICE_WEIGHT: + * + * Macro for the blkio tunable weight_device: it represents the + * per-device weight, as a string. The string is parsed as a + * series of /path/to/device,weight elements, separated by ';'. + */ + +#define VIR_DOMAIN_BLKIO_DEVICE_WEIGHT "device_weight" + /* Set Blkio tunables for the domain*/ int virDomainSetBlkioParameters(virDomainPtr domain, virTypedParameterPtr params, diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index a85f837..f0b324d 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -603,6 +603,104 @@ VIR_ENUM_IMPL(virDomainStartupPolicy, VIR_DOMAIN_STARTUP_POLICY_LAST, #define VIR_DOMAIN_XML_WRITE_FLAGS VIR_DOMAIN_XML_SECURE #define VIR_DOMAIN_XML_READ_FLAGS VIR_DOMAIN_XML_INACTIVE + +void virBlkioDeviceWeightArrayClear(virBlkioDeviceWeightPtr deviceWeights, + int ndevices) +{ + int i; + + for (i = 0; i < ndevices; i++) + VIR_FREE(deviceWeights[i].path); +} + +/** + * virBlkioDeviceWeightToStr: + * + * This function returns a string representing device weights that is + * suitable for writing to /cgroup/blkio/blkio.weight_device, given + * a list of per-device weights. + */ +#if defined(major) && defined(minor) +int virBlkioDeviceWeightToStr(virBlkioDeviceWeightPtr devices, + int ndevices, + char **result) +{ + int i; + struct stat s; + virBuffer buf = VIR_BUFFER_INITIALIZER; + + for (i = 0; i < ndevices; i++) { + if (stat(devices[i].path, &s) == -1) + return -1; + if ((s.st_mode & S_IFMT) != S_IFBLK) + return -1; + virBufferAsprintf(&buf, "%d:%d %d\n", + major(s.st_rdev), + minor(s.st_rdev), + devices[i].weight); + } + + if (virBufferError(&buf)) { + virBufferFreeAndReset(&buf); + return -1; + } + + *result = virBufferContentAndReset(&buf); + return 0; +} +#else +int virBlkioDeviceWeightToStr(virBlkioDeviceWeightPtr devices ATTRIBUTE_UNUSED, + int ndevices ATTRIBUTE_UNUSED, + char **result ATTRIBUTE_UNUSED) +{ + return -1; +} +#endif + +/** + * virDomainBlkioDeviceWeightParseXML + * + * this function parses a XML node: + * + * <device> + * <path>/fully/qualified/device/path</path> + * <weight>weight</weight> + * </device> + * + * and fills a virBlkioDeviceWeight struct. + */ +static int virDomainBlkioDeviceWeightParseXML(xmlNodePtr root, + virBlkioDeviceWeightPtr dw) +{ + char *c; + xmlNodePtr node; + + if (!dw) + return -1; + + node = root->children; + while (node) { + if (node->type == XML_ELEMENT_NODE) { + if (xmlStrEqual(node->name, BAD_CAST "path")) { + dw->path = (char *)xmlNodeGetContent(node); + } else if (xmlStrEqual(node->name, BAD_CAST "weight")) { + c = (char *)xmlNodeGetContent(node); + if (virStrToLong_ui(c, NULL, 10, &dw->weight) < 0) { + VIR_FREE(c); + VIR_FREE(dw->path); + return -1; + } + VIR_FREE(c); + } + } + node = node->next; + } + + return 0; +} + + + static void virDomainObjListDataFree(void *payload, const void *name ATTRIBUTE_UNUSED) { @@ -1272,6 +1370,10 @@ void virDomainDefFree(virDomainDefPtr def) VIR_FREE(def->emulator); VIR_FREE(def->description); + virBlkioDeviceWeightArrayClear(def->blkio.devices, + def->blkio.ndevices); + VIR_FREE(def->blkio.devices); + virDomainWatchdogDefFree(def->watchdog); virDomainMemballoonDefFree(def->memballoon); @@ -6711,6 +6813,24 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps, &def->blkio.weight) < 0) def->blkio.weight = 0; + n = virXPathNodeSet("./blkiotune/device", ctxt, &nodes); + if (n > 0) { + if (VIR_ALLOC_N(def->blkio.devices, n) < 0) { + virReportOOMError(); + goto error; + } + + for (i = 0; i < n; i++) { + if (virDomainBlkioDeviceWeightParseXML(nodes[i], + &def->blkio.devices[i]) < 0) { + virBlkioDeviceWeightArrayClear(def->blkio.devices, i); + goto error; + } + } + def->blkio.ndevices = n; + VIR_FREE(nodes); + } + /* Extract other memory tunables */ if (virXPathULong("string(./memtune/hard_limit)", ctxt, &def->mem.hard_limit) < 0) @@ -10849,10 +10969,22 @@ virDomainDefFormatInternal(virDomainDefPtr def, def->mem.cur_balloon); /* add blkiotune only if there are any */ - if (def->blkio.weight) { + if (def->blkio.weight || def->blkio.devices) { virBufferAddLit(buf, " <blkiotune>\n"); - virBufferAsprintf(buf, " <weight>%u</weight>\n", - def->blkio.weight); + + if (def->blkio.weight) + virBufferAsprintf(buf, " <weight>%u</weight>\n", + def->blkio.weight); + + for (n = 0; n < def->blkio.ndevices; n++) { + virBufferAddLit(buf, " <device>\n"); + virBufferEscapeString(buf, " <path>%s</path>\n", + def->blkio.devices[n].path); + virBufferAsprintf(buf, " <weight>%u</weight>\n", + def->blkio.devices[n].weight); + virBufferAddLit(buf, " </device>\n"); + } + virBufferAddLit(buf, " </blkiotune>\n"); } diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 5ebb441..7db2a11 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1361,6 +1361,20 @@ struct _virDomainNumatuneDef { /* Future NUMA tuning related stuff should go here. */ }; +typedef struct _virBlkioDeviceWeight virBlkioDeviceWeight; +typedef virBlkioDeviceWeight *virBlkioDeviceWeightPtr; +struct _virBlkioDeviceWeight { + char *path; + unsigned int weight; +}; + +void virBlkioDeviceWeightArrayClear(virBlkioDeviceWeightPtr deviceWeights, + int ndevices); +int virBlkioDeviceWeightToStr(virBlkioDeviceWeightPtr deviceWeights, + int ndevices, + char **result); + + /* * Guest VM main configuration * @@ -1378,6 +1392,9 @@ struct _virDomainDef { struct { unsigned int weight; + + size_t ndevices; + virBlkioDeviceWeightPtr devices; } blkio; struct { diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 2185294..f7d0fb2 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -231,6 +231,8 @@ virDomainAuditVcpu; # domain_conf.h +virBlkioDeviceWeightArrayClear; +virBlkioDeviceWeightToStr; virDiskNameToBusDeviceIndex; virDiskNameToIndex; virDomainActualNetDefFree; diff --git a/tools/virsh.c b/tools/virsh.c index 5544a41..fe87e9b 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -4648,6 +4648,8 @@ static const vshCmdOptDef opts_blkiotune[] = { {"domain", VSH_OT_DATA, VSH_OFLAG_REQ, N_("domain name, id or uuid")}, {"weight", VSH_OT_INT, VSH_OFLAG_NONE, N_("IO Weight in range [100, 1000]")}, + {"device-weight", VSH_OT_STRING, VSH_OFLAG_NONE, + N_("per-device IO Weights, in the form of /path/to/device,weight;...")}, {"config", VSH_OT_BOOL, 0, N_("affect next boot")}, {"live", VSH_OT_BOOL, 0, N_("affect running domain")}, {"current", VSH_OT_BOOL, 0, N_("affect current domain")}, @@ -4658,6 +4660,7 @@ static bool cmdBlkiotune(vshControl * ctl, const vshCmd * cmd) { virDomainPtr dom; + const char *device_weight = NULL; int weight = 0; int nparams = 0; int rv = 0; @@ -4702,6 +4705,16 @@ cmdBlkiotune(vshControl * ctl, const vshCmd * cmd) } } + rv = vshCommandOptString(cmd, "device-weight", &device_weight); + if (rv < 0) { + vshError(ctl, "%s", + _("Unable to parse string parameter")); + goto cleanup; + } + if (rv > 0) { + nparams++; + } + if (nparams == 0) { /* get the number of blkio parameters */ if (virDomainGetBlkioParameters(dom, NULL, &nparams, flags) != 0) { @@ -4749,6 +4762,10 @@ cmdBlkiotune(vshControl * ctl, const vshCmd * cmd) vshPrint(ctl, "%-15s: %d\n", params[i].field, params[i].value.b); break; + case VIR_TYPED_PARAM_STRING: + vshPrint(ctl, "%-15s: %s\n", params[i].field, + params[i].value.s); + break; default: vshPrint(ctl, "unimplemented blkio parameter type\n"); } @@ -4765,15 +4782,32 @@ cmdBlkiotune(vshControl * ctl, const vshCmd * cmd) if (weight) { temp->value.ui = weight; - strncpy(temp->field, VIR_DOMAIN_BLKIO_WEIGHT, - sizeof(temp->field)); + if (!virStrcpy(temp->field, VIR_DOMAIN_BLKIO_WEIGHT, + sizeof(temp->field))) { + virTypedParameterArrayClear(params, nparams); + ret = false; + goto cleanup; + } weight = 0; } + + if (device_weight) { + /* Safe to cast away const here. */ + temp->value.s = (char *)device_weight; + temp->type = VIR_TYPED_PARAM_STRING; + if (!virStrcpy(temp->field, VIR_DOMAIN_BLKIO_DEVICE_WEIGHT, + sizeof(temp->field))) { + virTypedParameterArrayClear(params, nparams); + ret = false; + goto cleanup; + } + } } - if (virDomainSetBlkioParameters(dom, params, nparams, flags) != 0) + ret = true; + if (virDomainSetBlkioParameters(dom, params, nparams, flags) != 0) { vshError(ctl, "%s", _("Unable to change blkio parameters")); - else - ret = true; + ret = false; + } } cleanup: diff --git a/tools/virsh.pod b/tools/virsh.pod index 775d302..a117e84 100644 --- a/tools/virsh.pod +++ b/tools/virsh.pod @@ -1032,12 +1032,16 @@ value are kilobytes (i.e. blocks of 1024 bytes). Specifying -1 as a value for these limits is interpreted as unlimited. -=item B<blkiotune> I<domain-id> [I<--weight> B<weight>] [[I<--config>] +=item B<blkiotune> I<domain-id> [I<--weight> B<weight>] +[I<--device-weight> B<device-weights>] [[I<--config>] [I<--live>] | [I<--current>]] Display or set the blkio parameters. QEMU/KVM supports I<--weight>. I<--weight> is in range [100, 1000]. +B<device-weights> is a single string listing one or more device/weight +pairs, in the format of /path/to/device,weight;/path/to/device,weight. + If I<--live> is specified, affect a running guest. If I<--config> is specified, affect the next boot of a persistent guest. If I<--current> is specified, affect the current guest state. -- 1.7.3.1 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list