---
 src/conf/domain_conf.c                          |  285 ++++++++++++++++++++++-
 src/conf/domain_conf.h                          |   26 ++
 src/libvirt_private.syms                        |    4 +
 src/qemu/qemu_cgroup.c                          |  139 +++++++++++
 src/qemu/qemu_cgroup.h                          |    2 +
 src/qemu/qemu_process.c                         |    4 +
 tests/qemuxml2argvdata/qemuxml2argv-cputune.xml |    2 +
 7 files changed, 460 insertions(+), 2 deletions(-)

diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 60e0318..0a1f973 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -997,6 +997,23 @@ virDomainVcpuPinDefFree(virDomainVcpuPinDefPtr *def,
     VIR_FREE(def);
 }
 
+/* Free an array of nvcpubw per-vcpu bandwidth definitions
+ * together with the array itself. */
+static void
+virDomainVcpuBWDefFree(virDomainVcpuBWDefPtr *def,
+                       int nvcpubw)
+{
+    int i;
+
+    if (!def || !nvcpubw)
+        return;
+
+    for (i = 0; i < nvcpubw; i++)
+        VIR_FREE(def[i]);
+
+    VIR_FREE(def);
+}
+
 void virDomainDefFree(virDomainDefPtr def)
 {
     unsigned int i;
@@ -1089,6 +1106,9 @@ void virDomainDefFree(virDomainDefPtr def)
 
     virCPUDefFree(def->cpu);
 
+    virDomainVcpuBWDefFree(def->cputune.vcpubw,
+                           def->cputune.nvcpubw);
+
     virDomainVcpuPinDefFree(def->cputune.vcpupin, def->cputune.nvcpupin);
 
     VIR_FREE(def->numatune.memory.nodemask);
@@ -5715,6 +5735,58 @@ error:
     goto cleanup;
 }
 
+/* Parse the XML definition for a vcpubandwidth.
+ * A missing or unparsable period/quota attribute is stored as 0.
+ * Returns a newly allocated definition, or NULL on error. */
+static virDomainVcpuBWDefPtr
+virDomainVcpuBWDefParseXML(const xmlNodePtr node,
+                           xmlXPathContextPtr ctxt,
+                           int maxvcpus)
+{
+    virDomainVcpuBWDefPtr def;
+    xmlNodePtr oldnode = ctxt->node;
+    unsigned int vcpuid;
+    unsigned long long period;
+    long long quota;
+    int ret;
+
+    if (VIR_ALLOC(def) < 0) {
+        virReportOOMError();
+        return NULL;
+    }
+
+    ctxt->node = node;
+
+    ret = virXPathUInt("string(./@vcpu)", ctxt, &vcpuid);
+    if (ret == -2) {
+        virDomainReportError(VIR_ERR_INTERNAL_ERROR,
+                             "%s", _("vcpu id must be an unsigned integer"));
+        goto error;
+    } else if (ret == -1) {
+        virDomainReportError(VIR_ERR_INTERNAL_ERROR,
+                             "%s", _("can't parse bandwidth node"));
+        goto error;
+    }
+
+    if (vcpuid >= maxvcpus) {
+        virDomainReportError(VIR_ERR_INTERNAL_ERROR,
+                             "%s", _("vcpu id must be less than maxvcpus"));
+        goto error;
+    }
+
+    if (virXPathULongLong("string(./@period)", ctxt, &period) < 0)
+        period = 0;
+
+    if (virXPathLongLong("string(./@quota)", ctxt, &quota) < 0)
+        quota = 0;
+
+    def->vcpuid = vcpuid;
+    def->period = period;
+    def->quota = quota;
+
+cleanup:
+    ctxt->node = oldnode;
+    return def;
+
+error:
+    VIR_FREE(def);
+    goto cleanup;
+}
+
 
 static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps,
                                             xmlDocPtr xml,
@@ -5881,6 +5959,49 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps,
                       &def->cputune.shares) < 0)
         def->cputune.shares = 0;
 
+    if ((n = virXPathNodeSet("./cputune/bandwidth", ctxt, &nodes)) < 0)
+        goto error;
+
+    if (n > def->maxvcpus) {
+        virDomainReportError(VIR_ERR_INTERNAL_ERROR,
+                             "%s", _("bandwidth nodes must be less than"
+                                     " maxvcpus"));
+        goto error;
+    }
+
+    if (n && VIR_ALLOC_N(def->cputune.vcpubw, n) < 0)
+        goto no_memory;
+
+    for (i = 0; i < n; i++) {
+        virDomainVcpuBWDefPtr vcpubw = NULL;
+        vcpubw = virDomainVcpuBWDefParseXML(nodes[i], ctxt, def->maxvcpus);
+
+        if (!vcpubw)
+            goto error;
+
+        if (virDomainVcpuBWIsDuplicate(def->cputune.vcpubw,
+                                       def->cputune.nvcpubw,
+                                       vcpubw->vcpuid)) {
+            virDomainReportError(VIR_ERR_INTERNAL_ERROR,
+                                 "%s", _("duplicate vcpubandwidth for same"
+                                         " vcpu"));
+            VIR_FREE(vcpubw);
+            goto error;
+        }
+
+        if (vcpubw->period || vcpubw->quota)
+            def->cputune.vcpubw[def->cputune.nvcpubw++] = vcpubw;
+        else
+            VIR_FREE(vcpubw);
+    }
+    if (def->cputune.nvcpubw)
+        ignore_value(VIR_REALLOC_N(def->cputune.vcpubw,
+                                   def->cputune.nvcpubw));
+    else
+        VIR_FREE(def->cputune.vcpubw);
+
+    VIR_FREE(nodes);
+
     if ((n = virXPathNodeSet("./cputune/vcpupin", ctxt, &nodes)) < 0) {
         goto error;
     }
@@ -8274,6 +8395,147 @@ virDomainVcpuPinDel(virDomainDefPtr def, int vcpu)
     return 0;
 }
 
+/* Check if vcpubw with same vcpuid already exists.
+ * Return 1 if exists, 0 if not. */
+int
+virDomainVcpuBWIsDuplicate(virDomainVcpuBWDefPtr *def,
+                           int nvcpubw,
+                           int vcpu)
+{
+    int i;
+
+    if (!def || !nvcpubw)
+        return 0;
+
+    for (i = 0; i < nvcpubw; i++) {
+        if (def[i]->vcpuid == vcpu)
+            return 1;
+    }
+
+    return 0;
+}
+
+/* Look up the bandwidth definition for the given vcpu.
+ * Return the entry if found, NULL otherwise. */
+virDomainVcpuBWDefPtr
+virDomainVcpuBWFindByVcpu(virDomainVcpuBWDefPtr *def,
+                          int nvcpubw,
+                          int vcpu)
+{
+    int i;
+
+    if (!def || !nvcpubw)
+        return NULL;
+
+    for (i = 0; i < nvcpubw; i++) {
+        if (def[i]->vcpuid == vcpu)
+            return def[i];
+    }
+
+    return NULL;
+}
+
+/* Set period/quota for a vcpu, adding or updating its entry as needed.
+ * Passing period == 0 and quota == 0 removes any existing entry.
+ * Return 0 on success, -1 on allocation failure. */
+int
+virDomainVcpuBWAdd(virDomainDefPtr def,
+                   unsigned long long period,
+                   long long quota,
+                   int vcpu)
+{
+    virDomainVcpuBWDefPtr *vcpubw_list = NULL;
+    virDomainVcpuBWDefPtr vcpubw = NULL;
+
+    /* No vcpubw exists yet. */
+    if (!def->cputune.nvcpubw) {
+        if (period == 0 && quota == 0)
+            return 0;
+
+        if (VIR_ALLOC(vcpubw) < 0)
+            goto no_memory;
+
+        if (VIR_ALLOC(vcpubw_list) < 0)
+            goto no_memory;
+
+        vcpubw->vcpuid = vcpu;
+        vcpubw->period = period;
+        vcpubw->quota = quota;
+        vcpubw_list[def->cputune.nvcpubw++] = vcpubw;
+
+        def->cputune.vcpubw = vcpubw_list;
+    } else {
+        int nvcpubw = def->cputune.nvcpubw;
+        vcpubw_list = def->cputune.vcpubw;
+        if (virDomainVcpuBWIsDuplicate(vcpubw_list, nvcpubw, vcpu)) {
+            vcpubw = virDomainVcpuBWFindByVcpu(vcpubw_list, nvcpubw, vcpu);
+            if (period == 0 && quota == 0) {
+                return virDomainVcpuBWDel(def, vcpu);
+            } else {
+                vcpubw->vcpuid = vcpu;
+                vcpubw->period = period;
+                vcpubw->quota = quota;
+            }
+        } else {
+            if (period == 0 && quota == 0)
+                return 0;
+
+            if (VIR_ALLOC(vcpubw) < 0)
+                goto no_memory;
+
+            /* Grow the array through def->cputune.vcpubw so that the
+             * definition never keeps a pointer to freed memory. */
+            if (VIR_REALLOC_N(def->cputune.vcpubw, nvcpubw + 1) < 0)
+                goto no_memory;
+
+            vcpubw->vcpuid = vcpu;
+            vcpubw->period = period;
+            vcpubw->quota = quota;
+            def->cputune.vcpubw[def->cputune.nvcpubw++] = vcpubw;
+        }
+    }
+
+    return 0;
+
+no_memory:
+    virReportOOMError();
+    VIR_FREE(vcpubw);
+    return -1;
+}
+
+/* Remove the bandwidth entry for the given vcpu, if there is one.
+ * Always returns 0. */
+int
+virDomainVcpuBWDel(virDomainDefPtr def, int vcpu)
+{
+    int n;
+    bool deleted = false;
+    virDomainVcpuBWDefPtr *vcpubw_list = def->cputune.vcpubw;
+
+    /* No vcpubw exists yet */
+    if (!def->cputune.nvcpubw)
+        return 0;
+
+    for (n = 0; n < def->cputune.nvcpubw; n++) {
+        if (vcpubw_list[n]->vcpuid == vcpu) {
+            VIR_FREE(vcpubw_list[n]);
+            memmove(&vcpubw_list[n], &vcpubw_list[n+1],
+                    (def->cputune.nvcpubw - n - 1) *
+                    sizeof(virDomainVcpuBWDefPtr));
+            deleted = true;
+            break;
+        }
+    }
+
+    if (!deleted)
+        return 0;
+
+    if (--def->cputune.nvcpubw == 0) {
+        VIR_FREE(def->cputune.vcpubw);
+    } else {
+        if (VIR_REALLOC_N(def->cputune.vcpubw,
+                          def->cputune.nvcpubw) < 0) {
+            /* ignore, harmless */
+        }
+    }
+
+    return 0;
+}
+
 static int
 virDomainLifecycleDefFormat(virBufferPtr buf,
                             int type,
@@ -9553,12 +9821,24 @@ char *virDomainDefFormat(virDomainDefPtr def,
         virBufferAsprintf(&buf, " current='%u'", def->vcpus);
     virBufferAsprintf(&buf, ">%u</vcpu>\n", def->maxvcpus);
 
-    if (def->cputune.shares || def->cputune.vcpupin)
+    if (def->cputune.shares || def->cputune.vcpupin ||
+        def->cputune.vcpubw)
         virBufferAddLit(&buf, "  <cputune>\n");
 
     if (def->cputune.shares)
         virBufferAsprintf(&buf, "    <shares>%lu</shares>\n",
                           def->cputune.shares);
+    if (def->cputune.vcpubw) {
+        int i;
+        for (i = 0; i < def->cputune.nvcpubw; i++) {
+            virBufferAsprintf(&buf, "    <bandwidth vcpu='%u' ",
+                              def->cputune.vcpubw[i]->vcpuid);
+            virBufferAsprintf(&buf, "period='%llu' ",
+                              def->cputune.vcpubw[i]->period);
+            virBufferAsprintf(&buf, "quota='%lld'/>\n",
+                              def->cputune.vcpubw[i]->quota);
+        }
+    }
     if (def->cputune.vcpupin) {
         int i;
         for (i = 0; i < def->cputune.nvcpupin; i++) {
@@ -9580,7 +9860,8 @@ char *virDomainDefFormat(virDomainDefPtr def,
         }
     }
 
-    if (def->cputune.shares || def->cputune.vcpupin)
+    if (def->cputune.shares || def->cputune.vcpupin ||
+        def->cputune.vcpubw)
         virBufferAddLit(&buf, "  </cputune>\n");
 
     if (def->numatune.memory.nodemask)
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index e81977c..a2929b5 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -1108,6 +1108,15 @@ struct _virDomainVcpuPinDef {
     char *cpumask;
 };
 
+/* Per-vcpu CPU bandwidth tuning (<bandwidth> under <cputune>) */
+typedef struct _virDomainVcpuBWDef virDomainVcpuBWDef;
+typedef virDomainVcpuBWDef *virDomainVcpuBWDefPtr;
+struct _virDomainVcpuBWDef {
+    int vcpuid;
+    unsigned long long period;
+    long long quota;
+};
+
 int virDomainVcpuPinIsDuplicate(virDomainVcpuPinDefPtr *def,
                                 int nvcpupin,
                                 int vcpu);
@@ -1116,6 +1125,14 @@ virDomainVcpuPinDefPtr virDomainVcpuPinFindByVcpu(virDomainVcpuPinDefPtr *def,
                                                   int nvcpupin,
                                                   int vcpu);
 
+int virDomainVcpuBWIsDuplicate(virDomainVcpuBWDefPtr *def,
+                               int nvcpubw,
+                               int vcpu);
+
+virDomainVcpuBWDefPtr virDomainVcpuBWFindByVcpu(virDomainVcpuBWDefPtr *def,
+                                                int nvcpubw,
+                                                int vcpu);
+
 enum virDomainNumatuneMemMode {
     VIR_DOMAIN_NUMATUNE_MEM_STRICT,
     VIR_DOMAIN_NUMATUNE_MEM_PREFERRED,
@@ -1170,6 +1187,8 @@ struct _virDomainDef {
 
     struct {
         unsigned long shares;
+        int nvcpubw;
+        virDomainVcpuBWDefPtr *vcpubw;
         int nvcpupin;
         virDomainVcpuPinDefPtr *vcpupin;
     } cputune;
@@ -1413,6 +1432,13 @@ int virDomainVcpuPinAdd(virDomainDefPtr def,
 
 int virDomainVcpuPinDel(virDomainDefPtr def, int vcpu);
 
+int virDomainVcpuBWAdd(virDomainDefPtr def,
+                       unsigned long long period,
+                       long long quota,
+                       int vcpu);
+
+int virDomainVcpuBWDel(virDomainDefPtr def, int vcpu);
+
 int virDomainDiskIndexByName(virDomainDefPtr def, const char *name);
 int virDomainDiskInsert(virDomainDefPtr def,
                         virDomainDiskDefPtr disk);
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 9b9b6ce..aad0c3a 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -371,6 +371,10 @@ virDomainTimerTickpolicyTypeFromString;
 virDomainTimerTickpolicyTypeToString;
 virDomainTimerTrackTypeFromString;
 virDomainTimerTrackTypeToString;
+virDomainVcpuBWAdd;
+virDomainVcpuBWDel;
+virDomainVcpuBWFindByVcpu;
+virDomainVcpuBWIsDuplicate;
 virDomainVcpuPinAdd;
 virDomainVcpuPinDel;
 virDomainVcpuPinFindByVcpu;
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index 1298924..201c0b8 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -24,6 +24,7 @@
 #include <config.h>
 
 #include "qemu_cgroup.h"
+#include "qemu_domain.h"
 #include "cgroup.h"
 #include "logging.h"
 #include "memory.h"
@@ -376,6 +377,144 @@ cleanup:
     return -1;
 }
 
+/* Apply the period and quota of vcpubw to cgroup. If setting the quota
+ * fails, a period changed by this call is rolled back to its old value.
+ * Return 0 on success (or if there is nothing to set), -1 on error. */
+int qemuSetupCgroupVcpuBW(virCgroupPtr cgroup, virDomainVcpuBWDefPtr vcpubw)
+{
+    int rc;
+    unsigned long long old_period;
+
+    if (!vcpubw)
+        return 0;
+
+    if (vcpubw->period == 0 && vcpubw->quota == 0)
+        return 0;
+
+    if (vcpubw->period) {
+        /* get old period, and we can rollback if set quota failed */
+        rc = virCgroupGetCpuCfsPeriod(cgroup, &old_period);
+        if (rc < 0) {
+            virReportSystemError(-rc,
+                                 "%s", _("Unable to get cpu bandwidth period"));
+            return -1;
+        }
+
+        rc = virCgroupSetCpuCfsPeriod(cgroup, vcpubw->period);
+        if (rc < 0) {
+            virReportSystemError(-rc,
+                                 "%s", _("Unable to set cpu bandwidth period"));
+            return -1;
+        }
+    }
+
+    if (vcpubw->quota) {
+        rc = virCgroupSetCpuCfsQuota(cgroup, vcpubw->quota);
+        if (rc < 0) {
+            virReportSystemError(-rc,
+                                 "%s", _("Unable to set cpu bandwidth quota"));
+            goto cleanup;
+        }
+    }
+
+    return 0;
+
+cleanup:
+    if (vcpubw->period) {
+        rc = virCgroupSetCpuCfsPeriod(cgroup, old_period);
+        if (rc < 0)
+            virReportSystemError(-rc,
+                                 "%s",
+                                 _("Unable to rollback cpu bandwidth period"));
+    }
+
+    return -1;
+}
+
+/* Create one sub-cgroup per vcpu thread, move the thread into it and
+ * apply the vcpu's bandwidth settings there. If vcpu threads can not be
+ * told apart, the last configured bandwidth is applied to the domain
+ * cgroup instead. Return 0 on success, -1 on error. */
+int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm)
+{
+    virCgroupPtr cgroup = NULL;
+    virCgroupPtr cgroup_vcpu = NULL;
+    qemuDomainObjPrivatePtr priv = vm->privateData;
+    int rc;
+    unsigned int i;
+    virDomainVcpuBWDefPtr *vcpubw_list = vm->def->cputune.vcpubw;
+    virDomainVcpuBWDefPtr vcpubw = NULL;
+    int nvcpubw = vm->def->cputune.nvcpubw;
+
+    if (driver->cgroup == NULL)
+        return 0; /* Not supported, so claim success */
+
+    rc = virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0);
+    if (rc != 0) {
+        virReportSystemError(-rc,
+                             _("Unable to find cgroup for %s"),
+                             vm->def->name);
+        goto cleanup;
+    }
+
+    if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) {
+        /* If we do not know the VCPU<->PID mapping or all vcpus run in the
+         * same thread, we can not control each vcpu. So just use the last
+         * config. */
+        if (vcpubw_list) {
+            if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
+                if (qemuSetupCgroupVcpuBW(cgroup, vcpubw_list[nvcpubw - 1]) < 0)
+                    goto cleanup;
+            }
+        }
+        virCgroupFree(&cgroup);
+        return 0;
+    }
+
+    for (i = 0; i < priv->nvcpupids; i++) {
+        rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 1);
+        if (rc < 0) {
+            virReportSystemError(-rc,
+                                 _("Unable to create vcpu cgroup for %s(vcpu:"
+                                   " %d)"),
+                                 vm->def->name, i);
+            goto cleanup;
+        }
+
+        /* move the thread for vcpu to sub dir */
+        rc = virCgroupAddTask(cgroup_vcpu, priv->vcpupids[i]);
+        if (rc < 0) {
+            virReportSystemError(-rc,
+                                 _("unable to add vcpu %d task %d to cgroup"),
+                                 i, priv->vcpupids[i]);
+            goto cleanup;
+        }
+
+        if (vcpubw_list) {
+            if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
+                vcpubw = virDomainVcpuBWFindByVcpu(vcpubw_list, nvcpubw, i);
+                if (qemuSetupCgroupVcpuBW(cgroup_vcpu, vcpubw) < 0)
+                    goto cleanup;
+            }
+        }
+
+        virCgroupFree(&cgroup_vcpu);
+    }
+
+    virCgroupFree(&cgroup_vcpu);
+    virCgroupFree(&cgroup);
+    return 0;
+
+cleanup:
+    virCgroupFree(&cgroup_vcpu);
+    if (cgroup) {
+        virCgroupRemove(cgroup);
+        virCgroupFree(&cgroup);
+    }
+
+    return -1;
+}
+
 
 int qemuRemoveCgroup(struct qemud_driver *driver,
                      virDomainObjPtr vm,
diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h
index e8abfb4..f0a5cee 100644
--- a/src/qemu/qemu_cgroup.h
+++ b/src/qemu/qemu_cgroup.h
@@ -49,6 +49,8 @@ int qemuSetupHostUsbDeviceCgroup(usbDevice *dev,
                                  void *opaque);
 int qemuSetupCgroup(struct qemud_driver *driver,
                     virDomainObjPtr vm);
+int qemuSetupCgroupVcpuBW(virCgroupPtr cgroup, virDomainVcpuBWDefPtr vcpubw);
+int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm);
 int qemuRemoveCgroup(struct qemud_driver *driver,
                      virDomainObjPtr vm,
                      int quiet);
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 88a31a3..ce3a4bb 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -2677,6 +2677,10 @@ int qemuProcessStart(virConnectPtr conn,
     if (qemuProcessDetectVcpuPIDs(driver, vm) < 0)
         goto cleanup;
 
+    VIR_DEBUG("Setting cgroup for each VCPU(if required)");
+    if (qemuSetupCgroupForVcpu(driver, vm) < 0)
+        goto cleanup;
+
     VIR_DEBUG("Setting VCPU affinities");
     if (qemuProcessSetVcpuAffinites(conn, vm) < 0)
         goto cleanup;
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml b/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml
index 0afbadb..0a67e40 100644
--- a/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml
+++ b/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml
@@ -6,6 +6,8 @@
   <vcpu>2</vcpu>
   <cputune>
     <shares>2048</shares>
+    <bandwidth vcpu='0' period='1000000' quota='-1'/>
+    <bandwidth vcpu='1' period='1000' quota='1000'/>
     <vcpupin vcpu='0' cpuset='0'/>
     <vcpupin vcpu='1' cpuset='1'/>
   </cputune>
-- 
1.7.1

--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list