At 06/30/2011 11:09 AM, Wen Congyang Write: > --- > src/conf/domain_conf.c | 272 ++++++++++++++++++++++- > src/conf/domain_conf.h | 25 ++ > src/libvirt_private.syms | 4 + > src/qemu/qemu_cgroup.c | 131 +++++++++++ > src/qemu/qemu_cgroup.h | 2 + > src/qemu/qemu_process.c | 4 + > tests/qemuxml2argvdata/qemuxml2argv-cputune.xml | 2 + > 7 files changed, 438 insertions(+), 2 deletions(-) > > diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c > index 60e0318..0a1f973 100644 > --- a/src/conf/domain_conf.c > +++ b/src/conf/domain_conf.c > @@ -997,6 +997,21 @@ virDomainVcpuPinDefFree(virDomainVcpuPinDefPtr *def, > VIR_FREE(def); > } > > +static void > +virDomainVcpuBWDefFree(virDomainVcpuBWDefPtr *def, > + int nvcpubw) > +{ > + int i; > + > + if (!def || !nvcpubw) > + return; > + > + for(i = 0; i < nvcpubw; i++) > + VIR_FREE(def[i]); > + > + VIR_FREE(def); > +} > + > void virDomainDefFree(virDomainDefPtr def) > { > unsigned int i; > @@ -1089,6 +1104,9 @@ void virDomainDefFree(virDomainDefPtr def) > > virCPUDefFree(def->cpu); > > + virDomainVcpuBWDefFree(def->cputune.vcpubw, > + def->cputune.nvcpubw); > + > virDomainVcpuPinDefFree(def->cputune.vcpupin, def->cputune.nvcpupin); > > VIR_FREE(def->numatune.memory.nodemask); > @@ -5715,6 +5733,62 @@ error: > goto cleanup; > } > > +/* Parse the XML definition for a vcpubandwidth */ > +static virDomainVcpuBWDefPtr > +virDomainVcpuBWDefParseXML(const xmlNodePtr node, > + xmlXPathContextPtr ctxt, > + int maxvcpus) > +{ > + virDomainVcpuBWDefPtr def; > + xmlNodePtr oldnode = ctxt->node; > + unsigned int vcpuid; > + unsigned long long period; > + long long quota; > + int ret; > + > + if (VIR_ALLOC(def) < 0) { > + virReportOOMError(); > + return NULL; > + } > + > + ctxt->node = node; > + > + ret = virXPathUInt("string(./@vcpu)", ctxt, &vcpuid); > + if (ret == -2) { > + virDomainReportError(VIR_ERR_INTERNAL_ERROR, > + "%s", _("vcpu id must be an unsigned integer")); > + goto error; > + } else if (ret == -1) { > + 
virDomainReportError(VIR_ERR_INTERNAL_ERROR, > + "%s", _("can't parse vcpupin node")); > + goto error; > + } > + > + if (vcpuid >= maxvcpus) { > + virDomainReportError(VIR_ERR_INTERNAL_ERROR, > + "%s", _("vcpu id must be less than maxvcpus")); > + goto error; > + } > + > + if (virXPathULongLong("string(./@period)", ctxt, &period) < 0) > + period = 0; > + > + if (virXPathLongLong("string(./@quota)", ctxt, &quota) < 0) > + quota = 0; > + > + def->vcpuid = vcpuid; > + def->period = period; > + def->quota = quota; > + > +cleanup: > + ctxt->node = oldnode; > + return def; > + > +error: > + VIR_FREE(def); > + goto cleanup; > +} > + > > static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps, > xmlDocPtr xml, > @@ -5881,6 +5955,49 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps, > &def->cputune.shares) < 0) > def->cputune.shares = 0; > > + if ((n = virXPathNodeSet("./cputune/bandwidth", ctxt, &nodes)) < 0) > + goto error; > + > + if (n > def->maxvcpus) { > + virDomainReportError(VIR_ERR_INTERNAL_ERROR, > + "%s", _("bandwith nodes must be less than" > + " maxvcpus")); > + goto error; > + } > + > + if (n && VIR_ALLOC_N(def->cputune.vcpubw, n) < 0) > + goto no_memory; > + > + for (i = 0; i < n; i++) { > + virDomainVcpuBWDefPtr vcpubw = NULL; > + vcpubw = virDomainVcpuBWDefParseXML(nodes[i], ctxt, def->maxvcpus); > + > + if (!vcpubw) > + goto error; > + > + if (virDomainVcpuBWIsDuplicate(def->cputune.vcpubw, > + def->cputune.nvcpubw, > + vcpubw->vcpuid)) { > + virDomainReportError(VIR_ERR_INTERNAL_ERROR, > + "%s", _("duplicate vcpubandwidth for same" > + " vcpu")); > + VIR_FREE(vcpubw); > + goto error; > + } > + > + if (vcpubw->period || vcpubw->quota) > + def->cputune.vcpubw[def->cputune.nvcpubw++] = vcpubw; > + else > + VIR_FREE(vcpubw); > + } > + if (def->cputune.nvcpubw) > + ignore_value(VIR_REALLOC_N(def->cputune.vcpubw, > + def->cputune.nvcpubw)); > + else > + VIR_FREE(def->cputune.vcpubw); > + > + VIR_FREE(nodes); > + > if ((n = 
virXPathNodeSet("./cputune/vcpupin", ctxt, &nodes)) < 0) { > goto error; > } > @@ -8274,6 +8391,144 @@ virDomainVcpuPinDel(virDomainDefPtr def, int vcpu) > return 0; > } > > +/* Check if vcpupin with same vcpuid already exists. > + * Return 1 if exists, 0 if not. */ > +int > +virDomainVcpuBWIsDuplicate(virDomainVcpuBWDefPtr *def, > + int nvcpubw, > + int vcpu) > +{ > + int i; > + > + if (!def || !nvcpubw) > + return 0; > + > + for (i = 0; i < nvcpubw; i++) { > + if (def[i]->vcpuid == vcpu) > + return 1; > + } > + > + return 0; > +} > + > +virDomainVcpuBWDefPtr > +virDomainVcpuBWFindByVcpu(virDomainVcpuBWDefPtr *def, > + int nvcpubw, > + int vcpu) > +{ > + int i; > + > + if (!def || !nvcpubw) > + return NULL; > + > + for (i = 0; i < nvcpubw; i++) { > + if (def[i]->vcpuid == vcpu) > + return def[i]; > + } > + > + return NULL; > +} > + > +int > +virDomainVcpuBWAdd(virDomainDefPtr def, > + unsigned long long period, > + long long quota, > + int vcpu) > +{ > + virDomainVcpuBWDefPtr *vcpubw_list = NULL; > + virDomainVcpuBWDefPtr vcpubw = NULL; > + > + /* No vcpubw exists yet. 
*/ > + if (!def->cputune.nvcpubw) { > + if (period == 0 && quota == 0) > + return 0; > + > + if (VIR_ALLOC(vcpubw) < 0) > + goto no_memory; > + > + if (VIR_ALLOC(vcpubw_list) < 0) > + goto no_memory; > + > + vcpubw->vcpuid = vcpu; > + vcpubw->period = period; > + vcpubw->quota = quota; > + vcpubw_list[def->cputune.nvcpubw++] = vcpubw; > + > + def->cputune.vcpubw = vcpubw_list; > + } else { > + int nvcpubw = def->cputune.nvcpubw; > + vcpubw_list = def->cputune.vcpubw; > + if (virDomainVcpuBWIsDuplicate(vcpubw_list, nvcpubw, vcpu)) { > + vcpubw = virDomainVcpuBWFindByVcpu(vcpubw_list, nvcpubw, vcpu); > + if (period == 0 && quota == 0) { > + return virDomainVcpuBWDel(def, vcpu); > + } else { > + vcpubw->vcpuid = vcpu; > + vcpubw->period = period; > + vcpubw->quota = quota; > + } > + } else { > + if (period == 0 && quota == 0) > + return 0; > + > + if (VIR_ALLOC(vcpubw) < 0) > + goto no_memory; > + > + if (VIR_REALLOC_N(vcpubw_list, nvcpubw + 1) < 0) > + goto no_memory; > + > + vcpubw->vcpuid = vcpu; > + vcpubw->period = period; > + vcpubw->quota = quota; > + vcpubw_list[def->cputune.nvcpubw++] = vcpubw; > + } > + } > + > + return 0; > + > +no_memory: > + virReportOOMError(); > + VIR_FREE(vcpubw); > + return -1; > +} > + > +int > +virDomainVcpuBWDel(virDomainDefPtr def, int vcpu) > +{ > + int n; > + bool deleted = false; > + virDomainVcpuBWDefPtr *vcpubw_list = def->cputune.vcpubw; > + > + /* No vcpubw exists yet */ > + if (!def->cputune.nvcpubw) > + return 0; > + > + for (n = 0; n < def->cputune.nvcpubw; n++) { > + if (vcpubw_list[n]->vcpuid == vcpu) { > + VIR_FREE(vcpubw_list[n]); > + memmove(&vcpubw_list[n], &vcpubw_list[n+1], > + (def->cputune.nvcpubw - n - 1) * > + sizeof(virDomainVcpuBWDefPtr)); > + deleted = true; > + break; > + } > + } > + > + if (!deleted) > + return 0; > + > + if (--def->cputune.nvcpubw == 0) { > + VIR_FREE(def->cputune.vcpubw); > + } else { > + if (VIR_REALLOC_N(def->cputune.vcpubw, > + def->cputune.nvcpubw) < 0) { > + /* ignore, harmless */ 
> + } > + } > + > + return 0; > +} > + > static int > virDomainLifecycleDefFormat(virBufferPtr buf, > int type, > @@ -9553,12 +9808,24 @@ char *virDomainDefFormat(virDomainDefPtr def, > virBufferAsprintf(&buf, " current='%u'", def->vcpus); > virBufferAsprintf(&buf, ">%u</vcpu>\n", def->maxvcpus); > > - if (def->cputune.shares || def->cputune.vcpupin) > + if (def->cputune.shares || def->cputune.vcpupin || > + def->cputune.vcpubw) > virBufferAddLit(&buf, " <cputune>\n"); > > if (def->cputune.shares) > virBufferAsprintf(&buf, " <shares>%lu</shares>\n", > def->cputune.shares); > + if (def->cputune.vcpubw) { > + int i; > + for (i = 0; i < def->cputune.nvcpubw; i++) { > + virBufferAsprintf(&buf, " <bandwidth vcpu='%u' ", > + def->cputune.vcpubw[i]->vcpuid); > + virBufferAsprintf(&buf, "period='%llu' ", > + def->cputune.vcpubw[i]->period); > + virBufferAsprintf(&buf, "quota='%lld'/>\n", > + def->cputune.vcpubw[i]->quota); > + } > + } > if (def->cputune.vcpupin) { > int i; > for (i = 0; i < def->cputune.nvcpupin; i++) { > @@ -9580,7 +9847,8 @@ char *virDomainDefFormat(virDomainDefPtr def, > } > } > > - if (def->cputune.shares || def->cputune.vcpupin) > + if (def->cputune.shares || def->cputune.vcpupin || > + def->cputune.vcpubw) > virBufferAddLit(&buf, " </cputune>\n"); > > if (def->numatune.memory.nodemask) > diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h > index e81977c..a2929b5 100644 > --- a/src/conf/domain_conf.h > +++ b/src/conf/domain_conf.h > @@ -1108,6 +1108,14 @@ struct _virDomainVcpuPinDef { > char *cpumask; > }; > > +typedef struct _virDomainVcpuBWDef virDomainVcpuBWDef; > +typedef virDomainVcpuBWDef *virDomainVcpuBWDefPtr; > +struct _virDomainVcpuBWDef { > + int vcpuid; > + unsigned long long period; > + long long quota; > +}; > + > int virDomainVcpuPinIsDuplicate(virDomainVcpuPinDefPtr *def, > int nvcpupin, > int vcpu); > @@ -1116,6 +1124,14 @@ virDomainVcpuPinDefPtr virDomainVcpuPinFindByVcpu(virDomainVcpuPinDefPtr *def, > int nvcpupin, > int 
vcpu); > > +int virDomainVcpuBWIsDuplicate(virDomainVcpuBWDefPtr *def, > + int nvcpubw, > + int vcpu); > + > +virDomainVcpuBWDefPtr virDomainVcpuBWFindByVcpu(virDomainVcpuBWDefPtr *def, > + int nvcpubw, > + int vcpu); > + > enum virDomainNumatuneMemMode { > VIR_DOMAIN_NUMATUNE_MEM_STRICT, > VIR_DOMAIN_NUMATUNE_MEM_PREFERRED, > @@ -1170,6 +1186,8 @@ struct _virDomainDef { > > struct { > unsigned long shares; > + int nvcpubw; > + virDomainVcpuBWDefPtr *vcpubw; > int nvcpupin; > virDomainVcpuPinDefPtr *vcpupin; > } cputune; > @@ -1413,6 +1431,13 @@ int virDomainVcpuPinAdd(virDomainDefPtr def, > > int virDomainVcpuPinDel(virDomainDefPtr def, int vcpu); > > +int virDomainVcpuBWAdd(virDomainDefPtr def, > + unsigned long long period, > + long long quota, > + int vcpu); > + > +int virDomainVcpuBWDel(virDomainDefPtr def, int vcpu); > + > int virDomainDiskIndexByName(virDomainDefPtr def, const char *name); > int virDomainDiskInsert(virDomainDefPtr def, > virDomainDiskDefPtr disk); > diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms > index 9b9b6ce..aad0c3a 100644 > --- a/src/libvirt_private.syms > +++ b/src/libvirt_private.syms > @@ -371,6 +371,10 @@ virDomainTimerTickpolicyTypeFromString; > virDomainTimerTickpolicyTypeToString; > virDomainTimerTrackTypeFromString; > virDomainTimerTrackTypeToString; > +virDomainVcpuBWAdd; > +virDomainVcpuBWDel; > +virDomainVcpuBWFindByVcpu; > +virDomainVcpuBWIsDuplicate; > virDomainVcpuPinAdd; > virDomainVcpuPinDel; > virDomainVcpuPinFindByVcpu; > diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c > index 1298924..201c0b8 100644 > --- a/src/qemu/qemu_cgroup.c > +++ b/src/qemu/qemu_cgroup.c > @@ -24,6 +24,7 @@ > #include <config.h> > > #include "qemu_cgroup.h" > +#include "qemu_domain.h" > #include "cgroup.h" > #include "logging.h" > #include "memory.h" > @@ -376,6 +377,136 @@ cleanup: > return -1; > } > > +int qemuSetupCgroupVcpuBW(virCgroupPtr cgroup, virDomainVcpuBWDefPtr vcpubw) > +{ > + int rc; > + unsigned long 
long old_period; > + > + if (!vcpubw) > + return 0; > + > + if (vcpubw->period == 0 && vcpubw->quota == 0) > + return 0; > + > + if (vcpubw->period) { > + /* get old period, and we can rollback if set quota failed */ > + rc = virCgroupGetCpuCfsPeriod(cgroup, &old_period); > + if (rc < 0) { > + virReportSystemError(-rc, > + _("%s"), "Unable to get cpu bandwidth period"); > + return -1; > + } > + > + rc = virCgroupSetCpuCfsPeriod(cgroup, vcpubw->period); > + if (rc < 0) { > + virReportSystemError(-rc, > + _("%s"), "Unable to set cpu bandwidth period"); > + return -1; > + } > + } > + > + if (vcpubw->quota) { > + rc = virCgroupSetCpuCfsQuota(cgroup, vcpubw->quota); > + if (rc < 0) { > + virReportSystemError(-rc, > + _("%s"), "Unable to set cpu bandwidth quota"); > + goto cleanup; > + } > + } > + > + return 0; > + > +cleanup: > + if (vcpubw->period) { > + rc = virCgroupSetCpuCfsPeriod(cgroup, old_period); > + if (rc < 0) > + virReportSystemError(-rc, > + _("%s"), > + "Unable to rollback cpu bandwidth period"); > + } > + > + return -1; > +} > + > +int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) > +{ > + virCgroupPtr cgroup = NULL; > + virCgroupPtr cgroup_vcpu = NULL; > + qemuDomainObjPrivatePtr priv = vm->privateData; > + int rc; > + unsigned int i; > + virDomainVcpuBWDefPtr *vcpubw_list = vm->def->cputune.vcpubw; > + virDomainVcpuBWDefPtr vcpubw = NULL; > + int nvcpubw = vm->def->cputune.nvcpubw; > + > + if (driver->cgroup == NULL) > + return 0; /* Not supported, so claim success */ > + > + rc = virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0); > + if (rc != 0) { > + virReportSystemError(-rc, > + _("Unable to find cgroup for %s"), > + vm->def->name); > + goto cleanup; > + } > + > + if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { > + /* If we does not know VCPU<->PID mapping or all vcpu runs in the same > + * thread, we can not control each vcpu. So just use the last config. 
> + */ > + if (vcpubw_list) { > + if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { > + if (qemuSetupCgroupVcpuBW(cgroup, vcpubw_list[nvcpubw - 1]) < 0) > + goto cleanup; > + } > + } > + return 0; > + } > + > + for (i = 0; i < priv->nvcpupids; i++) { > + rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 1); > + if (rc < 0) { > + virReportSystemError(-rc, > + _("Unable to create vcpu cgroup for %s(vcpu:" > + " %d)"), > + vm->def->name, i); > + goto cleanup; > + } > + > + /* move the thread for vcpu to sub dir */ > + rc = virCgroupAddTask(cgroup_vcpu, priv->vcpupids[i]); > + if (rc < 0) { > + virReportSystemError(-rc, > + _("unable to add vcpu %d task %d to cgroup"), > + i, priv->vcpupids[i]); > + goto cleanup; > + } > + > + if (vcpubw_list) { > + if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { > + vcpubw = virDomainVcpuBWFindByVcpu(vcpubw_list, nvcpubw, i); > + if (qemuSetupCgroupVcpuBW(cgroup, vcpubw) < 0) [Review comment: s/cgroup/cgroup_vcpu/ -- the per-vcpu bandwidth should be applied to cgroup_vcpu, the per-vcpu cgroup created just above, not to the domain-level cgroup.] > + goto cleanup; > + } > + } > + > + virCgroupFree(&cgroup_vcpu); > + } > + > + virCgroupFree(&cgroup_vcpu); > + virCgroupFree(&cgroup); > + return 0; > + > +cleanup: > + virCgroupFree(&cgroup_vcpu); > + if (cgroup) { > + virCgroupRemove(cgroup); > + virCgroupFree(&cgroup); > + } > + > + return -1; > +} > + > > int qemuRemoveCgroup(struct qemud_driver *driver, > virDomainObjPtr vm, > diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h > index e8abfb4..f0a5cee 100644 > --- a/src/qemu/qemu_cgroup.h > +++ b/src/qemu/qemu_cgroup.h > @@ -49,6 +49,8 @@ int qemuSetupHostUsbDeviceCgroup(usbDevice *dev, > void *opaque); > int qemuSetupCgroup(struct qemud_driver *driver, > virDomainObjPtr vm); > +int qemuSetupCgroupVcpuBW(virCgroupPtr cgroup, virDomainVcpuBWDefPtr vcpubw); > +int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm); > int qemuRemoveCgroup(struct qemud_driver *driver, > virDomainObjPtr vm, > int quiet); > diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c > 
index 88a31a3..ce3a4bb 100644 > --- a/src/qemu/qemu_process.c > +++ b/src/qemu/qemu_process.c > @@ -2677,6 +2677,10 @@ int qemuProcessStart(virConnectPtr conn, > if (qemuProcessDetectVcpuPIDs(driver, vm) < 0) > goto cleanup; > > + VIR_DEBUG("Setting cgroup for each VCPU(if required)"); > + if (qemuSetupCgroupForVcpu(driver, vm) < 0) > + goto cleanup; > + > VIR_DEBUG("Setting VCPU affinities"); > if (qemuProcessSetVcpuAffinites(conn, vm) < 0) > goto cleanup; > diff --git a/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml b/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml > index 0afbadb..0a67e40 100644 > --- a/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml > +++ b/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml > @@ -6,6 +6,8 @@ > <vcpu>2</vcpu> > <cputune> > <shares>2048</shares> > + <bandwidth vcpu='0' period='1000000' quota='-1'/> > + <bandwidth vcpu='1' period='1000' quota='1000'/> > <vcpupin vcpu='0' cpuset='0'/> > <vcpupin vcpu='1' cpuset='1'/> > </cputune> -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list