vcpu threads pin are implemented using sched_setaffinity(), but not controlled by cgroup. This patch does the following things: 1) enable cpuset cgroup 2) reflect all the vcpu threads pin info to cgroup Signed-off-by: Tang Chen <tangchen@xxxxxxxxxxxxxx> Signed-off-by: Hu Tao <hutao@xxxxxxxxxxxxxx> --- src/libvirt_private.syms | 2 ++ src/qemu/qemu_cgroup.c | 43 ++++++++++++++++++++++++ src/qemu/qemu_cgroup.h | 4 +++ src/qemu/qemu_driver.c | 82 +++++++++++++++++++++++++++++++++------------- src/util/cgroup.c | 35 +++++++++++++++++++- src/util/cgroup.h | 3 ++ 6 files changed, 146 insertions(+), 23 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index b513faf..80ea39a 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -82,6 +82,7 @@ virCgroupGetCpuShares; virCgroupGetCpuacctPercpuUsage; virCgroupGetCpuacctStat; virCgroupGetCpuacctUsage; +virCgroupGetCpusetCpus; virCgroupGetCpusetMems; virCgroupGetFreezerState; virCgroupGetMemSwapHardLimit; @@ -100,6 +101,7 @@ virCgroupSetBlkioWeight; virCgroupSetCpuCfsPeriod; virCgroupSetCpuCfsQuota; virCgroupSetCpuShares; +virCgroupSetCpusetCpus; virCgroupSetCpusetMems; virCgroupSetFreezerState; virCgroupSetMemSwapHardLimit; diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 8a5a536..37874d3 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -491,11 +491,45 @@ cleanup: return -1; } +int qemuSetupCgroupVcpuPin(virCgroupPtr cgroup, + virDomainVcpuPinDefPtr *vcpupin, + int nvcpupin, + int vcpuid) +{ + int i, rc = 0; + char *new_cpus = NULL; + + for (i = 0; i < nvcpupin; i++) { + if (vcpuid == vcpupin[i]->vcpuid) { + new_cpus = virDomainCpuSetFormat(vcpupin[i]->cpumask, + VIR_DOMAIN_CPUMASK_LEN); + if (!new_cpus) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("failed to convert cpu mask")); + rc = -1; + goto cleanup; + } + rc = virCgroupSetCpusetCpus(cgroup, new_cpus); + if (rc != 0) { + virReportSystemError(-rc, + "%s", + _("Unable to set cpuset.cpus")); + goto cleanup; + } + } + } + +cleanup: + VIR_FREE(new_cpus); + return rc; +} + int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) { virCgroupPtr cgroup = NULL; virCgroupPtr cgroup_vcpu = NULL; qemuDomainObjPrivatePtr priv = vm->privateData; + virDomainDefPtr def = vm->def; int rc; unsigned int i; unsigned long long period = vm->def->cputune.period; @@ -567,6 +601,15 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) } } + /* Set vcpupin in cgroup if vcpupin xml is provided */ + if (def->cputune.nvcpupin && + qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPUSET) && + qemuSetupCgroupVcpuPin(cgroup_vcpu, + def->cputune.vcpupin, + def->cputune.nvcpupin, + i) < 0) + goto cleanup; + virCgroupFree(&cgroup_vcpu); } diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h index 34a9312..fa93cdb 100644 --- a/src/qemu/qemu_cgroup.h +++ b/src/qemu/qemu_cgroup.h @@ -53,6 +53,10 @@ int qemuSetupCgroup(struct qemud_driver *driver, int qemuSetupCgroupVcpuBW(virCgroupPtr cgroup, unsigned long long period, long long quota); +int qemuSetupCgroupVcpuPin(virCgroupPtr cgroup, + virDomainVcpuPinDefPtr *vcpupin, + int nvcpupin, + int vcpuid); int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm); int qemuSetupCgroupForEmulator(struct qemud_driver *driver, virDomainObjPtr vm); diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 17c66d7..4552172 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -3714,11 +3714,15 @@ qemudDomainPinVcpuFlags(virDomainPtr dom, struct qemud_driver *driver = dom->conn->privateData; virDomainObjPtr vm; virDomainDefPtr persistentDef = NULL; + virCgroupPtr cgroup_dom = NULL; + virCgroupPtr cgroup_vcpu = NULL; int maxcpu, hostcpus; virNodeInfo nodeinfo; int ret = -1; qemuDomainObjPrivatePtr priv; bool canResetting = true; + int newVcpuPinNum = 0; + virDomainVcpuPinDefPtr *newVcpuPin = NULL; int pcpu; virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | @@ -3767,43 +3771,73 @@ qemudDomainPinVcpuFlags(virDomainPtr dom, if (flags & VIR_DOMAIN_AFFECT_LIVE) { - if (priv->vcpupids != NULL) { - if (virProcessInfoSetAffinity(priv->vcpupids[vcpu], - cpumap, maplen, maxcpu) < 0) - goto cleanup; - } else { + if (priv->vcpupids == NULL) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("cpu affinity is not supported")); goto cleanup; } - if (canResetting) { - if (virDomainVcpuPinDel(vm->def, vcpu) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("failed to delete vcpupin xml of " - "a running domain")); + if (vm->def->cputune.vcpupin) { + newVcpuPin = virDomainVcpuPinDefCopy(vm->def->cputune.vcpupin, + vm->def->cputune.nvcpupin); + if (!newVcpuPin) + goto cleanup; + + newVcpuPinNum = vm->def->cputune.nvcpupin; + } else { + if (VIR_ALLOC(newVcpuPin) < 0) { + virReportOOMError(); + goto cleanup; + } + newVcpuPinNum = 0; + } + + if (virDomainVcpuPinAdd(newVcpuPin, &newVcpuPinNum, cpumap, maplen, vcpu) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("failed to update vcpupin")); + virDomainVcpuPinDefFree(newVcpuPin, newVcpuPinNum); + goto cleanup; + } + + /* Configure the corresponding cpuset cgroup before set affinity. */ + if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPUSET)) { + if (virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup_dom, 0) == 0 && + virCgroupForVcpu(cgroup_dom, vcpu, &cgroup_vcpu, 0) == 0 && + qemuSetupCgroupVcpuPin(cgroup_vcpu, newVcpuPin, newVcpuPinNum, vcpu) < 0) { + virReportError(VIR_ERR_OPERATION_INVALID, + _("failed to set cpuset.cpus in cgroup" + " for vcpu %d"), vcpu); goto cleanup; } } else { - if (!vm->def->cputune.vcpupin) { - if (VIR_ALLOC(vm->def->cputune.vcpupin) < 0) { - virReportOOMError(); - goto cleanup; - } - vm->def->cputune.nvcpupin = 0; + if (virProcessInfoSetAffinity(priv->vcpupids[vcpu], + cpumap, maplen, maxcpu) < 0) { + virReportError(VIR_ERR_SYSTEM_ERROR, + _("failed to set cpu affinity for vcpu %d"), + vcpu); + goto cleanup; } - if (virDomainVcpuPinAdd(vm->def->cputune.vcpupin, - &vm->def->cputune.nvcpupin, - cpumap, - maplen, - vcpu) < 0) { + } + + if (canResetting) { + if (virDomainVcpuPinDel(vm->def, vcpu) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("failed to update or add vcpupin xml of " + _("failed to delete vcpupin xml of " "a running domain")); goto cleanup; } + } else { + if (vm->def->cputune.vcpupin) + virDomainVcpuPinDefFree(vm->def->cputune.vcpupin, vm->def->cputune.nvcpupin); + + vm->def->cputune.vcpupin = newVcpuPin; + vm->def->cputune.nvcpupin = newVcpuPinNum; + newVcpuPin = NULL; } + if (newVcpuPin) + virDomainVcpuPinDefFree(newVcpuPin, newVcpuPinNum); + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) goto cleanup; } @@ -3844,6 +3878,10 @@ qemudDomainPinVcpuFlags(virDomainPtr dom, ret = 0; cleanup: + if (cgroup_vcpu) + virCgroupFree(&cgroup_vcpu); + if (cgroup_dom) + virCgroupFree(&cgroup_dom); if (vm) virDomainObjUnlock(vm); return ret; diff --git a/src/util/cgroup.c b/src/util/cgroup.c index 24e08b4..1808911 100644 --- a/src/util/cgroup.c +++ b/src/util/cgroup.c @@ -543,7 +543,8 @@ static int virCgroupMakeGroup(virCgroupPtr parent, virCgroupPtr group, /* We need to control cpu bandwidth for each vcpu now */ if ((flags & VIR_CGROUP_VCPU) && (i != VIR_CGROUP_CONTROLLER_CPU && - i != VIR_CGROUP_CONTROLLER_CPUACCT)) { + i != VIR_CGROUP_CONTROLLER_CPUACCT && + i != VIR_CGROUP_CONTROLLER_CPUSET)) { /* treat it as unmounted and we can use virCgroupAddTask */ VIR_FREE(group->controllers[i].mountPoint); continue; @@ -1403,6 +1404,38 @@ int virCgroupGetCpusetMems(virCgroupPtr group, char **mems) } /** + * virCgroupSetCpusetCpus: + * + * @group: The cgroup to set cpuset.cpus for + * @cpus: the cpus to set + * + * Retuens: 0 on success + */ +int virCgroupSetCpusetCpus(virCgroupPtr group, const char *cpus) +{ + return virCgroupSetValueStr(group, + VIR_CGROUP_CONTROLLER_CPUSET, + "cpuset.cpus", + cpus); +} + +/** + * virCgroupGetCpusetCpus: + * + * @group: The cgroup to get cpuset.cpus for + * @cpus: the cpus to get + * + * Retuens: 0 on success + */ +int virCgroupGetCpusetCpus(virCgroupPtr group, char **cpus) +{ + return virCgroupGetValueStr(group, + VIR_CGROUP_CONTROLLER_CPUSET, + "cpuset.cpus", + cpus); +} + +/** * virCgroupDenyAllDevices: * * @group: The cgroup to deny all permissions, for all devices diff --git a/src/util/cgroup.h b/src/util/cgroup.h index 727e536..68ac232 100644 --- a/src/util/cgroup.h +++ b/src/util/cgroup.h @@ -151,6 +151,9 @@ int virCgroupGetFreezerState(virCgroupPtr group, char **state); int virCgroupSetCpusetMems(virCgroupPtr group, const char *mems); int virCgroupGetCpusetMems(virCgroupPtr group, char **mems); +int virCgroupSetCpusetCpus(virCgroupPtr group, const char *cpus); +int virCgroupGetCpusetCpus(virCgroupPtr group, char **cpus); + int virCgroupRemove(virCgroupPtr group); void virCgroupFree(virCgroupPtr *group); -- 1.7.10.2 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list