Refactor some cgroup management methods from qemu into hypervisor. These methods will be shared with ch driver for cgroup management. Signed-off-by: Praveen K Paladugu <prapal@xxxxxxxxxxxxxxxxxxx> --- src/hypervisor/domain_cgroup.c | 426 ++++++++++++++++++++++++++++++++- src/hypervisor/domain_cgroup.h | 52 ++++ src/libvirt_private.syms | 13 + src/qemu/qemu_cgroup.c | 410 +------------------------------ src/qemu/qemu_cgroup.h | 11 - src/qemu/qemu_driver.c | 14 +- src/qemu/qemu_hotplug.c | 7 +- src/qemu/qemu_process.c | 20 +- 8 files changed, 522 insertions(+), 431 deletions(-) diff --git a/src/hypervisor/domain_cgroup.c b/src/hypervisor/domain_cgroup.c index 61b54f071c..0a1b3867b1 100644 --- a/src/hypervisor/domain_cgroup.c +++ b/src/hypervisor/domain_cgroup.c @@ -22,11 +22,12 @@ #include "domain_cgroup.h" #include "domain_driver.h" - +#include "util/virnuma.h" +#include "virlog.h" #include "virutil.h" #define VIR_FROM_THIS VIR_FROM_DOMAIN - +VIR_LOG_INIT("domain.cgroup"); int virDomainCgroupSetupBlkio(virCgroup *cgroup, virDomainBlkiotune blkio) @@ -269,3 +270,424 @@ virDomainCgroupSetMemoryLimitParameters(virCgroup *cgroup, return 0; } + + +int +virSetupBlkioCgroup(virDomainObj *vm, virCgroup *cgroup) +{ + + if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_BLKIO)) { + if (vm->def->blkio.weight || vm->def->blkio.ndevices) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Block I/O tuning is not available on this host")); + return -1; + } else { + return 0; + } + } + + return virDomainCgroupSetupBlkio(cgroup, vm->def->blkio); +} + + +int +virSetupMemoryCgroup(virDomainObj *vm, virCgroup *cgroup) +{ + + if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_MEMORY)) { + if (virMemoryLimitIsSet(vm->def->mem.hard_limit) || + virMemoryLimitIsSet(vm->def->mem.soft_limit) || + virMemoryLimitIsSet(vm->def->mem.swap_hard_limit)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Memory cgroup is not available on this host")); + return -1; + } else { + return 0; + } + } + + return virDomainCgroupSetupMemtune(cgroup, vm->def->mem); +} + + +int +virSetupCpusetCgroup(virCgroup *cgroup) +{ + + if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) + return 0; + + if (virCgroupSetCpusetMemoryMigrate(cgroup, true) < 0) + return -1; + + return 0; +} + + +int +virSetupCpuCgroup(virDomainObj *vm, virCgroup *cgroup) +{ + + if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU)) { + if (vm->def->cputune.sharesSpecified) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("CPU tuning is not available on this host")); + return -1; + } else { + return 0; + } + } + + if (vm->def->cputune.sharesSpecified) { + + if (virCgroupSetCpuShares(cgroup, vm->def->cputune.shares) < 0) + return -1; + + } + + return 0; +} + + +int +virInitCgroup(const char *prefix, + virDomainObj * vm, + size_t nnicindexes, int *nicindexes, + virCgroup * cgroup, int cgroupControllers, + unsigned int maxThreadsPerProc, + bool privileged, char *machineName) +{ + if (!privileged) + return 0; + + if (!virCgroupAvailable()) + return 0; + + virCgroupFree(cgroup); + cgroup = NULL; + + if (!vm->def->resource) + vm->def->resource = g_new0(virDomainResourceDef, 1); + + if (!vm->def->resource->partition) + vm->def->resource->partition = g_strdup("/machine"); + + if (!g_path_is_absolute(vm->def->resource->partition)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Resource partition '%s' must start with '/'"), + vm->def->resource->partition); + return -1; + } + + if (virCgroupNewMachine(machineName, + prefix, + vm->def->uuid, + NULL, + vm->pid, + false, + nnicindexes, nicindexes, + vm->def->resource->partition, + cgroupControllers, + maxThreadsPerProc, &cgroup) < 0) { + if (virCgroupNewIgnoreError()) + return 0; + + return -1; + } + + return 0; +} + + +void +virRestoreCgroupState(virDomainObj *vm, virCgroup *cgroup) +{ + g_autofree char *mem_mask = NULL; + size_t i = 0; + + g_autoptr(virBitmap) all_nodes = NULL; + + if (!virNumaIsAvailable() || + !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) + return; + + if (!(all_nodes = virNumaGetHostMemoryNodeset())) + goto error; + + if (!(mem_mask = virBitmapFormat(all_nodes))) + goto error; + + if (virCgroupHasEmptyTasks(cgroup, VIR_CGROUP_CONTROLLER_CPUSET) <= 0) + goto error; + + if (virCgroupSetCpusetMems(cgroup, mem_mask) < 0) + goto error; + + for (i = 0; i < virDomainDefGetVcpusMax(vm->def); i++) { + virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, i); + + if (!vcpu->online) + continue; + + if (virRestoreCgroupThread(cgroup, VIR_CGROUP_THREAD_VCPU, i) < 0) + return; + } + + for (i = 0; i < vm->def->niothreadids; i++) { + if (virRestoreCgroupThread(cgroup, VIR_CGROUP_THREAD_IOTHREAD, + vm->def->iothreadids[i]->iothread_id) < 0) + return; + } + + if (virRestoreCgroupThread(cgroup, VIR_CGROUP_THREAD_EMULATOR, 0) < 0) + return; + + return; + + error: + virResetLastError(); + VIR_DEBUG("Couldn't restore cgroups to meaningful state"); + return; +} + + +int +virRestoreCgroupThread(virCgroup *cgroup, virCgroupThreadName thread, int id) +{ + g_autoptr(virCgroup) cgroup_temp = NULL; + g_autofree char *nodeset = NULL; + + if (virCgroupNewThread(cgroup, thread, id, false, &cgroup_temp) < 0) + return -1; + + if (virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0) + return -1; + + if (virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0) + return -1; + + if (virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0) + return -1; + + return 0; +} + + +int +virConnectCgroup(const char *prefix, virDomainObj *vm, virCgroup *cgroup, + int cgroupControllers, bool privileged, char *machineName) +{ + if (privileged) + return 0; + + if (!virCgroupAvailable()) + return 0; + + virCgroupFree(cgroup); + + if (virCgroupNewDetectMachine(vm->def->name, + prefix, + vm->pid, + cgroupControllers, machineName, &cgroup) < 0) + return -1; + + virRestoreCgroupState(vm, cgroup); + return 0; +} + + +int +virSetupCgroup(const char *prefix, + virDomainObj * vm, + size_t nnicindexes, int *nicindexes, + virCgroup * cgroup, + int cgroupControllers, + unsigned int maxThreadsPerProc, + bool privileged, char *machineName) +{ + if (!vm->pid) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Cannot setup cgroups until process is started")); + return -1; + } + + if (virInitCgroup(prefix, vm, nnicindexes, nicindexes, + cgroup, cgroupControllers, + maxThreadsPerProc, privileged, machineName) < 0) + return -1; + + if (!cgroup) + return 0; + + if (virSetupBlkioCgroup(vm, cgroup) < 0) + return -1; + + if (virSetupMemoryCgroup(vm, cgroup) < 0) + return -1; + + if (virSetupCpuCgroup(vm, cgroup) < 0) + return -1; + + if (virSetupCpusetCgroup(cgroup) < 0) + return -1; + + return 0; +} + + +int +virSetupCgroupVcpuBW(virCgroup *cgroup, + unsigned long long period, long long quota) +{ + return virCgroupSetupCpuPeriodQuota(cgroup, period, quota); +} + + +int +virSetupCgroupCpusetCpus(virCgroup *cgroup, virBitmap *cpumask) +{ + return virCgroupSetupCpusetCpus(cgroup, cpumask); +} + + +int +virSetupGlobalCpuCgroup(virDomainObj *vm, virCgroup *cgroup, + virBitmap *autoNodeset) +{ + unsigned long long period = vm->def->cputune.global_period; + long long quota = vm->def->cputune.global_quota; + g_autofree char *mem_mask = NULL; + virDomainNumatuneMemMode mem_mode; + + if ((period || quota) && + !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("cgroup cpu is required for scheduler tuning")); + return -1; + } + + /* + * If CPU cgroup controller is not initialized here, then we need + * neither period nor quota settings. And if CPUSET controller is + * not initialized either, then there's nothing to do anyway. + */ + if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU) && + !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) + return 0; + + + if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 && + mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && + virDomainNumatuneMaybeFormatNodeset(vm->def->numa, + autoNodeset, &mem_mask, -1) < 0) + return -1; + + if (period || quota) { + if (virSetupCgroupVcpuBW(cgroup, period, quota) < 0) + return -1; + } + + return 0; +} + + +int +virRemoveCgroup(virDomainObj *vm, virCgroup *cgroup, char *machineName) +{ + if (cgroup == NULL) + return 0; /* Not supported, so claim success */ + + if (virCgroupTerminateMachine(machineName) < 0) { + if (!virCgroupNewIgnoreError()) + VIR_DEBUG("Failed to terminate cgroup for %s", vm->def->name); + } + + return virCgroupRemove(cgroup); +} + + +void +virCgroupEmulatorAllNodesDataFree(virCgroupEmulatorAllNodesData *data) +{ + if (!data) + return; + + virCgroupFree(data->emulatorCgroup); + g_free(data->emulatorMemMask); + g_free(data); +} + + +/** + * virCgroupEmulatorAllNodesAllow: + * @cgroup: domain cgroup pointer + * @retData: filled with structure used to roll back the operation + * + * Allows all NUMA nodes for the cloud hypervisor thread temporarily. This is + * necessary when hotplugging cpus since it requires memory allocated in the + * DMA region. Afterwards the operation can be reverted by + * virCgroupEmulatorAllNodesRestore. + * + * Returns 0 on success -1 on error + */ +int +virCgroupEmulatorAllNodesAllow(virCgroup *cgroup, + virCgroupEmulatorAllNodesData **retData) +{ + virCgroupEmulatorAllNodesData *data = NULL; + g_autofree char *all_nodes_str = NULL; + + g_autoptr(virBitmap) all_nodes = NULL; + int ret = -1; + + if (!virNumaIsAvailable() || + !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) + return 0; + + if (!(all_nodes = virNumaGetHostMemoryNodeset())) + goto cleanup; + + if (!(all_nodes_str = virBitmapFormat(all_nodes))) + goto cleanup; + + data = g_new0(virCgroupEmulatorAllNodesData, 1); + + if (virCgroupNewThread(cgroup, VIR_CGROUP_THREAD_EMULATOR, 0, + false, &data->emulatorCgroup) < 0) + goto cleanup; + + if (virCgroupGetCpusetMems(data->emulatorCgroup, &data->emulatorMemMask) < 0 + || virCgroupSetCpusetMems(data->emulatorCgroup, all_nodes_str) < 0) + goto cleanup; + + *retData = g_steal_pointer(&data); + ret = 0; + + cleanup: + virCgroupEmulatorAllNodesDataFree(data); + + return ret; +} + + +/** + * virCgroupEmulatorAllNodesRestore: + * @data: data structure created by virCgroupEmulatorAllNodesAllow + * + * Rolls back the setting done by virCgroupEmulatorAllNodesAllow and frees the + * associated data. + */ +void +virCgroupEmulatorAllNodesRestore(virCgroupEmulatorAllNodesData *data) +{ + virError *err; + + if (!data) + return; + + virErrorPreserveLast(&err); + virCgroupSetCpusetMems(data->emulatorCgroup, data->emulatorMemMask); + virErrorRestore(&err); + + virCgroupEmulatorAllNodesDataFree(data); +} diff --git a/src/hypervisor/domain_cgroup.h b/src/hypervisor/domain_cgroup.h index f93e5f74fe..fd8b74a4ee 100644 --- a/src/hypervisor/domain_cgroup.h +++ b/src/hypervisor/domain_cgroup.h @@ -23,6 +23,11 @@ #include "vircgroup.h" #include "domain_conf.h" +typedef struct _virCgroupEmulatorAllNodesData virCgroupEmulatorAllNodesData; +struct _virCgroupEmulatorAllNodesData { + virCgroup *emulatorCgroup; + char *emulatorMemMask; +}; int virDomainCgroupSetupBlkio(virCgroup *cgroup, virDomainBlkiotune blkio); int virDomainCgroupSetupMemtune(virCgroup *cgroup, virDomainMemtune mem); @@ -36,3 +41,50 @@ int virDomainCgroupSetMemoryLimitParameters(virCgroup *cgroup, virDomainDef *persistentDef, virTypedParameterPtr params, int nparams); +int +virSetupBlkioCgroup(virDomainObj * vm, virCgroup *cgroup); +int +virSetupMemoryCgroup(virDomainObj * vm, virCgroup *cgroup); +int +virSetupCpusetCgroup(virCgroup *cgroup); +int +virSetupCpuCgroup(virDomainObj * vm, virCgroup *cgroup); +int +virInitCgroup(const char *prefix, virDomainObj * vm, + size_t nnicindexes, int *nicindexes, + virCgroup *cgroup, int cgroupControllers, + unsigned int maxThreadsPerProc, + bool privileged, + char *machineName); +void +virRestoreCgroupState(virDomainObj * vm, virCgroup *cgroup); +int +virConnectCgroup(const char *prefix, virDomainObj * vm, virCgroup *cgroup, + int cgroupControllers, bool privileged, char *machineName); +int +virSetupCgroup(const char *prefix, virDomainObj * vm, + size_t nnicindexes, int *nicindexes, + virCgroup *cgroup, int cgroupControllers, + unsigned int maxThreadsPerProc, + bool privileged, + char *machineName); +void +virCgroupEmulatorAllNodesDataFree(virCgroupEmulatorAllNodesData * data); +int +virCgroupEmulatorAllNodesAllow(virCgroup * cgroup, + virCgroupEmulatorAllNodesData ** retData); +void +virCgroupEmulatorAllNodesRestore(virCgroupEmulatorAllNodesData * data); +int +virSetupCgroupVcpuBW(virCgroup * cgroup, + unsigned long long period, long long quota); +int +virSetupCgroupCpusetCpus(virCgroup * cgroup, virBitmap * cpumask); +int +virSetupGlobalCpuCgroup(virDomainObj * vm, virCgroup * cgroup, + virBitmap *autoNodeset); +int +virRemoveCgroup(virDomainObj * vm, virCgroup * cgroup, char *machineName); +int +virRestoreCgroupThread(virCgroup *cgroup, virCgroupThreadName thread, + int id); diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 56adc192cd..09b1fbb8c4 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1543,10 +1543,23 @@ virSetConnectStorage; # hypervisor/domain_cgroup.h +virCgroupEmulatorAllNodesAllow; +virCgroupEmulatorAllNodesRestore; +virConnectCgroup; virDomainCgroupSetMemoryLimitParameters; virDomainCgroupSetupBlkio; virDomainCgroupSetupDomainBlkioParameters; virDomainCgroupSetupMemtune; +virInitCgroup; +virRemoveCgroup; +virSetupBlkioCgroup; +virSetupCgroup; +virSetupCgroupCpusetCpus; +virSetupCgroupVcpuBW; +virSetupCpuCgroup; +virSetupCpusetCgroup; +virSetupGlobalCpuCgroup; +virSetupMemoryCgroup; # hypervisor/domain_driver.h diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 308be5b00f..b1d2875959 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -593,46 +593,6 @@ qemuSetupVideoCgroup(virDomainObj *vm, return ret; } - -static int -qemuSetupBlkioCgroup(virDomainObj *vm) -{ - qemuDomainObjPrivate *priv = vm->privateData; - - if (!virCgroupHasController(priv->cgroup, - VIR_CGROUP_CONTROLLER_BLKIO)) { - if (vm->def->blkio.weight || vm->def->blkio.ndevices) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("Block I/O tuning is not available on this host")); - return -1; - } - return 0; - } - - return virDomainCgroupSetupBlkio(priv->cgroup, vm->def->blkio); -} - - -static int -qemuSetupMemoryCgroup(virDomainObj *vm) -{ - qemuDomainObjPrivate *priv = vm->privateData; - - if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_MEMORY)) { - if (virMemoryLimitIsSet(vm->def->mem.hard_limit) || - virMemoryLimitIsSet(vm->def->mem.soft_limit) || - virMemoryLimitIsSet(vm->def->mem.swap_hard_limit)) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("Memory cgroup is not available on this host")); - return -1; - } - return 0; - } - - return virDomainCgroupSetupMemtune(priv->cgroup, vm->def->mem); -} - - static int qemuSetupFirmwareCgroup(virDomainObj *vm) { @@ -861,44 +821,6 @@ qemuSetupDevicesCgroup(virDomainObj *vm) } -static int -qemuSetupCpusetCgroup(virDomainObj *vm) -{ - qemuDomainObjPrivate *priv = vm->privateData; - - if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) - return 0; - - if (virCgroupSetCpusetMemoryMigrate(priv->cgroup, true) < 0) - return -1; - - return 0; -} - - -static int -qemuSetupCpuCgroup(virDomainObj *vm) -{ - qemuDomainObjPrivate *priv = vm->privateData; - - if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) { - if (vm->def->cputune.sharesSpecified) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("CPU tuning is not available on this host")); - return -1; - } - return 0; - } - - if (vm->def->cputune.sharesSpecified) { - if (virCgroupSetCpuShares(priv->cgroup, vm->def->cputune.shares) < 0) - return -1; - } - - return 0; -} - - static int qemuSetupCgroupAppid(virDomainObj *vm) { @@ -927,166 +849,13 @@ qemuSetupCgroupAppid(virDomainObj *vm) } -static int -qemuInitCgroup(virDomainObj *vm, - size_t nnicindexes, - int *nicindexes) -{ - qemuDomainObjPrivate *priv = vm->privateData; - g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver); - - if (!priv->driver->privileged) - return 0; - - if (!virCgroupAvailable()) - return 0; - - virCgroupFree(priv->cgroup); - priv->cgroup = NULL; - - if (!vm->def->resource) - vm->def->resource = g_new0(virDomainResourceDef, 1); - - if (!vm->def->resource->partition) - vm->def->resource->partition = g_strdup("/machine"); - - if (!g_path_is_absolute(vm->def->resource->partition)) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - _("Resource partition '%s' must start with '/'"), - vm->def->resource->partition); - return -1; - } - - if (virCgroupNewMachine(priv->machineName, - "qemu", - vm->def->uuid, - NULL, - vm->pid, - false, - nnicindexes, nicindexes, - vm->def->resource->partition, - cfg->cgroupControllers, - cfg->maxThreadsPerProc, - &priv->cgroup) < 0) { - if (virCgroupNewIgnoreError()) - return 0; - - return -1; - } - - return 0; -} - -static int -qemuRestoreCgroupThread(virCgroup *cgroup, - virCgroupThreadName thread, - int id) -{ - g_autoptr(virCgroup) cgroup_temp = NULL; - g_autofree char *nodeset = NULL; - - if (virCgroupNewThread(cgroup, thread, id, false, &cgroup_temp) < 0) - return -1; - - if (virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0) - return -1; - - if (virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0) - return -1; - - if (virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0) - return -1; - - return 0; -} - -static void -qemuRestoreCgroupState(virDomainObj *vm) -{ - g_autofree char *mem_mask = NULL; - qemuDomainObjPrivate *priv = vm->privateData; - size_t i = 0; - g_autoptr(virBitmap) all_nodes = NULL; - - if (!virNumaIsAvailable() || - !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) - return; - - if (!(all_nodes = virNumaGetHostMemoryNodeset())) - goto error; - - if (!(mem_mask = virBitmapFormat(all_nodes))) - goto error; - - if (virCgroupHasEmptyTasks(priv->cgroup, - VIR_CGROUP_CONTROLLER_CPUSET) <= 0) - goto error; - - if (virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0) - goto error; - - for (i = 0; i < virDomainDefGetVcpusMax(vm->def); i++) { - virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, i); - - if (!vcpu->online) - continue; - - if (qemuRestoreCgroupThread(priv->cgroup, - VIR_CGROUP_THREAD_VCPU, i) < 0) - return; - } - - for (i = 0; i < vm->def->niothreadids; i++) { - if (qemuRestoreCgroupThread(priv->cgroup, VIR_CGROUP_THREAD_IOTHREAD, - vm->def->iothreadids[i]->iothread_id) < 0) - return; - } - - if (qemuRestoreCgroupThread(priv->cgroup, - VIR_CGROUP_THREAD_EMULATOR, 0) < 0) - return; - - return; - - error: - virResetLastError(); - VIR_DEBUG("Couldn't restore cgroups to meaningful state"); - return; -} - -int -qemuConnectCgroup(virDomainObj *vm) -{ - qemuDomainObjPrivate *priv = vm->privateData; - g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver); - - if (!priv->driver->privileged) - return 0; - - if (!virCgroupAvailable()) - return 0; - - virCgroupFree(priv->cgroup); - priv->cgroup = NULL; - - if (virCgroupNewDetectMachine(vm->def->name, - "qemu", - vm->pid, - cfg->cgroupControllers, - priv->machineName, - &priv->cgroup) < 0) - return -1; - - qemuRestoreCgroupState(vm); - return 0; -} - int qemuSetupCgroup(virDomainObj *vm, size_t nnicindexes, int *nicindexes) { qemuDomainObjPrivate *priv = vm->privateData; + g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver); if (!vm->pid) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", @@ -1094,7 +863,12 @@ qemuSetupCgroup(virDomainObj *vm, return -1; } - if (qemuInitCgroup(vm, nnicindexes, nicindexes) < 0) + if (virInitCgroup("qemu", vm, nnicindexes, nicindexes, + priv->cgroup, + cfg->cgroupControllers, + cfg->maxThreadsPerProc, + priv->driver->privileged, + priv->machineName) < 0) return -1; if (!priv->cgroup) @@ -1103,16 +877,16 @@ qemuSetupCgroup(virDomainObj *vm, if (qemuSetupDevicesCgroup(vm) < 0) return -1; - if (qemuSetupBlkioCgroup(vm) < 0) + if (virSetupBlkioCgroup(vm, priv->cgroup) < 0) return -1; - if (qemuSetupMemoryCgroup(vm) < 0) + if (virSetupMemoryCgroup(vm, priv->cgroup) < 0) return -1; - if (qemuSetupCpuCgroup(vm) < 0) + if (virSetupCpuCgroup(vm, priv->cgroup) < 0) return -1; - if (qemuSetupCpusetCgroup(vm) < 0) + if (virSetupCpusetCgroup(priv->cgroup) < 0) return -1; if (qemuSetupCgroupAppid(vm) < 0) @@ -1121,23 +895,6 @@ qemuSetupCgroup(virDomainObj *vm, return 0; } -int -qemuSetupCgroupVcpuBW(virCgroup *cgroup, - unsigned long long period, - long long quota) -{ - return virCgroupSetupCpuPeriodQuota(cgroup, period, quota); -} - - -int -qemuSetupCgroupCpusetCpus(virCgroup *cgroup, - virBitmap *cpumask) -{ - return virCgroupSetupCpusetCpus(cgroup, cpumask); -} - - int qemuSetupCgroupForExtDevices(virDomainObj *vm, virQEMUDriver *driver) @@ -1164,148 +921,3 @@ qemuSetupCgroupForExtDevices(virDomainObj *vm, return qemuExtDevicesSetupCgroup(driver, vm, cgroup_temp); } - - -int -qemuSetupGlobalCpuCgroup(virDomainObj *vm) -{ - qemuDomainObjPrivate *priv = vm->privateData; - unsigned long long period = vm->def->cputune.global_period; - long long quota = vm->def->cputune.global_quota; - g_autofree char *mem_mask = NULL; - virDomainNumatuneMemMode mem_mode; - - if ((period || quota) && - !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("cgroup cpu is required for scheduler tuning")); - return -1; - } - - /* - * If CPU cgroup controller is not initialized here, then we need - * neither period nor quota settings. And if CPUSET controller is - * not initialized either, then there's nothing to do anyway. - */ - if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) && - !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) - return 0; - - - if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 && - mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && - virDomainNumatuneMaybeFormatNodeset(vm->def->numa, - priv->autoNodeset, - &mem_mask, -1) < 0) - return -1; - - if (period || quota) { - if (qemuSetupCgroupVcpuBW(priv->cgroup, period, quota) < 0) - return -1; - } - - return 0; -} - - -int -qemuRemoveCgroup(virDomainObj *vm) -{ - qemuDomainObjPrivate *priv = vm->privateData; - - if (priv->cgroup == NULL) - return 0; /* Not supported, so claim success */ - - if (virCgroupTerminateMachine(priv->machineName) < 0) { - if (!virCgroupNewIgnoreError()) - VIR_DEBUG("Failed to terminate cgroup for %s", vm->def->name); - } - - return virCgroupRemove(priv->cgroup); -} - - -static void -qemuCgroupEmulatorAllNodesDataFree(qemuCgroupEmulatorAllNodesData *data) -{ - if (!data) - return; - - virCgroupFree(data->emulatorCgroup); - g_free(data->emulatorMemMask); - g_free(data); -} - - -/** - * qemuCgroupEmulatorAllNodesAllow: - * @cgroup: domain cgroup pointer - * @retData: filled with structure used to roll back the operation - * - * Allows all NUMA nodes for the qemu emulator thread temporarily. This is - * necessary when hotplugging cpus since it requires memory allocated in the - * DMA region. Afterwards the operation can be reverted by - * qemuCgroupEmulatorAllNodesRestore. - * - * Returns 0 on success -1 on error - */ -int -qemuCgroupEmulatorAllNodesAllow(virCgroup *cgroup, - qemuCgroupEmulatorAllNodesData **retData) -{ - qemuCgroupEmulatorAllNodesData *data = NULL; - g_autofree char *all_nodes_str = NULL; - g_autoptr(virBitmap) all_nodes = NULL; - int ret = -1; - - if (!virNumaIsAvailable() || - !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) - return 0; - - if (!(all_nodes = virNumaGetHostMemoryNodeset())) - goto cleanup; - - if (!(all_nodes_str = virBitmapFormat(all_nodes))) - goto cleanup; - - data = g_new0(qemuCgroupEmulatorAllNodesData, 1); - - if (virCgroupNewThread(cgroup, VIR_CGROUP_THREAD_EMULATOR, 0, - false, &data->emulatorCgroup) < 0) - goto cleanup; - - if (virCgroupGetCpusetMems(data->emulatorCgroup, &data->emulatorMemMask) < 0 || - virCgroupSetCpusetMems(data->emulatorCgroup, all_nodes_str) < 0) - goto cleanup; - - *retData = g_steal_pointer(&data); - ret = 0; - - cleanup: - qemuCgroupEmulatorAllNodesDataFree(data); - - return ret; -} - - -/** - * qemuCgroupEmulatorAllNodesRestore: - * @data: data structure created by qemuCgroupEmulatorAllNodesAllow - * - * Rolls back the setting done by qemuCgroupEmulatorAllNodesAllow and frees the - * associated data. - */ -void -qemuCgroupEmulatorAllNodesRestore(qemuCgroupEmulatorAllNodesData *data) -{ - virErrorPtr err; - - if (!data) - return; - - virErrorPreserveLast(&err); - virCgroupSetCpusetMems(data->emulatorCgroup, data->emulatorMemMask); - virErrorRestore(&err); - - qemuCgroupEmulatorAllNodesDataFree(data); -} diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h index cd537ebd82..f09134947f 100644 --- a/src/qemu/qemu_cgroup.h +++ b/src/qemu/qemu_cgroup.h @@ -56,18 +56,11 @@ int qemuSetupChardevCgroup(virDomainObj *vm, virDomainChrDef *dev); int qemuTeardownChardevCgroup(virDomainObj *vm, virDomainChrDef *dev); -int qemuConnectCgroup(virDomainObj *vm); int qemuSetupCgroup(virDomainObj *vm, size_t nnicindexes, int *nicindexes); -int qemuSetupCgroupVcpuBW(virCgroup *cgroup, - unsigned long long period, - long long quota); -int qemuSetupCgroupCpusetCpus(virCgroup *cgroup, virBitmap *cpumask); -int qemuSetupGlobalCpuCgroup(virDomainObj *vm); int qemuSetupCgroupForExtDevices(virDomainObj *vm, virQEMUDriver *driver); -int qemuRemoveCgroup(virDomainObj *vm); typedef struct _qemuCgroupEmulatorAllNodesData qemuCgroupEmulatorAllNodesData; struct _qemuCgroupEmulatorAllNodesData { @@ -75,8 +68,4 @@ struct _qemuCgroupEmulatorAllNodesData { char *emulatorMemMask; }; -int qemuCgroupEmulatorAllNodesAllow(virCgroup *cgroup, - qemuCgroupEmulatorAllNodesData **data); -void qemuCgroupEmulatorAllNodesRestore(qemuCgroupEmulatorAllNodesData *data); - extern const char *const defaultDeviceACL[]; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index ba3efef42b..d365036743 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -4424,7 +4424,7 @@ qemuDomainPinVcpuLive(virDomainObj *vm, if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_VCPU, vcpu, false, &cgroup_vcpu) < 0) goto cleanup; - if (qemuSetupCgroupCpusetCpus(cgroup_vcpu, cpumap) < 0) + if (virSetupCgroupCpusetCpus(cgroup_vcpu, cpumap) < 0) goto cleanup; } @@ -4633,7 +4633,7 @@ qemuDomainPinEmulator(virDomainPtr dom, 0, false, &cgroup_emulator) < 0) goto endjob; - if (qemuSetupCgroupCpusetCpus(cgroup_emulator, pcpumap) < 0) { + if (virSetupCgroupCpusetCpus(cgroup_emulator, pcpumap) < 0) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("failed to set cpuset.cpus in cgroup" " for emulator threads")); @@ -5038,7 +5038,7 @@ qemuDomainPinIOThread(virDomainPtr dom, if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_IOTHREAD, iothread_id, false, &cgroup_iothread) < 0) goto endjob; - if (qemuSetupCgroupCpusetCpus(cgroup_iothread, pcpumap) < 0) { + if (virSetupCgroupCpusetCpus(cgroup_iothread, pcpumap) < 0) { virReportError(VIR_ERR_OPERATION_INVALID, _("failed to set cpuset.cpus in cgroup" " for iothread %d"), iothread_id); @@ -8926,7 +8926,7 @@ qemuSetGlobalBWLive(virCgroup *cgroup, unsigned long long period, if (period == 0 && quota == 0) return 0; - if (qemuSetupCgroupVcpuBW(cgroup, period, quota) < 0) + if (virSetupCgroupVcpuBW(cgroup, period, quota) < 0) return -1; return 0; @@ -9121,7 +9121,7 @@ qemuSetVcpusBWLive(virDomainObj *vm, virCgroup *cgroup, false, &cgroup_vcpu) < 0) return -1; - if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0) + if (virSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0) return -1; } @@ -9142,7 +9142,7 @@ qemuSetEmulatorBandwidthLive(virCgroup *cgroup, false, &cgroup_emulator) < 0) return -1; - if (qemuSetupCgroupVcpuBW(cgroup_emulator, period, quota) < 0) + if (virSetupCgroupVcpuBW(cgroup_emulator, period, quota) < 0) return -1; return 0; @@ -9169,7 +9169,7 @@ qemuSetIOThreadsBWLive(virDomainObj *vm, virCgroup *cgroup, false, &cgroup_iothread) < 0) return -1; - if (qemuSetupCgroupVcpuBW(cgroup_iothread, period, quota) < 0) + if (virSetupCgroupVcpuBW(cgroup_iothread, period, quota) < 0) return -1; } diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 04c6600f26..30f04aac57 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -37,6 +37,7 @@ #include "qemu_snapshot.h" #include "qemu_virtiofs.h" #include "domain_audit.h" +#include "domain_cgroup.h" #include "netdev_bandwidth_conf.h" #include "domain_nwfilter.h" #include "virlog.h" @@ -6551,11 +6552,11 @@ qemuDomainSetVcpusLive(virQEMUDriver *driver, bool enable) { qemuDomainObjPrivate *priv = vm->privateData; - qemuCgroupEmulatorAllNodesData *emulatorCgroup = NULL; + virCgroupEmulatorAllNodesData *emulatorCgroup = NULL; ssize_t nextvcpu = -1; int ret = -1; - if (qemuCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0) + if (virCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0) goto cleanup; if (enable) { @@ -6576,7 +6577,7 @@ qemuDomainSetVcpusLive(virQEMUDriver *driver, ret = 0; cleanup: - qemuCgroupEmulatorAllNodesRestore(emulatorCgroup); + virCgroupEmulatorAllNodesRestore(emulatorCgroup); return ret; } diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 4963ce383f..21c41e83c9 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -73,6 +73,7 @@ #include "virpidfile.h" #include "virhostcpu.h" #include "domain_audit.h" +#include "domain_cgroup.h" #include "domain_nwfilter.h" #include "domain_validate.h" #include "locking/domain_lock.h" @@ -2744,7 +2745,7 @@ qemuProcessSetupPid(virDomainObj *vm, if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) { if (use_cpumask && - qemuSetupCgroupCpusetCpus(cgroup, use_cpumask) < 0) + virSetupCgroupCpusetCpus(cgroup, use_cpumask) < 0) goto cleanup; if (mem_mask && virCgroupSetCpusetMems(cgroup, mem_mask) < 0) @@ -2753,7 +2754,7 @@ qemuProcessSetupPid(virDomainObj *vm, } if ((period || quota) && - qemuSetupCgroupVcpuBW(cgroup, period, quota) < 0) + virSetupCgroupVcpuBW(cgroup, period, quota) < 0) goto cleanup; /* Move the thread to the sub dir */ @@ -6027,7 +6028,7 @@ qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver, { unsigned int maxvcpus = virDomainDefGetVcpusMax(vm->def); qemuDomainObjPrivate *priv = vm->privateData; - qemuCgroupEmulatorAllNodesData *emulatorCgroup = NULL; + virCgroupEmulatorAllNodesData *emulatorCgroup = NULL; virDomainVcpuDef *vcpu; qemuDomainVcpuPrivate *vcpupriv; size_t i; @@ -6055,7 +6056,7 @@ qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver, qsort(bootHotplug, nbootHotplug, sizeof(*bootHotplug), qemuProcessVcpusSortOrder); - if (qemuCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0) + if (virCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0) goto cleanup; for (i = 0; i < nbootHotplug; i++) { @@ -6079,7 +6080,7 @@ qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver, ret = 0; cleanup: - qemuCgroupEmulatorAllNodesRestore(emulatorCgroup); + virCgroupEmulatorAllNodesRestore(emulatorCgroup); return ret; } @@ -7061,7 +7062,7 @@ qemuProcessPrepareHost(virQEMUDriver *driver, /* Ensure no historical cgroup for this VM is lying around bogus * settings */ VIR_DEBUG("Ensuring no historical cgroup is lying around"); - qemuRemoveCgroup(vm); + virRemoveCgroup(vm, priv->cgroup, priv->machineName); if (g_mkdir_with_parents(cfg->logDir, 0777) < 0) { virReportSystemError(errno, @@ -7678,7 +7679,7 @@ qemuProcessLaunch(virConnectPtr conn, goto cleanup; VIR_DEBUG("Setting global CPU cgroup (if required)"); - if (qemuSetupGlobalCpuCgroup(vm) < 0) + if (virSetupGlobalCpuCgroup(vm, priv->cgroup, priv->autoNodeset) < 0) goto cleanup; VIR_DEBUG("Setting vCPU tuning/settings"); @@ -8290,7 +8291,7 @@ void qemuProcessStop(virQEMUDriver *driver, } retry: - if ((ret = qemuRemoveCgroup(vm)) < 0) { + if ((ret = virRemoveCgroup(vm, priv->cgroup, priv->machineName)) < 0) { if (ret == -EBUSY && (retries++ < 5)) { g_usleep(200*1000); goto retry; @@ -8849,7 +8850,8 @@ qemuProcessReconnect(void *opaque) if (!priv->machineName) goto error; - if (qemuConnectCgroup(obj) < 0) + if (virConnectCgroup("qemu", obj, priv->cgroup, cfg->cgroupControllers, + priv->driver->privileged, priv->machineName) < 0) goto error; if (qemuDomainPerfRestart(obj) < 0) -- 2.27.0