Refactor some cgroup management methods from qemu into hypervisor. These methods will be shared with ch driver for cgroup management. Signed-off-by: Praveen K Paladugu <prapal@xxxxxxxxxxxxxxxxxxx> --- src/hypervisor/domain_cgroup.c | 457 ++++++++++++++++++++++++++++++++- src/hypervisor/domain_cgroup.h | 72 ++++++ src/libvirt_private.syms | 14 +- src/qemu/qemu_cgroup.c | 413 +---------------------------- src/qemu/qemu_cgroup.h | 11 - src/qemu/qemu_driver.c | 14 +- src/qemu/qemu_hotplug.c | 7 +- src/qemu/qemu_process.c | 24 +- 8 files changed, 580 insertions(+), 432 deletions(-) diff --git a/src/hypervisor/domain_cgroup.c b/src/hypervisor/domain_cgroup.c index 61b54f071c..05c53ff7d1 100644 --- a/src/hypervisor/domain_cgroup.c +++ b/src/hypervisor/domain_cgroup.c @@ -22,11 +22,12 @@ #include "domain_cgroup.h" #include "domain_driver.h" - +#include "util/virnuma.h" +#include "virlog.h" #include "virutil.h" #define VIR_FROM_THIS VIR_FROM_DOMAIN - +VIR_LOG_INIT("domain.cgroup"); int virDomainCgroupSetupBlkio(virCgroup *cgroup, virDomainBlkiotune blkio) @@ -269,3 +270,455 @@ virDomainCgroupSetMemoryLimitParameters(virCgroup *cgroup, return 0; } + + +int +virDomainCgroupSetupBlkioCgroup(virDomainObj *vm, + virCgroup *cgroup) +{ + + if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_BLKIO)) { + if (vm->def->blkio.weight || vm->def->blkio.ndevices) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Block I/O tuning is not available on this host")); + return -1; + } else { + return 0; + } + } + + return virDomainCgroupSetupBlkio(cgroup, vm->def->blkio); +} + + +int +virDomainCgroupSetupMemoryCgroup(virDomainObj *vm, + virCgroup *cgroup) +{ + + if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_MEMORY)) { + if (virMemoryLimitIsSet(vm->def->mem.hard_limit) || + virMemoryLimitIsSet(vm->def->mem.soft_limit) || + virMemoryLimitIsSet(vm->def->mem.swap_hard_limit)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Memory cgroup is not available on this host")); + return -1; + } else { + return 0; + } + } + + return virDomainCgroupSetupMemtune(cgroup, vm->def->mem); +} + + +int +virDomainCgroupSetupCpusetCgroup(virCgroup *cgroup) +{ + + if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) + return 0; + + if (virCgroupSetCpusetMemoryMigrate(cgroup, true) < 0) + return -1; + + return 0; +} + + +int +virDomainCgroupSetupCpuCgroup(virDomainObj *vm, + virCgroup *cgroup) +{ + + if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU)) { + if (vm->def->cputune.sharesSpecified) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("CPU tuning is not available on this host")); + return -1; + } else { + return 0; + } + } + + if (vm->def->cputune.sharesSpecified) { + + if (virCgroupSetCpuShares(cgroup, vm->def->cputune.shares) < 0) + return -1; + + } + + return 0; +} + + +int +virDomainCgroupInitCgroup(const char *prefix, + virDomainObj * vm, + size_t nnicindexes, + int *nicindexes, + virCgroup * cgroup, + int cgroupControllers, + unsigned int maxThreadsPerProc, + bool privileged, + char *machineName) +{ + if (!privileged) + return 0; + + if (!virCgroupAvailable()) + return 0; + + virCgroupFree(cgroup); + cgroup = NULL; + + if (!vm->def->resource) + vm->def->resource = g_new0(virDomainResourceDef, 1); + + if (!vm->def->resource->partition) + vm->def->resource->partition = g_strdup("/machine"); + + if (!g_path_is_absolute(vm->def->resource->partition)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Resource partition '%s' must start with '/'"), + vm->def->resource->partition); + return -1; + } + + if (virCgroupNewMachine(machineName, + prefix, + vm->def->uuid, + NULL, + vm->pid, + false, + nnicindexes, nicindexes, + vm->def->resource->partition, + cgroupControllers, + maxThreadsPerProc, &cgroup) < 0) { + if (virCgroupNewIgnoreError()) + return 0; + + return -1; + } + + return 0; +} + + +void +virDomainCgroupRestoreCgroupState(virDomainObj *vm, + virCgroup *cgroup) +{ + g_autofree char *mem_mask = NULL; + size_t i = 0; + + g_autoptr(virBitmap) all_nodes = NULL; + + if (!virNumaIsAvailable() || + !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) + return; + + if (!(all_nodes = virNumaGetHostMemoryNodeset())) + goto error; + + if (!(mem_mask = virBitmapFormat(all_nodes))) + goto error; + + if (virCgroupHasEmptyTasks(cgroup, VIR_CGROUP_CONTROLLER_CPUSET) <= 0) + goto error; + + if (virCgroupSetCpusetMems(cgroup, mem_mask) < 0) + goto error; + + for (i = 0; i < virDomainDefGetVcpusMax(vm->def); i++) { + virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, i); + + if (!vcpu->online) + continue; + + if (virDomainCgroupRestoreCgroupThread(cgroup, + VIR_CGROUP_THREAD_VCPU, + i) < 0) + return; + } + + for (i = 0; i < vm->def->niothreadids; i++) { + if (virDomainCgroupRestoreCgroupThread(cgroup, + VIR_CGROUP_THREAD_IOTHREAD, + vm->def->iothreadids[i]->iothread_id) < 0) + return; + } + + if (virDomainCgroupRestoreCgroupThread(cgroup, + VIR_CGROUP_THREAD_EMULATOR, + 0) < 0) + return; + + return; + + error: + virResetLastError(); + VIR_DEBUG("Couldn't restore cgroups to meaningful state"); + return; +} + + +int +virDomainCgroupRestoreCgroupThread(virCgroup *cgroup, + virCgroupThreadName thread, + int id) +{ + g_autoptr(virCgroup) cgroup_temp = NULL; + g_autofree char *nodeset = NULL; + + if (virCgroupNewThread(cgroup, thread, id, false, &cgroup_temp) < 0) + return -1; + + if (virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0) + return -1; + + if (virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0) + return -1; + + if (virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0) + return -1; + + return 0; +} + + +int +virDomainCgroupConnectCgroup(const char *prefix, + virDomainObj *vm, + virCgroup *cgroup, + int cgroupControllers, + bool privileged, + char *machineName) +{ + if (privileged) + return 0; + + if (!virCgroupAvailable()) + return 0; + + virCgroupFree(cgroup); + + if (virCgroupNewDetectMachine(vm->def->name, + prefix, + vm->pid, + cgroupControllers, machineName, &cgroup) < 0) + return -1; + + virDomainCgroupRestoreCgroupState(vm, cgroup); + return 0; +} + + +int +virDomainCgroupSetupCgroup(const char *prefix, + virDomainObj * vm, + size_t nnicindexes, + int *nicindexes, + virCgroup * cgroup, + int cgroupControllers, + unsigned int maxThreadsPerProc, + bool privileged, + char *machineName) +{ + if (!vm->pid) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Cannot setup cgroups until process is started")); + return -1; + } + + if (virDomainCgroupInitCgroup(prefix, + vm, + nnicindexes, + nicindexes, + cgroup, + cgroupControllers, + maxThreadsPerProc, + privileged, + machineName) < 0) + return -1; + + if (!cgroup) + return 0; + + if (virDomainCgroupSetupBlkioCgroup(vm, cgroup) < 0) + return -1; + + if (virDomainCgroupSetupMemoryCgroup(vm, cgroup) < 0) + return -1; + + if (virDomainCgroupSetupCpuCgroup(vm, cgroup) < 0) + return -1; + + if (virDomainCgroupSetupCpusetCgroup(cgroup) < 0) + return -1; + + return 0; +} + + +int +virDomainCgroupSetupVcpuBW(virCgroup *cgroup, + unsigned long long period, + long long quota) +{ + return virCgroupSetupCpuPeriodQuota(cgroup, period, quota); +} + + +int +virDomainCgroupSetupCpusetCpus(virCgroup *cgroup, + virBitmap *cpumask) +{ + return virCgroupSetupCpusetCpus(cgroup, cpumask); +} + + +int +virDomainCgroupSetupGlobalCpuCgroup(virDomainObj *vm, + virCgroup *cgroup, + virBitmap *autoNodeset) +{ + unsigned long long period = vm->def->cputune.global_period; + long long quota = vm->def->cputune.global_quota; + g_autofree char *mem_mask = NULL; + virDomainNumatuneMemMode mem_mode; + + if ((period || quota) && + !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("cgroup cpu is required for scheduler tuning")); + return -1; + } + + /* + * If CPU cgroup controller is not initialized here, then we need + * neither period nor quota settings. And if CPUSET controller is + * not initialized either, then there's nothing to do anyway. + */ + if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU) && + !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) + return 0; + + + if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 && + mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && + virDomainNumatuneMaybeFormatNodeset(vm->def->numa, + autoNodeset, &mem_mask, -1) < 0) + return -1; + + if (period || quota) { + if (virDomainCgroupSetupVcpuBW(cgroup, period, quota) < 0) + return -1; + } + + return 0; +} + + +int +virDomainCgroupRemoveCgroup(virDomainObj *vm, + virCgroup *cgroup, + char *machineName) +{ + if (cgroup == NULL) + return 0; /* Not supported, so claim success */ + + if (virCgroupTerminateMachine(machineName) < 0) { + if (!virCgroupNewIgnoreError()) + VIR_DEBUG("Failed to terminate cgroup for %s", vm->def->name); + } + + return virCgroupRemove(cgroup); +} + + +void +virDomainCgroupEmulatorAllNodesDataFree(virCgroupEmulatorAllNodesData *data) +{ + if (!data) + return; + + virCgroupFree(data->emulatorCgroup); + g_free(data->emulatorMemMask); + g_free(data); +} + + +/** + * virDomainCgroupEmulatorAllNodesAllow: + * @cgroup: domain cgroup pointer + * @retData: filled with structure used to roll back the operation + * + * Allows all NUMA nodes for the cloud hypervisor thread temporarily. This is + * necessary when hotplugging cpus since it requires memory allocated in the + * DMA region. Afterwards the operation can be reverted by + * virDomainCgroupEmulatorAllNodesRestore. + * + * Returns 0 on success -1 on error + */ +int +virDomainCgroupEmulatorAllNodesAllow(virCgroup *cgroup, + virCgroupEmulatorAllNodesData **retData) +{ + virCgroupEmulatorAllNodesData *data = NULL; + g_autofree char *all_nodes_str = NULL; + + g_autoptr(virBitmap) all_nodes = NULL; + int ret = -1; + + if (!virNumaIsAvailable() || + !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) + return 0; + + if (!(all_nodes = virNumaGetHostMemoryNodeset())) + goto cleanup; + + if (!(all_nodes_str = virBitmapFormat(all_nodes))) + goto cleanup; + + data = g_new0(virCgroupEmulatorAllNodesData, 1); + + if (virCgroupNewThread(cgroup, VIR_CGROUP_THREAD_EMULATOR, 0, + false, &data->emulatorCgroup) < 0) + goto cleanup; + + if (virCgroupGetCpusetMems(data->emulatorCgroup, &data->emulatorMemMask) < 0 + || virCgroupSetCpusetMems(data->emulatorCgroup, all_nodes_str) < 0) + goto cleanup; + + *retData = g_steal_pointer(&data); + ret = 0; + + cleanup: + virDomainCgroupEmulatorAllNodesDataFree(data); + + return ret; +} + + +/** + * virDomainCgroupEmulatorAllNodesRestore: + * @data: data structure created by virDomainCgroupEmulatorAllNodesAllow + * + * Rolls back the setting done by virDomainCgroupEmulatorAllNodesAllow and frees the + * associated data. + */ +void +virDomainCgroupEmulatorAllNodesRestore(virCgroupEmulatorAllNodesData *data) +{ + virError *err; + + if (!data) + return; + + virErrorPreserveLast(&err); + virCgroupSetCpusetMems(data->emulatorCgroup, data->emulatorMemMask); + virErrorRestore(&err); + + virDomainCgroupEmulatorAllNodesDataFree(data); +} diff --git a/src/hypervisor/domain_cgroup.h b/src/hypervisor/domain_cgroup.h index f93e5f74fe..dfe7107674 100644 --- a/src/hypervisor/domain_cgroup.h +++ b/src/hypervisor/domain_cgroup.h @@ -23,6 +23,11 @@ #include "vircgroup.h" #include "domain_conf.h" +typedef struct _virCgroupEmulatorAllNodesData virCgroupEmulatorAllNodesData; +struct _virCgroupEmulatorAllNodesData { + virCgroup *emulatorCgroup; + char *emulatorMemMask; +}; int virDomainCgroupSetupBlkio(virCgroup *cgroup, virDomainBlkiotune blkio); int virDomainCgroupSetupMemtune(virCgroup *cgroup, virDomainMemtune mem); @@ -36,3 +41,70 @@ int virDomainCgroupSetMemoryLimitParameters(virCgroup *cgroup, virDomainDef *persistentDef, virTypedParameterPtr params, int nparams); +int +virDomainCgroupSetupBlkioCgroup(virDomainObj * vm, + virCgroup *cgroup); +int +virDomainCgroupSetupMemoryCgroup(virDomainObj * vm, + virCgroup *cgroup); +int +virDomainCgroupSetupCpusetCgroup(virCgroup *cgroup); +int +virDomainCgroupSetupCpuCgroup(virDomainObj * vm, + virCgroup *cgroup); +int +virDomainCgroupInitCgroup(const char *prefix, + virDomainObj * vm, + size_t nnicindexes, + int *nicindexes, + virCgroup *cgroup, + int cgroupControllers, + unsigned int maxThreadsPerProc, + bool privileged, + char *machineName); +void +virDomainCgroupRestoreCgroupState(virDomainObj * vm, + virCgroup *cgroup); +int +virDomainCgroupConnectCgroup(const char *prefix, + virDomainObj * vm, + virCgroup *cgroup, + int cgroupControllers, + bool privileged, + char *machineName); +int +virDomainCgroupSetupCgroup(const char *prefix, + virDomainObj * vm, + size_t nnicindexes, + int *nicindexes, + virCgroup *cgroup, + int cgroupControllers, + unsigned int maxThreadsPerProc, + bool privileged, + char *machineName); +void +virDomainCgroupEmulatorAllNodesDataFree(virCgroupEmulatorAllNodesData * data); +int +virDomainCgroupEmulatorAllNodesAllow(virCgroup * cgroup, + virCgroupEmulatorAllNodesData ** retData); +void +virDomainCgroupEmulatorAllNodesRestore(virCgroupEmulatorAllNodesData * data); +int +virDomainCgroupSetupVcpuBW(virCgroup * cgroup, + unsigned long long period, + long long quota); +int +virDomainCgroupSetupCpusetCpus(virCgroup * cgroup, + virBitmap * cpumask); +int +virDomainCgroupSetupGlobalCpuCgroup(virDomainObj * vm, + virCgroup * cgroup, + virBitmap *autoNodeset); +int +virDomainCgroupRemoveCgroup(virDomainObj * vm, + virCgroup * cgroup, + char *machineName); +int +virDomainCgroupRestoreCgroupThread(virCgroup *cgroup, + virCgroupThreadName thread, + int id); diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index ba3462d849..bc6fa191bf 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1544,11 +1544,23 @@ virSetConnectStorage; # hypervisor/domain_cgroup.h +virDomainCgroupConnectCgroup; +virDomainCgroupEmulatorAllNodesAllow; +virDomainCgroupEmulatorAllNodesRestore; +virDomainCgroupInitCgroup; +virDomainCgroupRemoveCgroup; virDomainCgroupSetMemoryLimitParameters; virDomainCgroupSetupBlkio; +virDomainCgroupSetupBlkioCgroup; +virDomainCgroupSetupCgroup; +virDomainCgroupSetupCpuCgroup; +virDomainCgroupSetupCpusetCgroup; +virDomainCgroupSetupCpusetCpus; virDomainCgroupSetupDomainBlkioParameters; +virDomainCgroupSetupGlobalCpuCgroup; +virDomainCgroupSetupMemoryCgroup; virDomainCgroupSetupMemtune; - +virDomainCgroupSetupVcpuBW; # hypervisor/domain_driver.h virDomainDriverAddIOThreadCheck; diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 22a6f56cf9..40bd4e2f9d 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -593,46 +593,6 @@ qemuSetupVideoCgroup(virDomainObj *vm, return ret; } - -static int -qemuSetupBlkioCgroup(virDomainObj *vm) -{ - qemuDomainObjPrivate *priv = vm->privateData; - - if (!virCgroupHasController(priv->cgroup, - VIR_CGROUP_CONTROLLER_BLKIO)) { - if (vm->def->blkio.weight || vm->def->blkio.ndevices) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("Block I/O tuning is not available on this host")); - return -1; - } - return 0; - } - - return virDomainCgroupSetupBlkio(priv->cgroup, vm->def->blkio); -} - - -static int -qemuSetupMemoryCgroup(virDomainObj *vm) -{ - qemuDomainObjPrivate *priv = vm->privateData; - - if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_MEMORY)) { - if (virMemoryLimitIsSet(vm->def->mem.hard_limit) || - virMemoryLimitIsSet(vm->def->mem.soft_limit) || - virMemoryLimitIsSet(vm->def->mem.swap_hard_limit)) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("Memory cgroup is not available on this host")); - return -1; - } - return 0; - } - - return virDomainCgroupSetupMemtune(priv->cgroup, vm->def->mem); -} - - static int qemuSetupFirmwareCgroup(virDomainObj *vm) { @@ -861,44 +821,6 @@ qemuSetupDevicesCgroup(virDomainObj *vm) } -static int -qemuSetupCpusetCgroup(virDomainObj *vm) -{ - qemuDomainObjPrivate *priv = vm->privateData; - - if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) - return 0; - - if (virCgroupSetCpusetMemoryMigrate(priv->cgroup, true) < 0) - return -1; - - return 0; -} - - -static int -qemuSetupCpuCgroup(virDomainObj *vm) -{ - qemuDomainObjPrivate *priv = vm->privateData; - - if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) { - if (vm->def->cputune.sharesSpecified) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("CPU tuning is not available on this host")); - return -1; - } - return 0; - } - - if (vm->def->cputune.sharesSpecified) { - if (virCgroupSetCpuShares(priv->cgroup, vm->def->cputune.shares) < 0) - return -1; - } - - return 0; -} - - static int qemuSetupCgroupAppid(virDomainObj *vm) { @@ -927,166 +849,13 @@ qemuSetupCgroupAppid(virDomainObj *vm) } -static int -qemuInitCgroup(virDomainObj *vm, - size_t nnicindexes, - int *nicindexes) -{ - qemuDomainObjPrivate *priv = vm->privateData; - g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver); - - if (!priv->driver->privileged) - return 0; - - if (!virCgroupAvailable()) - return 0; - - virCgroupFree(priv->cgroup); - priv->cgroup = NULL; - - if (!vm->def->resource) - vm->def->resource = g_new0(virDomainResourceDef, 1); - - if (!vm->def->resource->partition) - vm->def->resource->partition = g_strdup("/machine"); - - if (!g_path_is_absolute(vm->def->resource->partition)) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - _("Resource partition '%s' must start with '/'"), - vm->def->resource->partition); - return -1; - } - - if (virCgroupNewMachine(priv->machineName, - "qemu", - vm->def->uuid, - NULL, - vm->pid, - false, - nnicindexes, nicindexes, - vm->def->resource->partition, - cfg->cgroupControllers, - cfg->maxThreadsPerProc, - &priv->cgroup) < 0) { - if (virCgroupNewIgnoreError()) - return 0; - - return -1; - } - - return 0; -} - -static int -qemuRestoreCgroupThread(virCgroup *cgroup, - virCgroupThreadName thread, - int id) -{ - g_autoptr(virCgroup) cgroup_temp = NULL; - g_autofree char *nodeset = NULL; - - if (virCgroupNewThread(cgroup, thread, id, false, &cgroup_temp) < 0) - return -1; - - if (virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0) - return -1; - - if (virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0) - return -1; - - if (virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0) - return -1; - - return 0; -} - -static void -qemuRestoreCgroupState(virDomainObj *vm) -{ - g_autofree char *mem_mask = NULL; - qemuDomainObjPrivate *priv = vm->privateData; - size_t i = 0; - g_autoptr(virBitmap) all_nodes = NULL; - - if (!virNumaIsAvailable() || - !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) - return; - - if (!(all_nodes = virNumaGetHostMemoryNodeset())) - goto error; - - if (!(mem_mask = virBitmapFormat(all_nodes))) - goto error; - - if (virCgroupHasEmptyTasks(priv->cgroup, - VIR_CGROUP_CONTROLLER_CPUSET) <= 0) - goto error; - - if (virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0) - goto error; - - for (i = 0; i < virDomainDefGetVcpusMax(vm->def); i++) { - virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, i); - - if (!vcpu->online) - continue; - - if (qemuRestoreCgroupThread(priv->cgroup, - VIR_CGROUP_THREAD_VCPU, i) < 0) - return; - } - - for (i = 0; i < vm->def->niothreadids; i++) { - if (qemuRestoreCgroupThread(priv->cgroup, VIR_CGROUP_THREAD_IOTHREAD, - vm->def->iothreadids[i]->iothread_id) < 0) - return; - } - - if (qemuRestoreCgroupThread(priv->cgroup, - VIR_CGROUP_THREAD_EMULATOR, 0) < 0) - return; - - return; - - error: - virResetLastError(); - VIR_DEBUG("Couldn't restore cgroups to meaningful state"); - return; -} - -int -qemuConnectCgroup(virDomainObj *vm) -{ - qemuDomainObjPrivate *priv = vm->privateData; - g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver); - - if (!priv->driver->privileged) - return 0; - - if (!virCgroupAvailable()) - return 0; - - virCgroupFree(priv->cgroup); - priv->cgroup = NULL; - - if (virCgroupNewDetectMachine(vm->def->name, - "qemu", - vm->pid, - cfg->cgroupControllers, - priv->machineName, - &priv->cgroup) < 0) - return -1; - - qemuRestoreCgroupState(vm); - return 0; -} - int qemuSetupCgroup(virDomainObj *vm, size_t nnicindexes, int *nicindexes) { qemuDomainObjPrivate *priv = vm->privateData; + g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver); if (!vm->pid) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", @@ -1094,7 +863,15 @@ qemuSetupCgroup(virDomainObj *vm, return -1; } - if (qemuInitCgroup(vm, nnicindexes, nicindexes) < 0) + if (virDomainCgroupInitCgroup("qemu", + vm, + nnicindexes, + nicindexes, + priv->cgroup, + cfg->cgroupControllers, + cfg->maxThreadsPerProc, + priv->driver->privileged, + priv->machineName) < 0) return -1; if (!priv->cgroup) @@ -1103,16 +880,16 @@ qemuSetupCgroup(virDomainObj *vm, if (qemuSetupDevicesCgroup(vm) < 0) return -1; - if (qemuSetupBlkioCgroup(vm) < 0) + if (virDomainCgroupSetupBlkioCgroup(vm, priv->cgroup) < 0) return -1; - if (qemuSetupMemoryCgroup(vm) < 0) + if (virDomainCgroupSetupMemoryCgroup(vm, priv->cgroup) < 0) return -1; - if (qemuSetupCpuCgroup(vm) < 0) + if (virDomainCgroupSetupCpuCgroup(vm, priv->cgroup) < 0) return -1; - if (qemuSetupCpusetCgroup(vm) < 0) + if (virDomainCgroupSetupCpusetCgroup(priv->cgroup) < 0) return -1; if (qemuSetupCgroupAppid(vm) < 0) @@ -1121,23 +898,6 @@ qemuSetupCgroup(virDomainObj *vm, return 0; } -int -qemuSetupCgroupVcpuBW(virCgroup *cgroup, - unsigned long long period, - long long quota) -{ - return virCgroupSetupCpuPeriodQuota(cgroup, period, quota); -} - - -int -qemuSetupCgroupCpusetCpus(virCgroup *cgroup, - virBitmap *cpumask) -{ - return virCgroupSetupCpusetCpus(cgroup, cpumask); -} - - int qemuSetupCgroupForExtDevices(virDomainObj *vm, virQEMUDriver *driver) @@ -1164,148 +924,3 @@ qemuSetupCgroupForExtDevices(virDomainObj *vm, return qemuExtDevicesSetupCgroup(driver, vm, cgroup_temp); } - - -int -qemuSetupGlobalCpuCgroup(virDomainObj *vm) -{ - qemuDomainObjPrivate *priv = vm->privateData; - unsigned long long period = vm->def->cputune.global_period; - long long quota = vm->def->cputune.global_quota; - g_autofree char *mem_mask = NULL; - virDomainNumatuneMemMode mem_mode; - - if ((period || quota) && - !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("cgroup cpu is required for scheduler tuning")); - return -1; - } - - /* - * If CPU cgroup controller is not initialized here, then we need - * neither period nor quota settings. And if CPUSET controller is - * not initialized either, then there's nothing to do anyway. - */ - if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) && - !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) - return 0; - - - if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 && - mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && - virDomainNumatuneMaybeFormatNodeset(vm->def->numa, - priv->autoNodeset, - &mem_mask, -1) < 0) - return -1; - - if (period || quota) { - if (qemuSetupCgroupVcpuBW(priv->cgroup, period, quota) < 0) - return -1; - } - - return 0; -} - - -int -qemuRemoveCgroup(virDomainObj *vm) -{ - qemuDomainObjPrivate *priv = vm->privateData; - - if (priv->cgroup == NULL) - return 0; /* Not supported, so claim success */ - - if (virCgroupTerminateMachine(priv->machineName) < 0) { - if (!virCgroupNewIgnoreError()) - VIR_DEBUG("Failed to terminate cgroup for %s", vm->def->name); - } - - return virCgroupRemove(priv->cgroup); -} - - -static void -qemuCgroupEmulatorAllNodesDataFree(qemuCgroupEmulatorAllNodesData *data) -{ - if (!data) - return; - - virCgroupFree(data->emulatorCgroup); - g_free(data->emulatorMemMask); - g_free(data); -} - - -/** - * qemuCgroupEmulatorAllNodesAllow: - * @cgroup: domain cgroup pointer - * @retData: filled with structure used to roll back the operation - * - * Allows all NUMA nodes for the qemu emulator thread temporarily. This is - * necessary when hotplugging cpus since it requires memory allocated in the - * DMA region. Afterwards the operation can be reverted by - * qemuCgroupEmulatorAllNodesRestore. - * - * Returns 0 on success -1 on error - */ -int -qemuCgroupEmulatorAllNodesAllow(virCgroup *cgroup, - qemuCgroupEmulatorAllNodesData **retData) -{ - qemuCgroupEmulatorAllNodesData *data = NULL; - g_autofree char *all_nodes_str = NULL; - g_autoptr(virBitmap) all_nodes = NULL; - int ret = -1; - - if (!virNumaIsAvailable() || - !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) - return 0; - - if (!(all_nodes = virNumaGetHostMemoryNodeset())) - goto cleanup; - - if (!(all_nodes_str = virBitmapFormat(all_nodes))) - goto cleanup; - - data = g_new0(qemuCgroupEmulatorAllNodesData, 1); - - if (virCgroupNewThread(cgroup, VIR_CGROUP_THREAD_EMULATOR, 0, - false, &data->emulatorCgroup) < 0) - goto cleanup; - - if (virCgroupGetCpusetMems(data->emulatorCgroup, &data->emulatorMemMask) < 0 || - virCgroupSetCpusetMems(data->emulatorCgroup, all_nodes_str) < 0) - goto cleanup; - - *retData = g_steal_pointer(&data); - ret = 0; - - cleanup: - qemuCgroupEmulatorAllNodesDataFree(data); - - return ret; -} - - -/** - * qemuCgroupEmulatorAllNodesRestore: - * @data: data structure created by qemuCgroupEmulatorAllNodesAllow - * - * Rolls back the setting done by qemuCgroupEmulatorAllNodesAllow and frees the - * associated data. - */ -void -qemuCgroupEmulatorAllNodesRestore(qemuCgroupEmulatorAllNodesData *data) -{ - virErrorPtr err; - - if (!data) - return; - - virErrorPreserveLast(&err); - virCgroupSetCpusetMems(data->emulatorCgroup, data->emulatorMemMask); - virErrorRestore(&err); - - qemuCgroupEmulatorAllNodesDataFree(data); -} diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h index cd537ebd82..f09134947f 100644 --- a/src/qemu/qemu_cgroup.h +++ b/src/qemu/qemu_cgroup.h @@ -56,18 +56,11 @@ int qemuSetupChardevCgroup(virDomainObj *vm, virDomainChrDef *dev); int qemuTeardownChardevCgroup(virDomainObj *vm, virDomainChrDef *dev); -int qemuConnectCgroup(virDomainObj *vm); int qemuSetupCgroup(virDomainObj *vm, size_t nnicindexes, int *nicindexes); -int qemuSetupCgroupVcpuBW(virCgroup *cgroup, - unsigned long long period, - long long quota); -int qemuSetupCgroupCpusetCpus(virCgroup *cgroup, virBitmap *cpumask); -int qemuSetupGlobalCpuCgroup(virDomainObj *vm); int qemuSetupCgroupForExtDevices(virDomainObj *vm, virQEMUDriver *driver); -int qemuRemoveCgroup(virDomainObj *vm); typedef struct _qemuCgroupEmulatorAllNodesData qemuCgroupEmulatorAllNodesData; struct _qemuCgroupEmulatorAllNodesData { @@ -75,8 +68,4 @@ struct _qemuCgroupEmulatorAllNodesData { char *emulatorMemMask; }; -int qemuCgroupEmulatorAllNodesAllow(virCgroup *cgroup, - qemuCgroupEmulatorAllNodesData **data); -void qemuCgroupEmulatorAllNodesRestore(qemuCgroupEmulatorAllNodesData *data); - extern const char *const defaultDeviceACL[]; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 0a1ba74e65..1141efef4b 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -4419,7 +4419,7 @@ qemuDomainPinVcpuLive(virDomainObj *vm, if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_VCPU, vcpu, false, &cgroup_vcpu) < 0) goto cleanup; - if (qemuSetupCgroupCpusetCpus(cgroup_vcpu, cpumap) < 0) + if (virDomainCgroupSetupCpusetCpus(cgroup_vcpu, cpumap) < 0) goto cleanup; } @@ -4628,7 +4628,7 @@ qemuDomainPinEmulator(virDomainPtr dom, 0, false, &cgroup_emulator) < 0) goto endjob; - if (qemuSetupCgroupCpusetCpus(cgroup_emulator, pcpumap) < 0) { + if (virDomainCgroupSetupCpusetCpus(cgroup_emulator, pcpumap) < 0) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("failed to set cpuset.cpus in cgroup" " for emulator threads")); @@ -5025,7 +5025,7 @@ qemuDomainPinIOThread(virDomainPtr dom, if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_IOTHREAD, iothread_id, false, &cgroup_iothread) < 0) goto endjob; - if (qemuSetupCgroupCpusetCpus(cgroup_iothread, pcpumap) < 0) { + if (virDomainCgroupSetupCpusetCpus(cgroup_iothread, pcpumap) < 0) { virReportError(VIR_ERR_OPERATION_INVALID, _("failed to set cpuset.cpus in cgroup" " for iothread %d"), iothread_id); @@ -8925,7 +8925,7 @@ qemuSetGlobalBWLive(virCgroup *cgroup, unsigned long long period, if (period == 0 && quota == 0) return 0; - if (qemuSetupCgroupVcpuBW(cgroup, period, quota) < 0) + if (virDomainCgroupSetupVcpuBW(cgroup, period, quota) < 0) return -1; return 0; @@ -9120,7 +9120,7 @@ qemuSetVcpusBWLive(virDomainObj *vm, virCgroup *cgroup, false, &cgroup_vcpu) < 0) return -1; - if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0) + if (virDomainCgroupSetupVcpuBW(cgroup_vcpu, period, quota) < 0) return -1; } @@ -9141,7 +9141,7 @@ qemuSetEmulatorBandwidthLive(virCgroup *cgroup, false, &cgroup_emulator) < 0) return -1; - if (qemuSetupCgroupVcpuBW(cgroup_emulator, period, quota) < 0) + if (virDomainCgroupSetupVcpuBW(cgroup_emulator, period, quota) < 0) return -1; return 0; @@ -9168,7 +9168,7 @@ qemuSetIOThreadsBWLive(virDomainObj *vm, virCgroup *cgroup, false, &cgroup_iothread) < 0) return -1; - if (qemuSetupCgroupVcpuBW(cgroup_iothread, period, quota) < 0) + if (virDomainCgroupSetupVcpuBW(cgroup_iothread, period, quota) < 0) return -1; } diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index f36de2385a..da7143eba2 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -37,6 +37,7 @@ #include "qemu_snapshot.h" #include "qemu_virtiofs.h" #include "domain_audit.h" +#include "domain_cgroup.h" #include "netdev_bandwidth_conf.h" #include "domain_nwfilter.h" #include "virlog.h" @@ -6527,11 +6528,11 @@ qemuDomainSetVcpusLive(virQEMUDriver *driver, bool enable) { qemuDomainObjPrivate *priv = vm->privateData; - qemuCgroupEmulatorAllNodesData *emulatorCgroup = NULL; + virCgroupEmulatorAllNodesData *emulatorCgroup = NULL; ssize_t nextvcpu = -1; int ret = -1; - if (qemuCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0) + if (virDomainCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0) goto cleanup; if (enable) { @@ -6552,7 +6553,7 @@ qemuDomainSetVcpusLive(virQEMUDriver *driver, ret = 0; cleanup: - qemuCgroupEmulatorAllNodesRestore(emulatorCgroup); + virDomainCgroupEmulatorAllNodesRestore(emulatorCgroup); return ret; } diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 336f0bab2e..1673c05497 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -73,6 +73,7 @@ #include "virpidfile.h" #include "virhostcpu.h" #include "domain_audit.h" +#include "domain_cgroup.h" #include "domain_nwfilter.h" #include "domain_validate.h" #include "locking/domain_lock.h" @@ -2686,7 +2687,7 @@ qemuProcessSetupPid(virDomainObj *vm, if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) { if (use_cpumask && - qemuSetupCgroupCpusetCpus(cgroup, use_cpumask) < 0) + virDomainCgroupSetupCpusetCpus(cgroup, use_cpumask) < 0) goto cleanup; if (mem_mask && virCgroupSetCpusetMems(cgroup, mem_mask) < 0) @@ -2695,7 +2696,7 @@ qemuProcessSetupPid(virDomainObj *vm, } if ((period || quota) && - qemuSetupCgroupVcpuBW(cgroup, period, quota) < 0) + virDomainCgroupSetupVcpuBW(cgroup, period, quota) < 0) goto cleanup; /* Move the thread to the sub dir */ @@ -5952,7 +5953,7 @@ qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver, { unsigned int maxvcpus = virDomainDefGetVcpusMax(vm->def); qemuDomainObjPrivate *priv = vm->privateData; - qemuCgroupEmulatorAllNodesData *emulatorCgroup = NULL; + virCgroupEmulatorAllNodesData *emulatorCgroup = NULL; virDomainVcpuDef *vcpu; qemuDomainVcpuPrivate *vcpupriv; size_t i; @@ -5980,7 +5981,7 @@ qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver, qsort(bootHotplug, nbootHotplug, sizeof(*bootHotplug), qemuProcessVcpusSortOrder); - if (qemuCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0) + if (virDomainCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0) goto cleanup; for (i = 0; i < nbootHotplug; i++) { @@ -6004,7 +6005,7 @@ qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver, ret = 0; cleanup: - qemuCgroupEmulatorAllNodesRestore(emulatorCgroup); + virDomainCgroupEmulatorAllNodesRestore(emulatorCgroup); return ret; } @@ -6994,7 +6995,7 @@ qemuProcessPrepareHost(virQEMUDriver *driver, /* Ensure no historical cgroup for this VM is lying around bogus * settings */ VIR_DEBUG("Ensuring no historical cgroup is lying around"); - qemuRemoveCgroup(vm); + virDomainCgroupRemoveCgroup(vm, priv->cgroup, priv->machineName); if (g_mkdir_with_parents(cfg->logDir, 0777) < 0) { virReportSystemError(errno, @@ -7603,7 +7604,7 @@ qemuProcessLaunch(virConnectPtr conn, goto cleanup; VIR_DEBUG("Setting global CPU cgroup (if required)"); - if (qemuSetupGlobalCpuCgroup(vm) < 0) + if (virDomainCgroupSetupGlobalCpuCgroup(vm, priv->cgroup, priv->autoNodeset) < 0) goto cleanup; VIR_DEBUG("Setting vCPU tuning/settings"); @@ -8202,7 +8203,7 @@ void qemuProcessStop(virQEMUDriver *driver, } retry: - if ((ret = qemuRemoveCgroup(vm)) < 0) { + if ((ret = virDomainCgroupRemoveCgroup(vm, priv->cgroup, priv->machineName)) < 0) { if (ret == -EBUSY && (retries++ < 5)) { g_usleep(200*1000); goto retry; @@ -8761,7 +8762,12 @@ qemuProcessReconnect(void *opaque) if (!priv->machineName) goto error; - if (qemuConnectCgroup(obj) < 0) + if (virDomainCgroupConnectCgroup("qemu", + obj, + priv->cgroup, + cfg->cgroupControllers, + priv->driver->privileged, + priv->machineName) < 0) goto error; if (qemuDomainPerfRestart(obj) < 0) -- 2.27.0