From: "Daniel P. Berrange" <berrange@xxxxxxxxxx> Use numactl to set NUMA memory placement for LXC containers * src/lxc/lxc_controller.c: Support NUMA memory placement --- src/lxc/lxc_controller.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 111 insertions(+), 0 deletions(-) diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index d3c3b61..4f2326b 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -48,6 +48,11 @@ # include <cap-ng.h> #endif +#if HAVE_NUMACTL +# define NUMA_VERSION1_COMPATIBILITY 1 +# include <numa.h> +#endif + #include "virterror_internal.h" #include "logging.h" #include "util.h" @@ -223,6 +228,101 @@ cleanup: return ret; } + +#if HAVE_NUMACTL +static int lxcSetContainerNUMAPolicy(virDomainDefPtr def) +{ + nodemask_t mask; + int mode = -1; + int node = -1; + int ret = -1; + int i = 0; + int maxnode = 0; + bool warned = false; + + if (!def->numatune.memory.nodemask) + return 0; + + VIR_DEBUG("Setting NUMA memory policy"); + + if (numa_available() < 0) { + lxcError(VIR_ERR_CONFIG_UNSUPPORTED, + "%s", _("Host kernel is not aware of NUMA.")); + return -1; + } + + maxnode = numa_max_node() + 1; + + /* Convert nodemask to NUMA bitmask. */ + nodemask_zero(&mask); + for (i = 0; i < VIR_DOMAIN_CPUMASK_LEN; i++) { + if (def->numatune.memory.nodemask[i]) { + if (i > NUMA_NUM_NODES) { + lxcError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Host cannot support NUMA node %d"), i); + return -1; + } + if (i > maxnode && !warned) { + VIR_WARN("nodeset is out of range, there is only %d NUMA " + "nodes on host", maxnode); + warned = true; + } + nodemask_set(&mask, i); + } + } + + mode = def->numatune.memory.mode; + + if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + numa_set_bind_policy(1); + numa_set_membind(&mask); + numa_set_bind_policy(0); + } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_PREFERRED) { + int nnodes = 0; + for (i = 0; i < NUMA_NUM_NODES; i++) { + if (nodemask_isset(&mask, i)) { + node = i; + nnodes++; + } + } + + if (nnodes != 1) { + lxcError(VIR_ERR_CONFIG_UNSUPPORTED, + "%s", _("NUMA memory tuning in 'preferred' mode " + "only supports single node")); + goto cleanup; + } + + numa_set_bind_policy(0); + numa_set_preferred(node); + } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE) { + numa_set_interleave_mask(&mask); + } else { + lxcError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Unable to set NUMA policy %s"), + virDomainNumatuneMemModeTypeToString(mode)); + goto cleanup; + } + + ret = 0; + +cleanup: + return ret; +} +#else +static int lxcSetContainerNUMAPolicy(virDomainDefPtr def) +{ + if (def->numatune.memory.nodemask) { + lxcError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("NUMA policy is not available on this platform")); + return -1; + } + + return 0; +} +#endif + + static int lxcSetContainerCpuTune(virCgroupPtr cgroup, virDomainDefPtr def) { int ret = -1; @@ -435,6 +535,17 @@ static int lxcSetContainerResources(virDomainDefPtr def) getpid(), def->name); } + rc = -1; + + /* These must come *after* placing the task in + * the cgroup, otherwise the 'cpuset' controller + * will reset the values we've just defined! + */ + if (lxcSetContainerNUMAPolicy(def) < 0) + goto cleanup; + + rc = 0; + cleanup: virCgroupFree(&driver); virCgroupFree(&cgroup); -- 1.7.6.4 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list