On Tue, Oct 06, 2020 at 13:47:25 +0800, Luyao Zhong wrote: > <numatune> > <memory mode="strict" nodeset="1-4,^3" migratable="yes/no" /> > <memnode cellid="0" mode="strict" nodeset="1"/> > <memnode cellid="2" mode="preferred" nodeset="2"/> > </numatune> > > Attribute ``migratable`` will be 'no' by default, and 'yes' indicates > that it allows operating system or hypervisor migrating the memory > pages between different memory nodes, that also means we will not > rely on hypervisor to set the memory policy or memory affinity, we only > use cgroups to restrict the memory nodes, so if ``migratable`` is 'yes', > the ``mode`` which indicates memory policy will be ignored. Note that I'm not reviewing whether this is justified and makes sense. I'm commenting purely on the code. > --- We require that patches are split into logical blocks. You need to split this commit at least into following patches: > docs/formatdomain.rst | 8 +++- > docs/schemas/domaincommon.rng | 5 +++ > src/conf/numa_conf.c | 45 +++++++++++++++++++ > src/conf/numa_conf.h | 3 ++ > src/libvirt_private.syms | 1 + Docs, schema and parser changes should be separated > src/qemu/qemu_command.c | 6 ++- > src/qemu/qemu_process.c | 21 +++++++++ > .../numatune-memory-migratable.args | 34 ++++++++++++++ > .../numatune-memory-migratable.err | 1 + > .../numatune-memory-migratable.xml | 33 ++++++++++++++ > tests/qemuxml2argvtest.c | 5 +++ Implementation in qemu can be all together with tests. Please also add a qemuxml2xmltest case. I've provided some more feedback in a recent review: https://www.redhat.com/archives/libvir-list/2020-October/msg00231.html specifically some guidance on tests: https://www.redhat.com/archives/libvir-list/2020-October/msg00395.html > 11 files changed, 160 insertions(+), 2 deletions(-) > create mode 100644 tests/qemuxml2argvdata/numatune-memory-migratable.args > create mode 100644 tests/qemuxml2argvdata/numatune-memory-migratable.err > create mode 100644 tests/qemuxml2argvdata/numatune-memory-migratable.xml > > diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst > index cc4f91d4ea..4e386a0c3d 100644 > --- a/docs/formatdomain.rst > +++ b/docs/formatdomain.rst [...] > @@ -1097,6 +1097,12 @@ NUMA Node Tuning > will be ignored if it's specified. If ``placement`` of ``vcpu`` is 'auto', > and ``numatune`` is not specified, a default ``numatune`` with ``placement`` > 'auto' and ``mode`` 'strict' will be added implicitly. :since:`Since 0.9.3` > + Attribute ``migratable`` is 'no' by default, and 'yes' indicates that it > + allows operating system or hypervisor migrating the memory pages between > + different memory nodes, that also means we will not rely on hypervisor to > + set the memory policy or memory affinity, we only use cgroups to restrict > + the memory nodes, so if ``migratable`` is 'yes', the ``mode`` which indicates > + memory policy will be ignored. So ... does this make us ignore the per NUMA-node set 'mode'? If yes, the code should actually reject any configs where we ignore some settings rather than just relying on the docs. > ``memnode`` > Optional ``memnode`` elements can specify memory allocation policies per each > guest NUMA node. For those nodes having no corresponding ``memnode`` element, [...] > diff --git a/src/conf/numa_conf.c b/src/conf/numa_conf.c > index 6653ba05a6..c14ba1295c 100644 > --- a/src/conf/numa_conf.c > +++ b/src/conf/numa_conf.c [...] > @@ -292,6 +294,15 @@ virDomainNumatuneParseXML(virDomainNumaPtr numa, > placement = VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO; > > if (node) { > + if ((tmp = virXMLPropString(node, "migratable")) && > + (migratable = virTristateBoolTypeFromString(tmp)) <= 0) { > + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, VIR_ERR_XML_ERROR > + _("Invalid 'migratable' attribute value '%s'"), tmp); > + goto cleanup; > + } > + numa->memory.migratable = migratable; > + VIR_FREE(tmp); > + > if ((tmp = virXMLPropString(node, "mode")) && > (mode = virDomainNumatuneMemModeTypeFromString(tmp)) < 0) { > virReportError(VIR_ERR_CONFIG_UNSUPPORTED, > @@ -369,6 +380,11 @@ virDomainNumatuneFormatXML(virBufferPtr buf, > tmp = virDomainNumatuneMemModeTypeToString(numatune->memory.mode); > virBufferAsprintf(buf, "<memory mode='%s' ", tmp); > > + if (numatune->memory.migratable) { Use an explicit condition here: if (numatune->memory.migratable != VIR_TRISTATE_BOOL_ABSENT) > + tmp = virTristateBoolTypeToString(numatune->memory.migratable); > + virBufferAsprintf(buf, "migratable='%s' ", tmp); > + } > + > if (numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC) { > if (!(nodeset = virBitmapFormat(numatune->memory.nodeset))) > return -1; > @@ -427,6 +443,35 @@ virDomainNumaFree(virDomainNumaPtr numa) > VIR_FREE(numa); > } > > +/** > + * virDomainNumatuneGetMigratable: > + * @numatune: pointer to numatune definition > + * @migratable: where to store the result This argument is not used by the only place calling it. Please remove it. > + * > + * Get the migratable attribute for domain's memory. It's safe to > + * pass NULL to @migratable if the return value is the only info needed. > + * > + * Returns: migratable value > + */ > +int virDomainNumatuneGetMigratable(virDomainNumaPtr numatune, > + virTristateBool *migratable) > +{ > + virTristateBool tmp_migratable = 0; > + > + if (!numatune) > + return tmp_migratable; Just directly return VIR_TRISTATE_BOOL_ABSENT here ... > + > + if (numatune->memory.specified) > + tmp_migratable = numatune->memory.migratable; > + else > + return tmp_migratable; > + > + if (migratable != NULL) > + *migratable = tmp_migratable; Also none of this is really needed, just directly return the value. > + > + return tmp_migratable; > +} > + > /** > * virDomainNumatuneGetMode: > * @numatune: pointer to numatune definition [...] > diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c > index 476cf6972e..882e7e6ba2 100644 > --- a/src/qemu/qemu_command.c > +++ b/src/qemu/qemu_command.c > @@ -3189,7 +3189,11 @@ qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps, > return -1; > } > > - if (nodemask) { > + // If migratable attribute is yes, we should only use cgroups setting > + // memory affinity, and skip passing the host-nodes and policy parameters > + // to QEMU command line. We don't use line comments ('//') in our code base. Please use block comments. > + if (nodemask && > + virDomainNumatuneGetMigratable(def->numa, NULL) != VIR_TRISTATE_BOOL_YES) { > if (!virNumaNodesetIsAvailable(nodemask)) > return -1; > if (virJSONValueObjectAdd(props, > diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c > index 6b5de29fdb..9c1116064b 100644 > --- a/src/qemu/qemu_process.c > +++ b/src/qemu/qemu_process.c [...] > @@ -2694,6 +2695,26 @@ qemuProcessSetupPid(virDomainObjPtr vm, > &mem_mask, -1) < 0) > goto cleanup; > > + // For vCPU threads, mem_mask is different among cells > + if (nameval == VIR_CGROUP_THREAD_VCPU) { > + virDomainNumaPtr numatune = vm->def->numa; > + virBitmapPtr numanode_cpumask = NULL; > + for (i = 0; i < virDomainNumaGetNodeCount(numatune); i++) { > + numanode_cpumask = virDomainNumaGetNodeCpumask(numatune, i); > + // 'i' indicates the cell id, if the vCPU id is in this cell, > + // we need get the corresonding nodeset > + if (virBitmapIsBitSet(numanode_cpumask, id)) { > + if (virDomainNumatuneMaybeFormatNodeset(numatune, > + priv->autoNodeset, > + &mem_mask, i) < 0) { > + goto cleanup; > + } else { > + break; > + } > + } > + } > + } This hunk doesn't seem to belong to this patch. The comments (apart from using wrong syntax) don't seem to justify why or how this relates to memory migrability. > + > if (virCgroupNewThread(priv->cgroup, nameval, id, true, &cgroup) < 0) > goto cleanup; > [...] > diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c > index 49567326d4..e6554518d7 100644 > --- a/tests/qemuxml2argvtest.c > +++ b/tests/qemuxml2argvtest.c > @@ -1950,6 +1950,11 @@ mymain(void) > > DO_TEST("numatune-memory", NONE); > DO_TEST_PARSE_ERROR("numatune-memory-invalid-nodeset", NONE); > + DO_TEST("numatune-memory-migratable", > + QEMU_CAPS_NUMA, > + QEMU_CAPS_OBJECT_MEMORY_RAM); > + DO_TEST_PARSE_ERROR("numatune-memory-migratable", NONE); Pleased don't use DO_TEST any more. Use DO_TEST_CAPS_* (such as DO_TEST_CAPS_LATEST) instead for new tests. The negative test doesn't seem to make sense here since it's not testing a negative case in your patch. This is visible from the error message it records: > +++ b/tests/qemuxml2argvdata/numatune-memory-migratable.err > @@ -0,0 +1 @@ > +unsupported configuration: Per-node memory binding is not supported with this QEMU