Reviewed-by: Daniel Henrique Barboza <danielhb413@xxxxxxxxx> Signed-off-by: Luyao Zhong <luyao.zhong@xxxxxxxxx> --- include/libvirt/libvirt-domain.h | 1 + src/conf/numa_conf.c | 9 +++++ src/qemu/qemu_command.c | 6 ++- src/qemu/qemu_process.c | 27 +++++++++++++ src/util/virnuma.c | 3 ++ .../numatune-memnode-invalid-mode.err | 1 + .../numatune-memnode-invalid-mode.xml | 33 +++++++++++++++ ...emnode-restrictive-mode.x86_64-latest.args | 40 +++++++++++++++++++ .../numatune-memnode-restrictive-mode.xml | 33 +++++++++++++++ tests/qemuxml2argvtest.c | 2 + ...memnode-restrictive-mode.x86_64-latest.xml | 40 +++++++++++++++++++ tests/qemuxml2xmltest.c | 1 + 12 files changed, 195 insertions(+), 1 deletion(-) create mode 100644 tests/qemuxml2argvdata/numatune-memnode-invalid-mode.err create mode 100644 tests/qemuxml2argvdata/numatune-memnode-invalid-mode.xml create mode 100644 tests/qemuxml2argvdata/numatune-memnode-restrictive-mode.x86_64-latest.args create mode 100644 tests/qemuxml2argvdata/numatune-memnode-restrictive-mode.xml create mode 100644 tests/qemuxml2xmloutdata/numatune-memnode-restrictive-mode.x86_64-latest.xml diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h index de2456812c..eabb3c091b 100644 --- a/include/libvirt/libvirt-domain.h +++ b/include/libvirt/libvirt-domain.h @@ -1527,6 +1527,7 @@ typedef enum { VIR_DOMAIN_NUMATUNE_MEM_STRICT = 0, VIR_DOMAIN_NUMATUNE_MEM_PREFERRED = 1, VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE = 2, + VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE = 3, # ifdef VIR_ENUM_SENTINELS VIR_DOMAIN_NUMATUNE_MEM_LAST /* This constant is subject to change */ diff --git a/src/conf/numa_conf.c b/src/conf/numa_conf.c index f8a7a01ac9..df888a8dfb 100644 --- a/src/conf/numa_conf.c +++ b/src/conf/numa_conf.c @@ -43,6 +43,7 @@ VIR_ENUM_IMPL(virDomainNumatuneMemMode, "strict", "preferred", "interleave", + "restrictive", ); VIR_ENUM_IMPL(virDomainNumatunePlacement, @@ -234,6 +235,14 @@ virDomainNumatuneNodeParseXML(virDomainNumaPtr numa, _("Invalid mode attribute in memnode element")); goto cleanup; } + + if (numa->memory.mode == VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE && + mode != VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("'restrictive' mode is required in memnode element " + "when mode is 'restrictive' in memory element")); + goto cleanup; + } VIR_FREE(tmp); mem_node->mode = mode; } diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index b06a086e18..9bf2cc8ae8 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -174,6 +174,7 @@ VIR_ENUM_IMPL(qemuNumaPolicy, "bind", "preferred", "interleave", + "restricted", ); @@ -3159,7 +3160,10 @@ qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps, return -1; } - if (nodemask) { + /* If mode is "restrictive", we should only use cgroups setting allowed memory + * nodes, and skip passing the host-nodes and policy parameters to QEMU command + * line which means we will use system default memory policy. */ + if (nodemask && mode != VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE) { if (!virNumaNodesetIsAvailable(nodemask)) return -1; if (virJSONValueObjectAdd(props, diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index e7421b415f..0080985dd7 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -2713,6 +2713,7 @@ qemuProcessSetupPid(virDomainObjPtr vm, g_autoptr(virBitmap) hostcpumap = NULL; g_autofree char *mem_mask = NULL; int ret = -1; + size_t i; if ((period || quota) && !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) { @@ -2753,6 +2754,32 @@ qemuProcessSetupPid(virDomainObjPtr vm, &mem_mask, -1) < 0) goto cleanup; + /* For vCPU threads, mem_mask is different among cells and mem_mask + * is used to set cgroups cpuset.mems for vcpu threads. If we specify + * 'restrictive' mode, that means we will set system default memory + * policy and only use cgroups to restrict allowed memory nodes. */ + if (nameval == VIR_CGROUP_THREAD_VCPU) { + virDomainNumaPtr numatune = vm->def->numa; + virBitmapPtr numanode_cpumask = NULL; + for (i = 0; i < virDomainNumaGetNodeCount(numatune); i++) { + numanode_cpumask = virDomainNumaGetNodeCpumask(numatune, i); + /* 'i' indicates the cell id, if the vCPU id is in this cell + * and mode is 'restrictive', we need get the corresponding + * nodeset. */ + if (virBitmapIsBitSet(numanode_cpumask, id) && + virDomainNumatuneGetMode(numatune, i, &mem_mode) == 0 && + mem_mode == VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE) { + if (virDomainNumatuneMaybeFormatNodeset(numatune, + priv->autoNodeset, + &mem_mask, i) < 0) { + goto cleanup; + } else { + break; + } + } + } + } + if (virCgroupNewThread(priv->cgroup, nameval, id, true, &cgroup) < 0) goto cleanup; diff --git a/src/util/virnuma.c b/src/util/virnuma.c index a05e4ac72c..ef912492c6 100644 --- a/src/util/virnuma.c +++ b/src/util/virnuma.c @@ -152,6 +152,9 @@ virNumaSetupMemoryPolicy(virDomainNumatuneMemMode mode, numa_set_interleave_mask(&mask); break; + case VIR_DOMAIN_NUMATUNE_MEM_RESTRICTIVE: + break; + case VIR_DOMAIN_NUMATUNE_MEM_LAST: break; } diff --git a/tests/qemuxml2argvdata/numatune-memnode-invalid-mode.err b/tests/qemuxml2argvdata/numatune-memnode-invalid-mode.err new file mode 100644 index 0000000000..180e64d1d8 --- /dev/null +++ b/tests/qemuxml2argvdata/numatune-memnode-invalid-mode.err @@ -0,0 +1 @@ +XML error: 'restrictive' mode is required in memnode element when mode is 'restrictive' in memory element diff --git a/tests/qemuxml2argvdata/numatune-memnode-invalid-mode.xml b/tests/qemuxml2argvdata/numatune-memnode-invalid-mode.xml new file mode 100644 index 0000000000..a7c18d4d50 --- /dev/null +++ b/tests/qemuxml2argvdata/numatune-memnode-invalid-mode.xml @@ -0,0 +1,33 @@ +<domain type='qemu'> + <name>QEMUGuest</name> + <uuid>9f4b6512-e73a-4a25-93e8-5307802821ce</uuid> + <memory unit='KiB'>24682468</memory> + <currentMemory unit='KiB'>24682468</currentMemory> + <vcpu placement='static'>32</vcpu> + <numatune> + <memory mode='restrictive' nodeset='0-7'/> + <memnode cellid='0' mode='restrictive' nodeset='3'/> + <memnode cellid='2' mode='strict' nodeset='1-2,5-7,^6'/> + </numatune> + <os> + <type arch='x86_64' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <cpu> + <numa> + <cell id='0' cpus='0' memory='20002' unit='KiB'/> + <cell id='1' cpus='1-27,29' memory='660066' unit='KiB'/> + <cell id='2' cpus='28,30-31' memory='24002400' unit='KiB'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <controller type='usb' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2argvdata/numatune-memnode-restrictive-mode.x86_64-latest.args b/tests/qemuxml2argvdata/numatune-memnode-restrictive-mode.x86_64-latest.args new file mode 100644 index 0000000000..1f15c4396e --- /dev/null +++ b/tests/qemuxml2argvdata/numatune-memnode-restrictive-mode.x86_64-latest.args @@ -0,0 +1,40 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/tmp/lib/domain--1-QEMUGuest \ +USER=test \ +LOGNAME=test \ +XDG_DATA_HOME=/tmp/lib/domain--1-QEMUGuest/.local/share \ +XDG_CACHE_HOME=/tmp/lib/domain--1-QEMUGuest/.cache \ +XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest/.config \ +QEMU_AUDIO_DRV=none \ +/usr/bin/qemu-system-x86_64 \ +-name guest=QEMUGuest,debug-threads=on \ +-S \ +-object secret,id=masterKey0,format=raw,\ +file=/tmp/lib/domain--1-QEMUGuest/master-key.aes \ +-machine pc,accel=tcg,usb=off,dump-guest-core=off \ +-cpu qemu64 \ +-m 24105 \ +-overcommit mem-lock=off \ +-smp 32,sockets=32,cores=1,threads=1 \ +-object memory-backend-ram,id=ram-node0,size=20971520 \ +-numa node,nodeid=0,cpus=0,memdev=ram-node0 \ +-object memory-backend-ram,id=ram-node1,size=676331520 \ +-numa node,nodeid=1,cpus=1-27,cpus=29,memdev=ram-node1 \ +-object memory-backend-ram,id=ram-node2,size=24578621440 \ +-numa node,nodeid=2,cpus=28,cpus=30-31,memdev=ram-node2 \ +-uuid 9f4b6512-e73a-4a25-93e8-5307802821ce \ +-display none \ +-no-user-config \ +-nodefaults \ +-chardev socket,id=charmonitor,fd=1729,server,nowait \ +-mon chardev=charmonitor,id=monitor,mode=control \ +-rtc base=utc \ +-no-shutdown \ +-no-acpi \ +-boot strict=on \ +-device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 \ +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x2 \ +-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,\ +resourcecontrol=deny \ +-msg timestamp=on diff --git a/tests/qemuxml2argvdata/numatune-memnode-restrictive-mode.xml b/tests/qemuxml2argvdata/numatune-memnode-restrictive-mode.xml new file mode 100644 index 0000000000..72949b0657 --- /dev/null +++ b/tests/qemuxml2argvdata/numatune-memnode-restrictive-mode.xml @@ -0,0 +1,33 @@ +<domain type='qemu'> + <name>QEMUGuest</name> + <uuid>9f4b6512-e73a-4a25-93e8-5307802821ce</uuid> + <memory unit='KiB'>24682468</memory> + <currentMemory unit='KiB'>24682468</currentMemory> + <vcpu placement='static'>32</vcpu> + <numatune> + <memnode cellid='0' mode='restrictive' nodeset='3'/> + <memory mode='restrictive' nodeset='0-7'/> + <memnode cellid='2' mode='restrictive' nodeset='1-2,5-7,^6'/> + </numatune> + <os> + <type arch='x86_64' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <cpu> + <numa> + <cell id='0' cpus='0' memory='20002' unit='KiB'/> + <cell id='1' cpus='1-27,29' memory='660066' unit='KiB'/> + <cell id='2' cpus='28,30-31' memory='24002400' unit='KiB'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <controller type='usb' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index 96a2b95331..e26197a30b 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -1994,6 +1994,8 @@ mymain(void) QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM); DO_TEST_PARSE_ERROR("numatune-memnode", NONE); + DO_TEST_CAPS_LATEST("numatune-memnode-restrictive-mode"); + DO_TEST_PARSE_ERROR("numatune-memnode-invalid-mode", NONE); DO_TEST("numatune-memnode-no-memory", QEMU_CAPS_NUMA, diff --git a/tests/qemuxml2xmloutdata/numatune-memnode-restrictive-mode.x86_64-latest.xml b/tests/qemuxml2xmloutdata/numatune-memnode-restrictive-mode.x86_64-latest.xml new file mode 100644 index 0000000000..e54b2483b1 --- /dev/null +++ b/tests/qemuxml2xmloutdata/numatune-memnode-restrictive-mode.x86_64-latest.xml @@ -0,0 +1,40 @@ +<domain type='qemu'> + <name>QEMUGuest</name> + <uuid>9f4b6512-e73a-4a25-93e8-5307802821ce</uuid> + <memory unit='KiB'>24682468</memory> + <currentMemory unit='KiB'>24682468</currentMemory> + <vcpu placement='static'>32</vcpu> + <numatune> + <memory mode='restrictive' nodeset='0-7'/> + <memnode cellid='0' mode='restrictive' nodeset='3'/> + <memnode cellid='2' mode='restrictive' nodeset='1-2,5,7'/> + </numatune> + <os> + <type arch='x86_64' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <cpu mode='custom' match='exact' check='none'> + <model fallback='forbid'>qemu64</model> + <numa> + <cell id='0' cpus='0' memory='20002' unit='KiB'/> + <cell id='1' cpus='1-27,29' memory='660066' unit='KiB'/> + <cell id='2' cpus='28,30-31' memory='24002400' unit='KiB'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <controller type='usb' index='0' model='piix3-uhci'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> + </controller> + <controller type='pci' index='0' model='pci-root'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='virtio'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/> + </memballoon> + </devices> +</domain> diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index 1968be6782..dea11c2e5f 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -1098,6 +1098,7 @@ mymain(void) DO_TEST("numatune-distances", QEMU_CAPS_NUMA, QEMU_CAPS_NUMA_DIST); DO_TEST("numatune-no-vcpu", QEMU_CAPS_NUMA); DO_TEST("numatune-hmat", QEMU_CAPS_NUMA_HMAT, QEMU_CAPS_OBJECT_MEMORY_RAM); + DO_TEST_CAPS_LATEST("numatune-memnode-restrictive-mode"); DO_TEST("bios-nvram", NONE); DO_TEST("bios-nvram-os-interleave", NONE); -- 2.25.4