QEMU can call madvise(.., MADV_REMOVE) in some cases. Expose this feature to users via a new element/attribute, discard. Signed-off-by: Michal Privoznik <mprivozn@xxxxxxxxxx> --- docs/formatdomain.html.in | 34 +++++++++++++++++++++++-- docs/schemas/cputypes.rng | 5 ++++ docs/schemas/domaincommon.rng | 10 ++++++++ src/conf/domain_conf.c | 36 ++++++++++++++++++++++++++- src/conf/domain_conf.h | 3 +++ src/conf/numa_conf.c | 27 ++++++++++++++++++++ src/conf/numa_conf.h | 3 +++ src/libvirt_private.syms | 1 + tests/qemuxml2argvdata/hugepages-pages.xml | 3 ++- tests/qemuxml2argvdata/hugepages-pages3.xml | 4 +-- tests/qemuxml2argvdata/hugepages-pages7.xml | 4 +-- tests/qemuxml2xmloutdata/hugepages-pages.xml | 3 ++- tests/qemuxml2xmloutdata/hugepages-pages3.xml | 4 +-- 13 files changed, 126 insertions(+), 11 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 072f9a0fdc..80172c18d0 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -1016,6 +1016,7 @@ <source type="file|anonymous"/> <access mode="shared|private"/> <allocation mode="immediate|ondemand"/> + <discard/> </memoryBacking> ... </domain> @@ -1070,6 +1071,14 @@ numa node by <code>memAccess</code></dd> <dt><code>allocation</code></dt> <dd>Specify when allocate the memory</dd> + <dt><code>discard</code></dt> + <dd>When set and supported by the hypervisor, the memory + content is discarded just before the guest shuts down (or + when a DIMM module is unplugged). Please note that this is + just an optimization and is not guaranteed to work in + all cases (e.g. when the hypervisor crashes). + <span class="since">Since 4.4.0</span> (QEMU/KVM only) + </dd> </dl> @@ -1608,7 +1617,7 @@ <cpu> ... <numa> - <cell id='0' cpus='0-3' memory='512000' unit='KiB'/> + <cell id='0' cpus='0-3' memory='512000' unit='KiB' discard='yes'/> <cell id='1' cpus='4-7' memory='512000' unit='KiB' memAccess='shared'/> </numa> ... 
@@ -1634,6 +1643,13 @@ <code>memAccess</code> can control whether the memory is to be mapped as "shared" or "private". This is valid only for hugepages-backed memory and nvdimm modules. + + Each <code>cell</code> element can have an optional + <code>discard</code> attribute which fine tunes the discard + feature for given numa node as described under + <a href="#elementsMemoryBacking">Memory Backing</a>. + Accepted values are <code>yes</code> and <code>no</code>. + <span class='since'>Since 4.4.0</span> </p> <p> @@ -7883,7 +7899,7 @@ qemu-kvm -net nic,model=? /dev/null <pre> ... <devices> - <memory model='dimm' access='private'> + <memory model='dimm' access='private' discard='yes'> <target> <size unit='KiB'>524287</size> <node>0</node> @@ -7937,6 +7953,20 @@ qemu-kvm -net nic,model=? /dev/null </p> </dd> + <dt><code>discard</code></dt> + <dd> + <p> + An optional attribute <code>discard</code> + (<span class="since">since 4.4.0</span>) that provides + capability to fine tune discard of data on per module + basis. Accepted values are <code>yes</code> and + <code>no</code>. The feature is described here: + <a href="#elementsMemoryBacking">Memory Backing</a>. + This attribute is allowed only for + <code>model='dimm'</code>. 
+ </p> + </dd> + <dt><code>source</code></dt> <dd> <p> diff --git a/docs/schemas/cputypes.rng b/docs/schemas/cputypes.rng index c45b6dfb28..1f1e0e36d5 100644 --- a/docs/schemas/cputypes.rng +++ b/docs/schemas/cputypes.rng @@ -129,6 +129,11 @@ </choice> </attribute> </optional> + <optional> + <attribute name="discard"> + <ref name="virYesNo"/> + </attribute> + </optional> <optional> <element name="distances"> <oneOrMore> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 8c446ca418..13af5b74a4 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -633,6 +633,11 @@ </attribute> </element> </optional> + <optional> + <element name="discard"> + <empty/> + </element> + </optional> </interleave> </element> </optional> @@ -5124,6 +5129,11 @@ </choice> </attribute> </optional> + <optional> + <attribute name="discard"> + <ref name="virYesNo"/> + </attribute> + </optional> <interleave> <optional> <ref name="memorydev-source"/> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 6786d81c9b..86229db654 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -5516,6 +5516,20 @@ virDomainVideoDefValidate(const virDomainVideoDef *video) } +static int +virDomainMemoryDefValidate(const virDomainMemoryDef *mem) +{ + if (mem->model == VIR_DOMAIN_MEMORY_MODEL_NVDIMM && + mem->discard == VIR_TRISTATE_BOOL_YES) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("discard is not supported for nvdimms")); + return -1; + } + + return 0; +} + + static int virDomainDeviceDefValidateInternal(const virDomainDeviceDef *dev, const virDomainDef *def) @@ -5548,6 +5562,9 @@ virDomainDeviceDefValidateInternal(const virDomainDeviceDef *dev, case VIR_DOMAIN_DEVICE_VIDEO: return virDomainVideoDefValidate(dev->data.video); + case VIR_DOMAIN_DEVICE_MEMORY: + return virDomainMemoryDefValidate(dev->data.memory); + case VIR_DOMAIN_DEVICE_LEASE: case VIR_DOMAIN_DEVICE_FS: case VIR_DOMAIN_DEVICE_INPUT: @@ -5560,7 +5577,6 
@@ virDomainDeviceDefValidateInternal(const virDomainDeviceDef *dev, case VIR_DOMAIN_DEVICE_SHMEM: case VIR_DOMAIN_DEVICE_TPM: case VIR_DOMAIN_DEVICE_PANIC: - case VIR_DOMAIN_DEVICE_MEMORY: case VIR_DOMAIN_DEVICE_IOMMU: case VIR_DOMAIN_DEVICE_NONE: case VIR_DOMAIN_DEVICE_LAST: @@ -15673,6 +15689,16 @@ virDomainMemoryDefParseXML(virDomainXMLOptionPtr xmlopt, } VIR_FREE(tmp); + if ((tmp = virXMLPropString(memdevNode, "discard"))) { + if ((val = virTristateBoolTypeFromString(tmp)) <= 0) { + virReportError(VIR_ERR_XML_ERROR, + _("invalid discard value '%s'"), tmp); + goto error; + } + + def->discard = val; + } + /* source */ if ((node = virXPathNode("./source", ctxt)) && virDomainMemorySourceDefParseXML(node, ctxt, def) < 0) @@ -18999,6 +19025,9 @@ virDomainDefParseXML(xmlDocPtr xml, if (virXPathBoolean("boolean(./memoryBacking/locked)", ctxt)) def->mem.locked = true; + if (virXPathBoolean("boolean(./memoryBacking/discard)", ctxt)) + def->mem.discard = VIR_TRISTATE_BOOL_YES; + /* Extract blkio cgroup tunables */ if (virXPathUInt("string(./blkiotune/weight)", ctxt, &def->blkio.weight) < 0) @@ -25259,6 +25288,9 @@ virDomainMemoryDefFormat(virBufferPtr buf, if (def->access) virBufferAsprintf(buf, " access='%s'", virDomainMemoryAccessTypeToString(def->access)); + if (def->discard) + virBufferAsprintf(buf, " discard='%s'", + virTristateBoolTypeToString(def->discard)); virBufferAddLit(buf, ">\n"); virBufferAdjustIndent(buf, 2); @@ -26605,6 +26637,8 @@ virDomainMemtuneFormat(virBufferPtr buf, if (mem->allocation) virBufferAsprintf(&childBuf, "<allocation mode='%s'/>\n", virDomainMemoryAllocationTypeToString(mem->allocation)); + if (mem->discard) + virBufferAddLit(&childBuf, "<discard/>\n"); if (virXMLFormatElement(buf, "memoryBacking", NULL, &childBuf) < 0) goto cleanup; diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 15d228ba9e..07d04fb2f9 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -2105,6 +2105,7 @@ typedef enum { struct 
_virDomainMemoryDef { virDomainMemoryAccess access; + virTristateBool discard; /* source */ virBitmapPtr sourceNodes; @@ -2267,6 +2268,8 @@ struct _virDomainMemtune { int source; /* enum virDomainMemorySource */ int access; /* enum virDomainMemoryAccess */ int allocation; /* enum virDomainMemoryAllocation */ + + virTristateBool discard; }; typedef struct _virDomainPowerManagement virDomainPowerManagement; diff --git a/src/conf/numa_conf.c b/src/conf/numa_conf.c index 9307dd93d3..bf42946a99 100644 --- a/src/conf/numa_conf.c +++ b/src/conf/numa_conf.c @@ -77,6 +77,7 @@ struct _virDomainNuma { virBitmapPtr nodeset; /* host memory nodes where this guest node resides */ virDomainNumatuneMemMode mode; /* memory mode selection */ virDomainMemoryAccess memAccess; /* shared memory access configuration */ + int discard; /* discard-data for memory-backend-file, virTristateBool */ struct _virDomainNumaDistance { unsigned int value; /* locality value for node i->j or j->i */ @@ -947,6 +948,18 @@ virDomainNumaDefCPUParseXML(virDomainNumaPtr def, VIR_FREE(tmp); } + if ((tmp = virXMLPropString(nodes[i], "discard"))) { + if ((rc = virTristateBoolTypeFromString(tmp)) <= 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Invalid 'discard' attribute value '%s'"), + tmp); + goto cleanup; + } + + def->mem_nodes[cur_cell].discard = rc; + VIR_FREE(tmp); + } + /* Parse NUMA distances info */ if (virDomainNumaDefNodeDistanceParseXML(def, ctxt, cur_cell) < 0) goto cleanup; @@ -967,6 +980,7 @@ virDomainNumaDefCPUFormatXML(virBufferPtr buf, virDomainNumaPtr def) { virDomainMemoryAccess memAccess; + int discard; char *cpustr; size_t ncells = virDomainNumaGetNodeCount(def); size_t i; @@ -980,6 +994,7 @@ virDomainNumaDefCPUFormatXML(virBufferPtr buf, int ndistances; memAccess = virDomainNumaGetNodeMemoryAccessMode(def, i); + discard = virDomainNumaGetNodeDiscard(def, i); if (!(cpustr = virBitmapFormat(virDomainNumaGetNodeCpumask(def, i)))) return -1; @@ -994,6 +1009,10 @@ 
virDomainNumaDefCPUFormatXML(virBufferPtr buf, virBufferAsprintf(buf, " memAccess='%s'", virDomainMemoryAccessTypeToString(memAccess)); + if (discard) + virBufferAsprintf(buf, " discard='%s'", + virTristateBoolTypeToString(discard)); + ndistances = def->mem_nodes[i].ndistances; if (ndistances == 0) { virBufferAddLit(buf, "/>\n"); @@ -1304,6 +1323,14 @@ virDomainNumaGetNodeMemoryAccessMode(virDomainNumaPtr numa, } +virTristateBool +virDomainNumaGetNodeDiscard(virDomainNumaPtr numa, + size_t node) +{ + return numa->mem_nodes[node].discard; +} + + unsigned long long virDomainNumaGetNodeMemorySize(virDomainNumaPtr numa, size_t node) diff --git a/src/conf/numa_conf.h b/src/conf/numa_conf.h index 7947fdb219..85269be565 100644 --- a/src/conf/numa_conf.h +++ b/src/conf/numa_conf.h @@ -102,6 +102,9 @@ virBitmapPtr virDomainNumaGetNodeCpumask(virDomainNumaPtr numa, virDomainMemoryAccess virDomainNumaGetNodeMemoryAccessMode(virDomainNumaPtr numa, size_t node) ATTRIBUTE_NONNULL(1); +virTristateBool virDomainNumaGetNodeDiscard(virDomainNumaPtr numa, + size_t node) + ATTRIBUTE_NONNULL(1); unsigned long long virDomainNumaGetNodeMemorySize(virDomainNumaPtr numa, size_t node) ATTRIBUTE_NONNULL(1); diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 5fea1bca41..d28a751ebd 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -750,6 +750,7 @@ virDomainNumaGetMaxCPUID; virDomainNumaGetMemorySize; virDomainNumaGetNodeCount; virDomainNumaGetNodeCpumask; +virDomainNumaGetNodeDiscard; virDomainNumaGetNodeDistance; virDomainNumaGetNodeMemoryAccessMode; virDomainNumaGetNodeMemorySize; diff --git a/tests/qemuxml2argvdata/hugepages-pages.xml b/tests/qemuxml2argvdata/hugepages-pages.xml index f9270782d4..cba83e754c 100644 --- a/tests/qemuxml2argvdata/hugepages-pages.xml +++ b/tests/qemuxml2argvdata/hugepages-pages.xml @@ -8,6 +8,7 @@ <page size='2048' unit='KiB' nodeset='1'/> <page size='1048576' unit='KiB' nodeset='0,2-3'/> </hugepages> + <discard/> 
</memoryBacking> <vcpu placement='static'>4</vcpu> <numatune> @@ -21,7 +22,7 @@ <cpu> <numa> <cell id='0' cpus='0' memory='1048576' unit='KiB'/> - <cell id='1' cpus='1' memory='1048576' unit='KiB'/> + <cell id='1' cpus='1' memory='1048576' unit='KiB' discard='no'/> <cell id='2' cpus='2' memory='1048576' unit='KiB'/> <cell id='3' cpus='3' memory='1048576' unit='KiB'/> </numa> diff --git a/tests/qemuxml2argvdata/hugepages-pages3.xml b/tests/qemuxml2argvdata/hugepages-pages3.xml index 3d3b3f3cc3..147acc4c95 100644 --- a/tests/qemuxml2argvdata/hugepages-pages3.xml +++ b/tests/qemuxml2argvdata/hugepages-pages3.xml @@ -15,8 +15,8 @@ </os> <cpu> <numa> - <cell id='0' cpus='0' memory='262144' unit='KiB'/> - <cell id='1' cpus='1' memory='786432' unit='KiB'/> + <cell id='0' cpus='0' memory='262144' unit='KiB' discard='no'/> + <cell id='1' cpus='1' memory='786432' unit='KiB' discard='yes'/> </numa> </cpu> <clock offset='utc'/> diff --git a/tests/qemuxml2argvdata/hugepages-pages7.xml b/tests/qemuxml2argvdata/hugepages-pages7.xml index d75cf5afa3..28c72f85a7 100644 --- a/tests/qemuxml2argvdata/hugepages-pages7.xml +++ b/tests/qemuxml2argvdata/hugepages-pages7.xml @@ -43,7 +43,7 @@ <memballoon model='virtio'> <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> </memballoon> - <memory model='dimm'> + <memory model='dimm' discard='no'> <source> <nodemask>1-3</nodemask> <pagesize unit='KiB'>1048576</pagesize> @@ -54,7 +54,7 @@ </target> <address type='dimm' slot='0'/> </memory> - <memory model='dimm' access='private'> + <memory model='dimm' access='private' discard='yes'> <target> <size unit='KiB'>524287</size> <node>0</node> diff --git a/tests/qemuxml2xmloutdata/hugepages-pages.xml b/tests/qemuxml2xmloutdata/hugepages-pages.xml index 498610a217..292454588e 100644 --- a/tests/qemuxml2xmloutdata/hugepages-pages.xml +++ b/tests/qemuxml2xmloutdata/hugepages-pages.xml @@ -8,6 +8,7 @@ <page size='2048' unit='KiB' nodeset='1'/> <page size='1048576' unit='KiB' 
nodeset='0,2-3'/> </hugepages> + <discard/> </memoryBacking> <vcpu placement='static'>4</vcpu> <numatune> @@ -21,7 +22,7 @@ <cpu> <numa> <cell id='0' cpus='0' memory='1048576' unit='KiB'/> - <cell id='1' cpus='1' memory='1048576' unit='KiB'/> + <cell id='1' cpus='1' memory='1048576' unit='KiB' discard='no'/> <cell id='2' cpus='2' memory='1048576' unit='KiB'/> <cell id='3' cpus='3' memory='1048576' unit='KiB'/> </numa> diff --git a/tests/qemuxml2xmloutdata/hugepages-pages3.xml b/tests/qemuxml2xmloutdata/hugepages-pages3.xml index be21c3eddd..90e6efa5ea 100644 --- a/tests/qemuxml2xmloutdata/hugepages-pages3.xml +++ b/tests/qemuxml2xmloutdata/hugepages-pages3.xml @@ -15,8 +15,8 @@ </os> <cpu> <numa> - <cell id='0' cpus='0' memory='262144' unit='KiB'/> - <cell id='1' cpus='1' memory='786432' unit='KiB'/> + <cell id='0' cpus='0' memory='262144' unit='KiB' discard='no'/> + <cell id='1' cpus='1' memory='786432' unit='KiB' discard='yes'/> </numa> </cpu> <clock offset='utc'/> -- 2.16.1 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list