<memoryBacking> <hugepages> <page size="1" unit="G" nodeset="0-3,5"/> <page size="2" unit="M" nodeset="4"/> </hugepages> </memoryBacking> Signed-off-by: Michal Privoznik <mprivozn@xxxxxxxxxx> --- docs/formatdomain.html.in | 18 +- docs/schemas/domaincommon.rng | 19 +- src/conf/domain_conf.c | 197 +++++++++++++++++++-- src/conf/domain_conf.h | 13 +- src/parallels/parallels_driver.c | 2 +- src/qemu/qemu_command.c | 2 +- src/qemu/qemu_conf.c | 20 ++- src/qemu/qemu_process.c | 2 +- .../qemuxml2argv-hugepages-pages.xml | 45 +++++ tests/qemuxml2xmltest.c | 1 + 10 files changed, 288 insertions(+), 31 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages.xml diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 3c85fc5..f4362e6 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -617,7 +617,9 @@ <domain> ... <memoryBacking> - <hugepages/> + <hugepages> + <page size="1" unit="G" nodeset="0-3,5"/> + <page size="2" unit="M" nodeset="4"/> <nosharepages/> <locked/> </memoryBacking> @@ -632,7 +634,19 @@ <dl> <dt><code>hugepages</code></dt> <dd>This tells the hypervisor that the guest should have its memory - allocated using hugepages instead of the normal native page size.</dd> + allocated using hugepages instead of the normal native page size. + <span class='since'>Since 1.2.5</span> it's possible to set hugepages + more specifically per NUMA node. The <code>page</code> element is + introduced. It has one compulsory attribute <code>size</code> which + specifies which hugepages should be used (especially useful on systems + supporting hugepages of different sizes). The default unit for the + <code>size</code> attribute is kilobytes (multiplier of 1024). If you + want to use a different unit, use the optional <code>unit</code> attribute. + For systems with NUMA, the optional <code>nodeset</code> attribute may + come in handy as it ties the given guest NUMA nodes to certain hugepage + sizes. 
From the example snippet, one gigabyte hugepages are used for + every NUMA node except node number four. For the correct syntax see + <a href="#elementsNUMATuning">this</a>.</dd> <dt><code>nosharepages</code></dt> <dd>Instructs hypervisor to disable shared pages (memory merge, KSM) for this domain. <span class="since">Since 1.0.6</span></dd> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 2caeef9..d9da0bc 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -567,7 +567,24 @@ <interleave> <optional> <element name="hugepages"> - <empty/> + <zeroOrMore> + <element name="page"> + <attribute name="size"> + <ref name="unsignedLong"/> + </attribute> + <optional> + <attribute name='unit'> + <ref name='unit'/> + </attribute> + </optional> + <optional> + <attribute name="nodeset"> + <ref name='cpuset'/> + </attribute> + </optional> + <empty/> + </element> + </zeroOrMore> </element> </optional> <optional> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index a1ef374..b49bcb0 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -11258,6 +11258,57 @@ virDomainParseMemory(const char *xpath, xmlXPathContextPtr ctxt, } +static int +virDomainHugepagesParseXML(xmlNodePtr node, + xmlXPathContextPtr ctxt, + virDomainHugePagePtr hugepage) +{ + int ret = -1; + xmlNodePtr oldnode = ctxt->node; + unsigned long long bytes, max; + char *unit = NULL, *nodeset = NULL; + + ctxt->node = node; + + /* On 32-bit machines, our bound is 0xffffffff * KiB. On 64-bit + * machines, our bound is off_t (2^63). 
*/ + if (sizeof(unsigned long) < sizeof(long long)) + max = 1024ull * ULONG_MAX; + else + max = LLONG_MAX; + + if (virXPathULongLong("string(./@size)", ctxt, &bytes) < 0) { + virReportError(VIR_ERR_XML_DETAIL, "%s", + _("unable to parse size attribute")); + goto cleanup; + } + + unit = virXPathString("string(./@unit)", ctxt); + + if (virScaleInteger(&bytes, unit, 1024, max) < 0) + goto cleanup; + + if (!(hugepage->size = VIR_DIV_UP(bytes, 1024))) { + virReportError(VIR_ERR_XML_DETAIL, "%s", + _("hugepage size can't be zero")); + goto cleanup; + } + + if ((nodeset = virXMLPropString(node, "nodeset"))) { + if (virBitmapParse(nodeset, 0, &hugepage->nodemask, + VIR_DOMAIN_CPUMASK_LEN) < 0) + goto cleanup; + } + + ret = 0; + cleanup: + VIR_FREE(unit); + VIR_FREE(nodeset); + ctxt->node = oldnode; + return ret; +} + + static virDomainResourceDefPtr virDomainResourceDefParse(xmlNodePtr node, xmlXPathContextPtr ctxt) @@ -11325,7 +11376,7 @@ virDomainDefParseXML(xmlDocPtr xml, { xmlNodePtr *nodes = NULL, node = NULL; char *tmp = NULL; - size_t i; + size_t i, j; int n; long id = -1; virDomainDefPtr def; @@ -11475,8 +11526,55 @@ virDomainDefParseXML(xmlDocPtr xml, def->mem.cur_balloon = def->mem.max_balloon; } - if ((node = virXPathNode("./memoryBacking/hugepages", ctxt))) - def->mem.hugepage_backed = true; + + if ((n = virXPathNodeSet("./memoryBacking/hugepages/page", ctxt, &nodes)) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("cannot extract hugepages nodes")); + goto error; + } + + if (n) { + if (VIR_ALLOC_N(def->mem.hugepages, n) < 0) + goto error; + + for (i = 0; i < n; i++) { + if (virDomainHugepagesParseXML(nodes[i], ctxt, + &def->mem.hugepages[i]) < 0) + goto error; + def->mem.nhugepages++; + + for (j = 0; j < i; j++) { + if (def->mem.hugepages[i].nodemask && + def->mem.hugepages[j].nodemask && + virBitmapDoesIntersect(def->mem.hugepages[i].nodemask, + def->mem.hugepages[j].nodemask)) { + virReportError(VIR_ERR_XML_DETAIL, + _("nodeset attribute of 
hugepages " + "of sizes %llu and %llu intersect"), + def->mem.hugepages[i].size, + def->mem.hugepages[j].size); + goto error; + } else if (!def->mem.hugepages[i].nodemask && + !def->mem.hugepages[j].nodemask) { + virReportError(VIR_ERR_XML_DETAIL, + _("two master hugepages detected: " + "%llu and %llu"), + def->mem.hugepages[i].size, + def->mem.hugepages[j].size); + goto error; + } + } + } + + VIR_FREE(nodes); + } else { + if ((node = virXPathNode("./memoryBacking/hugepages", ctxt))) { + if (VIR_ALLOC(def->mem.hugepages) < 0) + goto error; + + def->mem.nhugepages = 1; + } + } if ((node = virXPathNode("./memoryBacking/nosharepages", ctxt))) def->mem.nosharepages = true; @@ -11498,7 +11596,6 @@ virDomainDefParseXML(xmlDocPtr xml, goto error; for (i = 0; i < n; i++) { - size_t j; if (virDomainBlkioDeviceParseXML(nodes[i], &def->blkio.devices[i]) < 0) goto error; @@ -12383,7 +12480,6 @@ virDomainDefParseXML(xmlDocPtr xml, if (chr->target.port == -1) { int maxport = -1; - size_t j; for (j = 0; j < i; j++) { if (def->parallels[j]->target.port > maxport) maxport = def->parallels[j]->target.port; @@ -12411,7 +12507,6 @@ virDomainDefParseXML(xmlDocPtr xml, if (chr->target.port == -1) { int maxport = -1; - size_t j; for (j = 0; j < i; j++) { if (def->serials[j]->target.port > maxport) maxport = def->serials[j]->target.port; @@ -12469,7 +12564,6 @@ virDomainDefParseXML(xmlDocPtr xml, if (chr->info.type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_VIRTIO_SERIAL && chr->info.addr.vioserial.port == 0) { int maxport = 0; - size_t j; for (j = 0; j < i; j++) { virDomainChrDefPtr thischr = def->channels[j]; if (thischr->info.type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_VIRTIO_SERIAL && @@ -12586,7 +12680,7 @@ virDomainDefParseXML(xmlDocPtr xml, if (n && VIR_ALLOC_N(def->videos, n) < 0) goto error; for (i = 0; i < n; i++) { - size_t j = def->nvideos; + j = def->nvideos; virDomainVideoDefPtr video = virDomainVideoDefParseXML(nodes[j], def, flags); @@ -14024,13 +14118,38 @@ 
virDomainDefCheckABIStability(virDomainDefPtr src, dst->mem.cur_balloon, src->mem.cur_balloon); goto error; } - if (src->mem.hugepage_backed != dst->mem.hugepage_backed) { + if (src->mem.nhugepages != dst->mem.nhugepages) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - _("Target domain huge page backing %d does not match source %d"), - dst->mem.hugepage_backed, - src->mem.hugepage_backed); + _("Target domain huge pages count %zu does not match source %zu"), + dst->mem.nhugepages, src->mem.nhugepages); goto error; } + for (i = 0; i < src->mem.nhugepages; i++) { + virDomainHugePagePtr src_huge = &src->mem.hugepages[i]; + virDomainHugePagePtr dst_huge = &dst->mem.hugepages[i]; + + if (src_huge->size != dst_huge->size) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Target domain huge page size %llu " + "does not match source %llu"), + dst_huge->size, src_huge->size); + goto error; + } + + if (src_huge->nodemask && dst_huge->nodemask) { + if (!virBitmapEqual(src_huge->nodemask, dst_huge->nodemask)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Target huge page nodemask does not match source")); + goto error; + } + } else { + if (src_huge->nodemask || dst_huge->nodemask) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Target huge page nodemask does not match source")); + goto error; + } + } + } if (src->vcpus != dst->vcpus) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, @@ -17136,6 +17255,54 @@ virDomainResourceDefFormat(virBufferPtr buf, } +static int +virDomainHugepagesFormatBuf(virBufferPtr buf, + virDomainHugePagePtr hugepage) +{ + int ret = -1; + + virBufferAsprintf(buf, "<page size='%llu' unit='KiB'", + hugepage->size); + + if (hugepage->nodemask) { + char *nodeset = NULL; + if (!(nodeset = virBitmapFormat(hugepage->nodemask))) + goto cleanup; + virBufferAsprintf(buf, " nodeset='%s'", nodeset); + VIR_FREE(nodeset); + } + + virBufferAddLit(buf, "/>\n"); + + ret = 0; + cleanup: + return ret; +} + +static void 
+virDomainHugepagesFormat(virBufferPtr buf, + virDomainHugePagePtr hugepages, + size_t nhugepages) +{ + size_t i; + + if (nhugepages == 1 && + hugepages[0].size == 0) { + virBufferAddLit(buf, "<hugepages/>\n"); + return; + } + + virBufferAddLit(buf, "<hugepages>\n"); + virBufferAdjustIndent(buf, 2); + + for (i = 0; i < nhugepages; i++) + virDomainHugepagesFormatBuf(buf, &hugepages[i]); + + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</hugepages>\n"); +} + + #define DUMPXML_FLAGS \ (VIR_DOMAIN_XML_SECURE | \ VIR_DOMAIN_XML_INACTIVE | \ @@ -17319,11 +17486,11 @@ virDomainDefFormatInternal(virDomainDefPtr def, virBufferAddLit(buf, "</memtune>\n"); } - if (def->mem.hugepage_backed || def->mem.nosharepages || def->mem.locked) { + if (def->mem.nhugepages || def->mem.nosharepages || def->mem.locked) { virBufferAddLit(buf, "<memoryBacking>\n"); virBufferAdjustIndent(buf, 2); - if (def->mem.hugepage_backed) - virBufferAddLit(buf, "<hugepages/>\n"); + if (def->mem.nhugepages) + virDomainHugepagesFormat(buf, def->mem.hugepages, def->mem.nhugepages); if (def->mem.nosharepages) virBufferAddLit(buf, "<nosharepages/>\n"); if (def->mem.locked) diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 4c9b7e8..61f057c 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1848,6 +1848,14 @@ struct _virDomainResourceDef { char *partition; }; +typedef struct _virDomaiHugePage virDomainHugePage; +typedef virDomainHugePage *virDomainHugePagePtr; + +struct _virDomaiHugePage { + virBitmapPtr nodemask; /* guest's NUMA node mask */ + unsigned long long size; /* hugepage size in KiB */ +}; + /* * Guest VM main configuration * @@ -1874,7 +1882,10 @@ struct _virDomainDef { struct { unsigned long long max_balloon; /* in kibibytes */ unsigned long long cur_balloon; /* in kibibytes */ - bool hugepage_backed; + + virDomainHugePagePtr hugepages; + size_t nhugepages; + bool nosharepages; bool locked; int dump_core; /* enum virDomainMemDump */ diff --git 
a/src/parallels/parallels_driver.c b/src/parallels/parallels_driver.c index a503dea..bb9538f 100644 --- a/src/parallels/parallels_driver.c +++ b/src/parallels/parallels_driver.c @@ -2023,7 +2023,7 @@ parallelsApplyChanges(virDomainObjPtr dom, virDomainDefPtr new) return -1; } - if (old->mem.hugepage_backed != new->mem.hugepage_backed || + if (old->mem.nhugepages != new->mem.nhugepages || old->mem.hard_limit != new->mem.hard_limit || old->mem.soft_limit != new->mem.soft_limit || old->mem.min_guarantee != new->mem.min_guarantee || diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index b14ce83..0b8cef5 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -7332,7 +7332,7 @@ qemuBuildCommandLine(virConnectPtr conn, virCommandAddArg(cmd, "-m"); def->mem.max_balloon = VIR_DIV_UP(def->mem.max_balloon, 1024) * 1024; virCommandAddArgFormat(cmd, "%llu", def->mem.max_balloon / 1024); - if (def->mem.hugepage_backed) { + if (def->mem.nhugepages) { char *mem_path; if (!cfg->nhugetlbfs) { diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index cf5ce97..03593d6 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -611,15 +611,17 @@ int virQEMUDriverConfigLoadFile(virQEMUDriverConfigPtr cfg, } else { CHECK_TYPE("hugetlbfs_mount", VIR_CONF_STRING); if (p && p->str) { - if (VIR_REALLOC_N(cfg->hugetlbfs, 1) < 0) - goto cleanup; - cfg->nhugetlbfs = 1; - if (virQEMUDriverConfigHugeTLBFSInit(&cfg->hugetlbfs[0], - p->str, true) < 0) - goto cleanup; - } else { - VIR_FREE(cfg->hugetlbfs); - cfg->nhugetlbfs = 0; + if (STREQ(p->str, "")) { + VIR_FREE(cfg->hugetlbfs); + cfg->nhugetlbfs = 0; + } else { + if (VIR_REALLOC_N(cfg->hugetlbfs, 1) < 0) + goto cleanup; + cfg->nhugetlbfs = 1; + if (virQEMUDriverConfigHugeTLBFSInit(&cfg->hugetlbfs[0], + p->str, true) < 0) + goto cleanup; + } } } diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 16d03d8..d898aad 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3791,7 
+3791,7 @@ int qemuProcessStart(virConnectPtr conn, } virDomainAuditSecurityLabel(vm, true); - if (vm->def->mem.hugepage_backed) { + if (vm->def->mem.nhugepages) { for (i = 0; i < cfg->nhugetlbfs; i++) { char *hugepagePath = qemuGetHugepagePath(&cfg->hugetlbfs[i]); diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages.xml b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages.xml new file mode 100644 index 0000000..5ad0695 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages.xml @@ -0,0 +1,45 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <memory unit='KiB'>4194304</memory> + <currentMemory unit='KiB'>4194304</currentMemory> + <memoryBacking> + <hugepages> + <page size='2048' unit='KiB' nodeset='1'/> + <page size='1048576' unit='KiB' nodeset='0,2-3'/> + </hugepages> + </memoryBacking> + <vcpu placement='static'>4</vcpu> + <numatune> + <memory mode='strict' nodeset='0-3'/> + <memnode cellid='3' mode='strict' nodeset='3'/> + </numatune> + <os> + <type arch='i686' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <cpu> + <numa> + <cell id='0' cpus='0' memory='1048576'/> + <cell id='1' cpus='1' memory='1048576'/> + <cell id='2' cpus='2' memory='1048576'/> + <cell id='3' cpus='3' memory='1048576'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <disk type='block' device='disk'> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='hda' bus='ide'/> + <address type='drive' controller='0' bus='0' target='0' unit='0'/> + </disk> + <controller type='usb' index='0'/> + <controller type='ide' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index cefe05b..09cb228 100644 --- a/tests/qemuxml2xmltest.c +++ 
b/tests/qemuxml2xmltest.c @@ -197,6 +197,7 @@ mymain(void) DO_TEST("hyperv-off"); DO_TEST("hugepages"); + DO_TEST("hugepages-pages"); DO_TEST("nosharepages"); DO_TEST("disk-aio"); DO_TEST("disk-cdrom"); -- 1.8.5.5 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list