Add an element named "stricthugepages" to control whether to refuse guest initialization in case hugepage allocation cannot be performed. Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx> diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index ff50214..e79f5e6 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -632,6 +632,9 @@ <dt><code>hugepages</code></dt> <dd>This tells the hypervisor that the guest should have its memory allocated using hugepages instead of the normal native page size.</dd> + <dt><code>stricthugepages</code></dt> + <dd>This tells the hypervisor that the guest should refuse to start + in case of failure to allocate guest memory with hugepages.</dd> <dt><code>nosharepages</code></dt> <dd>Instructs hypervisor to disable shared pages (memory merge, KSM) for this domain. <span class="since">Since 1.0.6</span></dd> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 28e24f9..f16ef0b 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -11226,6 +11226,9 @@ virDomainDefParseXML(xmlDocPtr xml, if (virXPathBoolean("boolean(./memoryBacking/locked)", ctxt)) def->mem.locked = true; + if ((node = virXPathNode("./memoryBacking/stricthugepages", ctxt))) + def->mem.strict_hugepages = true; + /* Extract blkio cgroup tunables */ if (virXPathUInt("string(./blkiotune/weight)", ctxt, &def->blkio.weight) < 0) diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index d8f2e49..8ea5cf0 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1977,6 +1977,7 @@ struct _virDomainDef { unsigned long long max_balloon; /* in kibibytes */ unsigned long long cur_balloon; /* in kibibytes */ bool hugepage_backed; + bool strict_hugepages; bool nosharepages; bool locked; int dump_core; /* enum virDomainMemDump */ diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 96b8825..3f8d0a4 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -12133,10 +12133,9 @@ 
cleanup: return def; } - -static int qemuParseProcFileStrings(int pid_value, - const char *name, - char ***list) +int qemuParseProcFileStrings(int pid_value, + const char *name, + char ***list) { char *path = NULL; int ret = -1;
diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h
index de7683d..bcdfefa 100644
--- a/src/qemu/qemu_command.h
+++ b/src/qemu/qemu_command.h
@@ -226,7 +226,9 @@ virDomainDefPtr qemuParseCommandLinePid(virCapsPtr qemuCaps,
                                         char **pidfile,
                                         virDomainChrSourceDefPtr *monConfig,
                                         bool *monJSON);
-
+int qemuParseProcFileStrings(int pid_value,
+                             const char *name,
+                             char ***list);
 int qemuDomainAssignAddresses(virDomainDefPtr def,
                               virQEMUCapsPtr qemuCaps,
                               virDomainObjPtr obj)
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 8bcd98e..cb8298e 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -25,6 +25,7 @@
 #include <unistd.h>
 #include <signal.h>
 #include <sys/stat.h>
+#include <stdlib.h>
 #if defined(__linux__)
 # include <linux/capability.h>
 #elif defined(__FreeBSD__)
@@ -3507,6 +3508,142 @@ error:
 }
 
 
+/*
+ * qemuHugepageMapLineCovers:
+ * @line: one NUL-terminated line in /proc/$pid/maps format
+ * @ramPrefix: "<hugepagePath>/qemu_back_mem.pc.ram."
+ * @kvmPrefix: "<hugepagePath>/kvm."
+ * @needed: required mapping size in bytes
+ *
+ * Returns true if @line describes a mapping backed by a file matching
+ * either prefix and spanning at least @needed bytes.
+ */
+static bool
+qemuHugepageMapLineCovers(const char *line,
+                          const char *ramPrefix,
+                          const char *kvmPrefix,
+                          unsigned long long needed)
+{
+    unsigned long long start, end;
+    const char *endstr;
+    char *endptr = NULL;
+
+    if (!strstr(line, ramPrefix) && !strstr(line, kvmPrefix))
+        return false;
+
+    /* A maps line begins with "<start>-<end>" in hex.  Addresses are
+     * unsigned, so use strtoull and insist that digits were consumed. */
+    errno = 0;
+    start = strtoull(line, &endptr, 16);
+    if (errno != 0 || endptr == line || *endptr != '-')
+        return false;
+
+    endstr = endptr + 1;
+    errno = 0;
+    end = strtoull(endstr, &endptr, 16);
+    if (errno != 0 || endptr == endstr || end < start)
+        return false;
+
+    return end - start >= needed;
+}
+
+
+/*
+ * qemuValidateStrictHugepage:
+ * @vm: domain whose QEMU process (vm->pid) is inspected
+ * @cfg: driver config; cfg->hugepagePath is the hugepage file prefix
+ *
+ * If vm->def->mem.strict_hugepages is set, look through the "maps"
+ * proc file of vm->pid for one mapping that is backed by a file under
+ * cfg->hugepagePath and is at least mem.max_balloon KiB large.
+ *
+ * NOTE(review): guest RAM split across several mappings is not summed,
+ * so such a guest would be rejected -- confirm this is intended.
+ *
+ * Returns bool: whether to fail guest initialization.
+ */
+static bool qemuValidateStrictHugepage(virDomainObjPtr vm, virQEMUDriverConfigPtr cfg)
+{
+    unsigned long long needed;
+    char *ramPrefix = NULL;
+    char *kvmPrefix = NULL;
+    char **maps = NULL;
+    size_t ramLen, kvmLen;
+    size_t i;
+    bool ret = true;
+
+    if (!vm->def->mem.strict_hugepages)
+        return false;
+
+    if (!vm->def->mem.hugepage_backed || !cfg->hugepagePath) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                       _("strict huge pages depends on huge pages"));
+        return true;
+    }
+
+    /* Build both backing-file prefixes once, outside the scan loop. */
+    ramLen = strlen(cfg->hugepagePath) + sizeof("/qemu_back_mem.pc.ram.");
+    kvmLen = strlen(cfg->hugepagePath) + sizeof("/kvm.");
+    ramPrefix = malloc(ramLen);
+    kvmPrefix = malloc(kvmLen);
+    if (!ramPrefix || !kvmPrefix) {
+        virReportError(VIR_ERR_NO_MEMORY, "%s",
+                       _("cannot allocate hugepage path buffer"));
+        goto cleanup;
+    }
+    snprintf(ramPrefix, ramLen, "%s/qemu_back_mem.pc.ram.", cfg->hugepagePath);
+    snprintf(kvmPrefix, kvmLen, "%s/kvm.", cfg->hugepagePath);
+
+    /* max_balloon is kept in KiB; mapping sizes are in bytes. */
+    needed = vm->def->mem.max_balloon * 1024;
+
+    /* The parser requires /proc/pid, which only exists on platforms
+     * like Linux where pid_t fits in int. */
+    if ((int) vm->pid != vm->pid ||
+        qemuParseProcFileStrings(vm->pid, "maps", &maps) < 0)
+        goto cleanup;
+
+    /* NOTE(review): qemuParseProcFileStrings may hand back entries that
+     * hold several newline-separated lines (confirm how it splits the
+     * file), so walk every line of every entry.  The entries are ours
+     * to free, hence cutting them at '\n' in place is harmless. */
+    for (i = 0; ret && maps && maps[i]; i++) {
+        char *line = maps[i];
+
+        while (line) {
+            char *eol = strchr(line, '\n');
+
+            if (eol)
+                *eol = '\0';
+
+            if (qemuHugepageMapLineCovers(line, ramPrefix, kvmPrefix, needed)) {
+                ret = false;
+                break;
+            }
+
+            line = eol ? eol + 1 : NULL;
+        }
+    }
+
+    if (ret) {
+        /* FIXME: is VIR_ERR_NO_MEMORY to be used exclusively
+         * to reference libvirt allocation failures?
+         */
+        virReportError(VIR_ERR_NO_MEMORY, "%s",
+                       _("guest memory not hugetlbfs backed"));
+    }
+
+cleanup:
+    /* Release every entry and then the array that held them. */
+    for (i = 0; maps && maps[i]; i++)
+        VIR_FREE(maps[i]);
+    VIR_FREE(maps);
+    free(ramPrefix);
+    free(kvmPrefix);
+    return ret;
+}
+
+
 static bool
 qemuValidateCpuMax(virDomainDefPtr def, virQEMUCapsPtr qemuCaps)
 {
@@ -4071,6 +4208,13 @@ int qemuProcessStart(virConnectPtr conn,
         goto cleanup;
     }
 
+    /* enforce strict hugepage */
+    if (qemuValidateStrictHugepage(vm, cfg)) {
+        VIR_WARN("Failure to allocate hugepage backing for %s, exiting",
+                 vm->def->name);
+        goto cleanup;
+    }
+
     /* set default link states */
     /* qemu doesn't support setting this on the command line, so
      * enter the monitor */
--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list