I've incorporated feedback received on the prior version into the patch below. By default, the host mount point for hugetlbfs is queried from /proc/mounts unless it is overridden in qemu.conf via: hugepage_mount = "<path_to_mount_point>" This should make the concern of establishing a mount point path convention a non-issue for the general case, while still allowing the mount point to be set deterministically if needed. Hugepage backing can be disabled completely by setting the mount point to "". A guest now requests hugepage backing by specifying: <hugepage enable='on'/> in its domain definition XML file. A 'hugepage' element along with an enable attribute is added to the definition, but the element content is open for future use. As before, if a guest requests hugepage backing and either the host mount point can't be determined or the target qemu doesn't support -mem-path, an error is flagged. Signed-off-by: john cooper <john.cooper@xxxxxxxxxx> --- diff --git a/src/domain_conf.c b/src/domain_conf.c index f3e4c6c..ac328ea 100644 --- a/src/domain_conf.c +++ b/src/domain_conf.c @@ -2369,6 +2369,18 @@ static virDomainDefPtr virDomainDefParseXML(virConnectPtr conn, if (virXPathULong(conn, "string(./currentMemory[1])", ctxt, &def->memory) < 0) def->memory = def->maxmem; + tmp = virXPathString(conn, "string(./hugepage/@enable)", ctxt); + if (!tmp || STREQ(tmp, "off")) + def->hugepage_backed = 0; + else if (STREQ(tmp, "on")) + def->hugepage_backed = 1; + else { + virDomainReportError(conn, VIR_ERR_INTERNAL_ERROR, + _("invalid hugepage mode %s"), tmp); + goto error; + } + VIR_FREE(tmp); + if (virXPathULong(conn, "string(./vcpu[1])", ctxt, &def->vcpus) < 0) def->vcpus = 1; @@ -3933,6 +3945,8 @@ char *virDomainDefFormat(virConnectPtr conn, virBufferVSprintf(&buf, " <memory>%lu</memory>\n", def->maxmem); virBufferVSprintf(&buf, " <currentMemory>%lu</currentMemory>\n", def->memory); + virBufferVSprintf(&buf, " <hugepage enable='%s'/>\n", + def->hugepage_backed ? 
"on" : "off"); for (n = 0 ; n < def->cpumasklen ; n++) if (def->cpumask[n] != 1) diff --git a/src/domain_conf.h b/src/domain_conf.h index 6e111fa..d6bdcdb 100644 --- a/src/domain_conf.h +++ b/src/domain_conf.h @@ -481,6 +481,7 @@ struct _virDomainDef { unsigned long memory; unsigned long maxmem; + unsigned char hugepage_backed; unsigned long vcpus; int cpumasklen; char *cpumask; diff --git a/src/qemu.conf b/src/qemu.conf index 3009725..9ecac23 100644 --- a/src/qemu.conf +++ b/src/qemu.conf @@ -95,3 +95,13 @@ # The group ID for QEMU processes run by the system instance #group = "root" + +# If provided by the host and a hugetlbfs mount point is configured, +# a guest may request huge page backing. When this mount point is +# unspecified here, determination of a host mount point in /proc/mounts +# will be attempted. Specifying an explicit mount overrides detection +# of the same in /proc/mounts. Setting the mount point to "" will +# disable guest hugepage backing. + +# hugepage_mount = "<path_to_mount_point>" + diff --git a/src/qemu_conf.c b/src/qemu_conf.c index 4043d70..5a3d70c 100644 --- a/src/qemu_conf.c +++ b/src/qemu_conf.c @@ -35,6 +35,7 @@ #include <sys/wait.h> #include <arpa/inet.h> #include <sys/utsname.h> +#include <mntent.h> #include "c-ctype.h" #include "virterror_internal.h" @@ -50,6 +51,8 @@ #include "nodeinfo.h" #include "logging.h" +unsigned char *find_mnt(unsigned char *type); + #define VIR_FROM_THIS VIR_FROM_QEMU VIR_ENUM_DECL(virDomainDiskQEMUBus) @@ -87,6 +90,7 @@ VIR_ENUM_IMPL(qemuVideo, VIR_DOMAIN_VIDEO_TYPE_LAST, NULL, /* no arg needed for xen */ NULL /* don't support vbox */); +#define PROC_MOUNT_BUF_LEN 255 int qemudLoadDriverConfig(struct qemud_driver *driver, const char *filename) { @@ -218,10 +222,41 @@ int qemudLoadDriverConfig(struct qemud_driver *driver, } VIR_FREE(group); + p = virConfGetValue (conf, "hugepage_mount"); + CHECK_TYPE ("hugepage_mount", VIR_CONF_STRING); + if (!p) + driver->hugepage_mount = find_mnt("hugetlbfs"); + else 
if (!p->str) + driver->hugepage_mount = NULL; + else { + VIR_FREE(driver->hugepage_mount); + if (!(driver->hugepage_mount = strdup(p->str))) { + virReportOOMError(NULL); + virConfFree(conf); + return -1; + } + } + virConfFree (conf); return 0; } +/* search /proc/mounts for mount point of *type; return pointer to + * malloc'ed string of the path if found, otherwise return NULL + */ +unsigned char *find_mnt(unsigned char *type) +{ + FILE *f = setmntent("/proc/mounts", "r"); + unsigned char buf[PROC_MOUNT_BUF_LEN]; + struct mntent mb; + + if (f) + while (getmntent_r(f, &mb, buf, PROC_MOUNT_BUF_LEN)) + if (STREQ(mb.mnt_type, type)) + return (strdup(mb.mnt_dir)); + return (NULL); +} + /* The list of possible machine types for various architectures, as supported by QEMU - taken from 'qemu -M ?' for each arch */ static const char *const arch_info_hvm_x86_machines[] = { @@ -500,6 +535,8 @@ static unsigned int qemudComputeCmdFlags(const char *help, flags |= QEMUD_CMD_FLAG_VGA; if (strstr(help, "boot=on")) flags |= QEMUD_CMD_FLAG_DRIVE_BOOT; + if (strstr(help, "-mem-path")) + flags |= QEMUD_CMD_FLAG_MEM_PATH; if (version >= 9000) flags |= QEMUD_CMD_FLAG_VNC_COLON; @@ -1125,6 +1162,15 @@ int qemudBuildCommandLine(virConnectPtr conn, ADD_ARG_LIT("-no-kvm"); ADD_ARG_LIT("-m"); ADD_ARG_LIT(memory); + if (def->hugepage_backed) { + if (!driver->hugepage_mount || !(qemuCmdFlags & QEMUD_CMD_FLAG_MEM_PATH)) { + qemudReportError(conn, NULL, NULL, VIR_ERR_NO_SUPPORT, + "%s", _("hugepage backing not supported")); + goto error; + } + ADD_ARG_LIT("-mem-path"); + ADD_ARG_LIT(driver->hugepage_mount); + } ADD_ARG_LIT("-smp"); ADD_ARG_LIT(vcpus); diff --git a/src/qemu_conf.h b/src/qemu_conf.h index fbf2ab9..847597f 100644 --- a/src/qemu_conf.h +++ b/src/qemu_conf.h @@ -58,6 +58,7 @@ enum qemud_cmd_flags { QEMUD_CMD_FLAG_KVM = (1 << 13), /* Whether KVM is compiled in */ QEMUD_CMD_FLAG_DRIVE_FORMAT = (1 << 14), /* Is -drive format= avail */ QEMUD_CMD_FLAG_VGA = (1 << 15), /* Is -vga avail */ + 
QEMUD_CMD_FLAG_MEM_PATH = (1 << 16), /* mmap'ped guest backing supported */ }; /* Main driver state */ @@ -86,6 +87,7 @@ struct qemud_driver { char *vncListen; char *vncPassword; char *vncSASLdir; + char *hugepage_mount; virCapsPtr caps; diff --git a/src/qemu_driver.c b/src/qemu_driver.c index 00dc6e5..bdecf5a 100644 --- a/src/qemu_driver.c +++ b/src/qemu_driver.c @@ -638,6 +638,7 @@ qemudShutdown(void) { VIR_FREE(qemu_driver->vncListen); VIR_FREE(qemu_driver->vncPassword); VIR_FREE(qemu_driver->vncSASLdir); + VIR_FREE(qemu_driver->hugepage_mount); /* Free domain callback list */ virDomainEventCallbackListFree(qemu_driver->domainEventCallbacks); -- john.cooper@xxxxxxxxxx -- Libvir-list mailing list Libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list