We have found certain application scenarios where overriding of the default qemu host cache mode provides a substantial improvement in guest performance. In particular, disabling host caching of the file/dev backing a guest drive. A summary of performance metrics may be found below. Attached is a simple patch which adds general support for a "cache=*" attribute to be passed to qemu from libvirt as part of a xml <driver> element. It is parsed in a domain's xml definition as well as being generated during output of the same. For example: <disk type='file' device='disk'> <source file='/guest_roots/fc8.root'/> <target dev='hda'/> <driver name='qemu' type='qwerty' cache='none'/> </disk> where both the existing "type=*" and added "cache=*" attributes are optional and independent. Note this is only intended to serve as an internal hook allowing override of qemu's default cache behavior. At the present it is not proposed as an end-user documented/visible feature. -john Mark Wagner wrote:
This data is provided is provided by the Red Hat Performance team. It is intended to support the request for adding the required functionality to libvirt to allow for setting a storage parameter to control the caching behavior of QEMU. This value would be passed on the QEMU command line. Our testing of commercial databases in a larger, Enterprise configuration (MSA, 16 cores, etc) has shown that the default QEMU caching behavior is not always the best. This set of data compares the drop off in performance of both writethrough and writeback cache compared to the nocache option. The main metric is transactions per minute (tpm). The other "axis" is the number of simulated users (x1000) % nocache TPM Results K Users WT WB 10 60% 64% 20 66% 71% 40 68% 72% 60 71% 75% 80 74% 79% 100 76% 83% From the above set of data, it is clear that the default behavior of QEMU is the worst performer out of the three cache options for this type of use case. It is also clear that we at minimum, 25% of the possible TPM performance just due to the cache setting.
-- john.cooper@xxxxxxxxxx
domain_conf.c | 32 +++++++++++++++++++++++++------- domain_conf.h | 15 +++++++++++++++ qemu_conf.c | 32 ++++++++++++++++++++++++++++---- 3 files changed, 68 insertions(+), 11 deletions(-) ================================================================= --- a/src/domain_conf.h +++ b/src/domain_conf.h @@ -80,6 +80,20 @@ enum virDomainDiskBus { VIR_DOMAIN_DISK_BUS_LAST }; +/* summary of all possible host open file cache modes + */ +typedef enum { + VIR_DOMAIN_DISK_CACHE_DEFAULT = 0, + VIR_DOMAIN_DISK_CACHE_DISABLE, + VIR_DOMAIN_DISK_CACHE_WRITEBACK, + VIR_DOMAIN_DISK_CACHE_WRITETHRU, + } host_cachemode; + +typedef struct { + char *tag; + host_cachemode idx; + } cache_map_t; + /* Stores the virtual disk configuration */ typedef struct _virDomainDiskDef virDomainDiskDef; typedef virDomainDiskDef *virDomainDiskDefPtr; @@ -91,6 +105,7 @@ struct _virDomainDiskDef { char *dst; char *driverName; char *driverType; + host_cachemode cachemode; unsigned int readonly : 1; unsigned int shared : 1; int slotnum; /* pci slot number for unattach */ ================================================================= --- a/src/qemu_conf.c +++ b/src/qemu_conf.c @@ -616,6 +616,26 @@ qemudNetworkIfaceConnect(virConnectPtr c return NULL; } +/* map from internal cache mode to qemu cache arg text + */ +static cache_map_t qemu_cache_map[] = { + {"none", VIR_DOMAIN_DISK_CACHE_DISABLE}, + {"writeback", VIR_DOMAIN_DISK_CACHE_WRITEBACK}, + {"writethrough", VIR_DOMAIN_DISK_CACHE_WRITETHRU}, + {NULL}}; + +/* return qemu arg text * corresponding to idx if found, NULL otherwise + */ +static inline char *qemu_cache_mode(host_cachemode idx) +{ + int i; + + for (i = 0; qemu_cache_map[i].tag; ++i) + if (qemu_cache_map[i].idx == idx) + return (qemu_cache_map[i].tag); + return (NULL); +} + static int qemudBuildCommandLineChrDevStr(virDomainChrDefPtr dev, char *buf, int buflen) @@ -946,6 +966,8 @@ int qemudBuildCommandLine(virConnectPtr virDomainDiskDefPtr disk = vm->def->disks[i]; int idx = virDiskNameToIndex(disk->dst); const char *bus = virDomainDiskQEMUBusTypeToString(disk->bus); + int nc; + char *cachemode; if (disk->bus == VIR_DOMAIN_DISK_BUS_USB) { if (disk->device == VIR_DOMAIN_DISK_DEVICE_DISK) { @@ -980,15 +1002,17 @@ int qemudBuildCommandLine(virConnectPtr break; } - snprintf(opt, PATH_MAX, "file=%s,if=%s,%sindex=%d%s%s", + nc = snprintf(opt, PATH_MAX, "file=%s,if=%s,%sindex=%d%s", disk->src ? disk->src : "", bus, media ? media : "", idx, bootable && disk->device == VIR_DOMAIN_DISK_DEVICE_DISK - ? ",boot=on" : "", - disk->shared && ! disk->readonly - ? ",cache=off" : ""); + ? ",boot=on" : ""); + cachemode = qemu_cache_mode(disk->cachemode); + if (cachemode || disk->shared && !disk->readonly) + snprintf(opt + nc, PATH_MAX - nc, ",cache=%s", + cachemode ? cachemode : "off"); ADD_ARG_LIT("-drive"); ADD_ARG_LIT(opt); ================================================================= --- a/src/domain_conf.c +++ b/src/domain_conf.c @@ -528,6 +528,16 @@ int virDomainDiskCompare(virDomainDiskDe #ifndef PROXY + +/* map from xml cache tag to internal cache mode + */ +static cache_map_t cache_map[] = { + {"none", VIR_DOMAIN_DISK_CACHE_DISABLE}, + {"off", VIR_DOMAIN_DISK_CACHE_DISABLE}, + {"writeback", VIR_DOMAIN_DISK_CACHE_WRITEBACK}, + {"writethrough", VIR_DOMAIN_DISK_CACHE_WRITETHRU}, + {NULL}}; + /* Parse the XML definition for a disk * @param node XML nodeset to parse for disk definition */ @@ -543,6 +553,8 @@ virDomainDiskDefParseXML(virConnectPtr c char *source = NULL; char *target = NULL; char *bus = NULL; + char *cachetag = NULL; + int i; if (VIR_ALLOC(def) < 0) { virDomainReportError(conn, VIR_ERR_NO_MEMORY, NULL); @@ -592,6 +604,14 @@ virDomainDiskDefParseXML(virConnectPtr c (xmlStrEqual(cur->name, BAD_CAST "driver"))) { driverName = virXMLPropString(cur, "name"); driverType = virXMLPropString(cur, "type"); + if (cachetag = virXMLPropString(cur, "cache")) { + for (i = 0; cache_map[i].tag; ++i) + if (!strcmp(cache_map[i].tag, cachetag)) { + def->cachemode = cache_map[i].idx; + break; + } + VIR_FREE(cachetag); + } } else if (xmlStrEqual(cur->name, BAD_CAST "readonly")) { def->readonly = 1; } else if (xmlStrEqual(cur->name, BAD_CAST "shareable")) { @@ -2620,14 +2640,12 @@ virDomainDiskDefFormat(virConnectPtr con type, device); if (def->driverName) { + virBufferVSprintf(buf, " <driver name='%s'", def->driverName); if (def->driverType) - virBufferVSprintf(buf, - " <driver name='%s' type='%s'/>\n", - def->driverName, def->driverType); - else - virBufferVSprintf(buf, - " <driver name='%s'/>\n", - def->driverName); + virBufferVSprintf(buf, " type='%s'", def->driverType); + if (def->cachemode) + virBufferVSprintf(buf, " cache='%s'", cache_map[def->cachemode].tag); + virBufferVSprintf(buf, "/>\n"); } if (def->src) {
-- Libvir-list mailing list Libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list