On Wed, Nov 30, 2016 at 5:41 PM, Peter Krempa <pkrempa@xxxxxxxxxx> wrote: > On Wed, Nov 30, 2016 at 16:06:37 +0530, prasanna.kalever@xxxxxxxxxx wrote: >> From: Prasanna Kumar Kalever <prasanna.kalever@xxxxxxxxxx> >> >> This patch optimizes calls to glfs_init() and friends >> >> Currently, a start of a VM will call 2 glfs_new/glfs_init (which will create >> glfs object, once for stat, read headers and next to chown) and then will fork >> qemu process which will call once again (for actual read write IO). >> >> Not that all, in case if we are have 4 extra attached disks, then the total >> calls to glfs_init() and friends will be (4+1)*2 in libvirt and (4+1)*1 in >> qemu space i.e 15 calls. Since we don't have control over qemu process as that >> executes in a different process environment, lets do not bother much about it. >> >> This patch shrinks these 10 calls (i.e objects from above example) to just >> one, by maintaining a cache of glfs objects. >> >> Additionally snapshot(external) scenario will further complex the situation ... >> >> The glfs object is shared across other only if volume name and all the >> volfile servers match (includes hostname, transport and port number). >> In case of hit glfs object takes a ref and on close unref happens. >> >> Thanks to 'Peter Krempa' for all the inputs. >> >> Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@xxxxxxxxxx> >> --- >> v2: Address review comments from Peter on v1 >> * Rebased on latest master >> * Changes to commit msg >> * Introduce storage API's for Register and Unregister of volume >> * During qemu process Start/Stop and snapshot create >> * Check Transport and Port type >> * Atomic element add/del to list and ref counting >> Pending: Treating IP and FQDN belong to same host > > In addition to dan's review: > > You should split the parts that add the caching and the parts that > optimize the calls to virStorageVolInit and friends. Yeah! agree. > >> >> v1: Initial patch >> --- >> src/qemu/qemu_domain.c | 2 +- >> src/qemu/qemu_domain.h | 5 + >> src/qemu/qemu_driver.c | 29 ++++ >> src/qemu/qemu_process.c | 25 +++ >> src/storage/storage_backend_fs.c | 2 + >> src/storage/storage_backend_gluster.c | 295 +++++++++++++++++++++++++++++++--- >> src/storage/storage_driver.c | 23 ++- >> src/storage/storage_driver.h | 3 + >> 8 files changed, 357 insertions(+), 27 deletions(-) >> > >> diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h >> index 7650ff3..a9e38bd 100644 >> --- a/src/qemu/qemu_domain.h >> +++ b/src/qemu/qemu_domain.h >> @@ -591,6 +591,11 @@ bool qemuDomainDiskSourceDiffers(virDomainDiskDefPtr disk, >> bool qemuDomainDiskChangeSupported(virDomainDiskDefPtr disk, >> virDomainDiskDefPtr orig_disk); >> >> +void qemuDomainGetImageIds(virQEMUDriverConfigPtr cfg, >> + virDomainObjPtr vm, >> + virStorageSourcePtr src, >> + uid_t *uid, gid_t *gid); >> + >> int qemuDomainStorageFileInit(virQEMUDriverPtr driver, >> virDomainObjPtr vm, >> virStorageSourcePtr src); >> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c >> index 3517aa2..7cae094 100644 >> --- a/src/qemu/qemu_driver.c >> +++ b/src/qemu/qemu_driver.c >> @@ -14211,6 +14211,7 @@ qemuDomainSnapshotCreateSingleDiskActive(virQEMUDriverPtr driver, >> cleanup: >> if (need_unlink && virStorageFileUnlink(newDiskSrc)) >> VIR_WARN("unable to unlink just-created %s", source); >> + virStorageFileDeinit(disk->src); >> virStorageFileDeinit(newDiskSrc); >> virStorageSourceFree(newDiskSrc); >> virStorageSourceFree(persistDiskSrc); >> @@ -14566,6 +14567,7 @@ qemuDomainSnapshotCreateXML(virDomainPtr domain, >> virDomainSnapshotObjPtr snap = NULL; >> virDomainSnapshotPtr snapshot = NULL; >> virDomainSnapshotDefPtr def = NULL; >> + virDomainSnapshotDefPtr refDef = NULL; >> bool update_current = true; >> bool redefine = flags & VIR_DOMAIN_SNAPSHOT_CREATE_REDEFINE; >> unsigned int parse_flags = VIR_DOMAIN_SNAPSHOT_PARSE_DISKS; >> @@ -14574,6 +14576,9 @@ qemuDomainSnapshotCreateXML(virDomainPtr domain, >> bool align_match = true; >> virQEMUDriverConfigPtr cfg = NULL; >> virCapsPtr caps = NULL; >> + unsigned int dIndex; >> + uid_t uid; >> + gid_t gid; >> >> virCheckFlags(VIR_DOMAIN_SNAPSHOT_CREATE_REDEFINE | >> VIR_DOMAIN_SNAPSHOT_CREATE_CURRENT | >> @@ -14690,6 +14695,19 @@ qemuDomainSnapshotCreateXML(virDomainPtr domain, >> >> qemuDomainObjSetAsyncJobMask(vm, QEMU_JOB_NONE); >> >> + for (dIndex = 0; dIndex < def->ndisks; dIndex++) { > > Please use 'i' for iterator. sure > >> + virDomainSnapshotDiskDef disk = def->disks[dIndex]; >> + >> + if (virStorageSourceIsEmpty(disk.src)) >> + continue; >> + >> + qemuDomainGetImageIds(cfg, vm, disk.src, &uid, &gid); >> + >> + if (virStorageVolumeRegister(disk.src, uid, gid) <0) >> + goto cleanup; >> + } >> + >> + >> if (redefine) { >> if (virDomainSnapshotRedefinePrep(domain, vm, &def, &snap, >> &update_current, flags) < 0) >> @@ -14800,6 +14818,17 @@ qemuDomainSnapshotCreateXML(virDomainPtr domain, >> snapshot = virGetDomainSnapshot(domain, snap->def->name); >> >> endjob: >> + refDef = (!snap) ? def : snap->def; >> + >> + for (dIndex = 0; dIndex < refDef->ndisks; dIndex++) { >> + virDomainSnapshotDiskDef disk = refDef->disks[dIndex]; >> + >> + if (virStorageSourceIsEmpty(disk.src)) >> + continue; >> + >> + virStorageVolumeUnRegister(disk.src); >> + } >> + >> if (snapshot && !(flags & VIR_DOMAIN_SNAPSHOT_CREATE_NO_METADATA)) { >> if (qemuDomainSnapshotWriteMetadata(vm, snap, driver->caps, >> cfg->snapshotDir) < 0) { >> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c >> index ecd7ded..20793cf 100644 >> --- a/src/qemu/qemu_process.c >> +++ b/src/qemu/qemu_process.c >> @@ -4612,6 +4612,9 @@ qemuProcessInit(virQEMUDriverPtr driver, >> qemuDomainObjPrivatePtr priv = vm->privateData; >> int stopFlags; >> int ret = -1; >> + unsigned int dIndex; >> + uid_t uid; >> + gid_t gid; >> >> VIR_DEBUG("vm=%p name=%s id=%d migration=%d", >> vm, vm->def->name, vm->def->id, migration); >> @@ -4664,6 +4667,18 @@ qemuProcessInit(virQEMUDriverPtr driver, >> if (qemuDomainSetPrivatePaths(driver, vm) < 0) >> goto cleanup; >> >> + for (dIndex = 0; dIndex < vm->def->ndisks; dIndex++) { > > We usually use 'i' for iterator variables. > >> + virDomainDiskDefPtr disk = vm->def->disks[dIndex]; >> + >> + if (virStorageSourceIsEmpty(disk->src)) >> + continue; >> + >> + qemuDomainGetImageIds(cfg, vm, disk->src, &uid, &gid); >> + >> + if (virStorageVolumeRegister(disk->src, uid, gid) < 0) >> + goto cleanup; >> + } >> + >> ret = 0; >> >> cleanup: > > [...] > >> diff --git a/src/storage/storage_backend_fs.c b/src/storage/storage_backend_fs.c >> index de0e8d5..0e03e06 100644 >> --- a/src/storage/storage_backend_fs.c >> +++ b/src/storage/storage_backend_fs.c >> @@ -1488,6 +1488,8 @@ virStorageFileBackendFileDeinit(virStorageSourcePtr src) > > [...] > >> +static int >> +virStorageBackendGlusterSetPreopened(virStorageSourcePtr src, >> + virStorageFileBackendGlusterPrivPtr priv) >> +{ >> + unsigned int idx; >> + virStorageBackendGlusterStatePtrPreopened entry = NULL; >> + >> + if (connCache == NULL && (virOnce(&glusterConnOnce, >> + virGlusterConnAlloc) < 0)) >> + return -1; >> + >> + if (virGlusterGlobalError) >> + return -1; >> + >> + if (!connCache->nConn) { >> + if (virMutexInit(&connCache->lock) < 0) { >> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", >> + _("Unable to initialize mutex")); >> + return -1; >> + } >> + } >> + >> + for (idx = 0; idx < connCache->nConn; idx++) { >> + if (STREQ(connCache->conn[idx]->volname, src->volume)) >> + return 0; >> + } >> + >> + if (VIR_ALLOC(entry) < 0) >> + return -1; >> + >> + if (VIR_STRDUP(entry->volname, src->volume) < 0) >> + goto error; >> + >> + if (VIR_ALLOC_N(entry->hosts, entry->nservers) < 0) >> + goto error; >> + >> + entry->nservers = src->nhosts; >> + >> + for (idx = 0; idx < src->nhosts; idx++) { >> + if (VIR_ALLOC(entry->hosts[idx]) < 0) >> + goto error; >> + if (src->hosts[idx].transport == VIR_STORAGE_NET_HOST_TRANS_UNIX) { >> + if (VIR_STRDUP(entry->hosts[idx]->u.uds.path, >> + src->hosts[idx].socket) < 0) >> + goto error; >> + entry->hosts[idx]->type = VIR_STORAGE_NET_HOST_TRANS_UNIX; >> + } else if (src->hosts[idx].transport == >> + VIR_STORAGE_NET_HOST_TRANS_TCP) { >> + if (VIR_STRDUP(entry->hosts[idx]->u.tcp.host, >> + src->hosts[idx].name) < 0) >> + goto error; >> + if (VIR_STRDUP(entry->hosts[idx]->u.tcp.port, >> + src->hosts[idx].port) < 0) >> + goto error; >> + entry->hosts[idx]->type = VIR_STORAGE_NET_HOST_TRANS_TCP; >> + } else { >> + entry->hosts[idx]->type = VIR_STORAGE_NET_HOST_TRANS_LAST; > > We've got virStorageSourceCopy. The cache could use those objects so > that you don't have to copy everything manually. I'm thinking that you are referring to virStorageNetHostDefCopy here ? > >> + } >> + } >> + entry->priv = priv; >> + >> + virMutexLock(&connCache->lock); >> + virAtomicIntSet(&entry->ref, 1); > > You should use virObjectLockable for this as I told you in the previous > version. It provides a mutex and reference counting and it's the way we > do stuff in libvirt. oh yeah. I missed it somehow, sorry. > >> + if (VIR_INSERT_ELEMENT(connCache->conn, -1, connCache->nConn, entry) < 0) { > > This is equivalent to VIR_APPEND_ELEMENT. > >> + virMutexUnlock(&connCache->lock); >> + goto error; >> + } >> + virMutexUnlock(&connCache->lock); >> + >> + return 0; >> + >> + error: >> + for (idx = 0; idx < entry->nservers; idx++) { >> + if (entry->hosts[idx]->type == VIR_STORAGE_NET_HOST_TRANS_UNIX) { >> + VIR_FREE(entry->hosts[idx]->u.uds.path); >> + } else { >> + VIR_FREE(entry->hosts[idx]->u.tcp.host); >> + VIR_FREE(entry->hosts[idx]->u.tcp.port); >> + } >> + VIR_FREE(entry->hosts[idx]); > > You should create a freeing function. got it. > >> + } >> + >> + VIR_FREE(entry->hosts); >> + VIR_FREE(entry->volname); >> + VIR_FREE(entry); >> + >> + return -1; >> +} >> + >> +static glfs_t * >> +virStorageBackendGlusterFindPreopened(virStorageSourcePtr src) >> +{ >> + unsigned int cIdx, sIdx, nIdx; /* connectionIdx, savedIdx, newIdx */ > > The comment explaining these variables is pointless. Also please one > variable declaration per line. Okay. > >> + bool flag = false; > > The name of this variable does not help deciphering what it's used for. Okay. > >> + >> + if (connCache == NULL) >> + return NULL; >> + >> + virStorageBackendGlusterStatePtrPreopened entry; >> + >> + for (cIdx = 0; cIdx < connCache->nConn; cIdx++) { >> + entry = connCache->conn[cIdx]; >> + if (STREQ(entry->volname, src->volume)) { > > By inverting a few of the conditions and using continue statements you > can avoid deep nesting. > >> + if (entry->nservers == src->nhosts) { >> + for (sIdx = 0; sIdx < entry->nservers; sIdx++) { >> + for (nIdx = 0; nIdx < src->nhosts; nIdx++) { >> + if (entry->hosts[sIdx]->type == >> + src->hosts[nIdx].transport) { >> + if (entry->hosts[sIdx]->type >> + == VIR_STORAGE_NET_HOST_TRANS_UNIX) { >> + if (STREQ(entry->hosts[sIdx]->u.uds.path, >> + src->hosts[nIdx].socket)) { >> + flag = true; >> + break; >> + } >> + } else { >> + if (STREQ(entry->hosts[sIdx]->u.tcp.host, >> + src->hosts[nIdx].name) && >> + STREQ(entry->hosts[sIdx]->u.tcp.port, >> + src->hosts[nIdx].port)) { >> + flag = true; >> + break; >> + } >> + } >> + } >> + } >> + if (!flag) >> + return NULL; >> + flag = false; >> + } >> + virAtomicIntInc(&entry->ref); >> + return entry->priv->vol; >> + } else { >> + return NULL; >> + } >> + } >> + } >> + return NULL; >> +} >> + >> +static void >> +virStorageBackendGlusterClosePreopened(virStorageSourcePtr src) >> +{ >> + virStorageFileBackendGlusterPrivPtr priv = src->drv->priv; >> + unsigned int cIdx, hIdx; /* connectionIdx, hostIdx */ > > The comment is rather pointless. Declare one variable per line. Okay. > >> + for (cIdx = 0; cIdx < connCache->nConn; cIdx++) { >> + if (STREQ(connCache->conn[cIdx]->volname, src->volume)) { >> + virAtomicIntDecAndTest(&connCache->conn[cIdx]->ref); >> + if (virAtomicIntGet(&connCache->conn[cIdx]->ref) != 0) { >> + if (priv && priv->canonpath) { >> + VIR_FREE(priv->canonpath); >> + VIR_FREE(priv); >> + src->drv->priv = NULL; > > All this should be replaced by a virObject destructor for your given > cache type. Yeah, got it. > >> + } >> + return; >> + } >> + >> + glfs_fini(connCache->conn[cIdx]->priv->vol); >> + >> + VIR_FREE(connCache->conn[cIdx]->priv->canonpath); >> + VIR_FREE(connCache->conn[cIdx]->priv); >> + >> + for (hIdx = 0; hIdx < connCache->conn[cIdx]->nservers; hIdx++) { >> + if (connCache->conn[cIdx]->hosts[hIdx]->type == >> + VIR_STORAGE_NET_HOST_TRANS_UNIX) { >> + VIR_FREE(connCache->conn[cIdx]->hosts[hIdx]->u.uds.path); >> + } else { >> + VIR_FREE(connCache->conn[cIdx]->hosts[hIdx]->u.tcp.host); >> + VIR_FREE(connCache->conn[cIdx]->hosts[hIdx]->u.tcp.port); >> + } >> + VIR_FREE(connCache->conn[cIdx]->hosts[hIdx]); >> + } >> + >> + VIR_FREE(connCache->conn[cIdx]->hosts); >> + VIR_FREE(connCache->conn[cIdx]->volname); >> + VIR_FREE(connCache->conn[cIdx]); >> + >> + virMutexLock(&connCache->lock); >> + VIR_DELETE_ELEMENT(connCache->conn, cIdx, connCache->nConn); >> + virMutexUnlock(&connCache->lock); > > Doing a extracted freeing function will allow you to get rid of this > duplication too. Yes, now it make scene to me. > >> + >> + VIR_FREE(src->drv); >> + } >> + } >> + >> + if (!connCache->conn) >> + virMutexDestroy(&connCache->lock); >> +} >> >> static void >> virStorageBackendGlusterClose(virStorageBackendGlusterStatePtr state) > > [...] > >> @@ -612,6 +852,7 @@ virStorageFileBackendGlusterInitServer(virStorageFileBackendGlusterPrivPtr priv, >> static int >> virStorageFileBackendGlusterInit(virStorageSourcePtr src) >> { >> + int ret = 0; >> virStorageFileBackendGlusterPrivPtr priv = NULL; >> size_t i; >> >> @@ -625,6 +866,12 @@ virStorageFileBackendGlusterInit(virStorageSourcePtr src) >> if (VIR_ALLOC(priv) < 0) >> return -1; >> >> + priv->vol = virStorageBackendGlusterFindPreopened(src); >> + if (priv->vol) { >> + src->drv->priv = priv; >> + return ret; >> + } >> + >> VIR_DEBUG("initializing gluster storage file %p " >> "(priv='%p' volume='%s' path='%s') as [%u:%u]", >> src, priv, src->volume, src->path, >> @@ -635,6 +882,10 @@ virStorageFileBackendGlusterInit(virStorageSourcePtr src) >> goto error; >> } >> >> + ret = virStorageBackendGlusterSetPreopened(src, priv); >> + if (ret < 0) >> + goto error; >> + >> for (i = 0; i < src->nhosts; i++) { >> if (virStorageFileBackendGlusterInitServer(priv, src->hosts + i) < 0) >> goto error; >> @@ -648,13 +899,13 @@ virStorageFileBackendGlusterInit(virStorageSourcePtr src) >> } >> >> src->drv->priv = priv; >> + ret = 0; >> >> - return 0; >> + return ret; > > Why do you need a ret variable if you overwrite it right before > returning it? The above changes don't make sense. right! > >> >> error: >> - if (priv->vol) >> - glfs_fini(priv->vol); >> VIR_FREE(priv); >> + virStorageBackendGlusterClosePreopened(src); >> >> return -1; >> } >> diff --git a/src/storage/storage_driver.c b/src/storage/storage_driver.c >> index df65807..9c3bffb 100644 >> --- a/src/storage/storage_driver.c >> +++ b/src/storage/storage_driver.c > > [...] > >> @@ -2967,6 +2964,24 @@ virStorageFileInit(virStorageSourcePtr src) >> return virStorageFileInitAs(src, -1, -1); >> } >> >> +int >> +virStorageVolumeRegister(virStorageSourcePtr src, >> + uid_t uid, gid_t gid) >> +{ >> + if (virStorageFileInitAs(src, uid, gid) < 0) >> + return -1; >> + >> + src->drv->priv = NULL; > > What's the point of this wrapper? Also it seems wrong as you initialize > it and overwrite the private data after that? That will leak them which > is wrong. > > Ah, that is correct for the gluster driver. Note that other backends > have private data too and this breaks it and leaks their private data. Realized. Thanks Peter. -- Prasanna > >> + return 0; >> +} -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list