This patch offer, 1. Optimizing the calls to glfs_init() and friends 2. Temporarily reduce the memory leak appear in libvirt process account, even if the root cause for the leaks are glfs_fini() (7 - 10MB per object) [Hopefully gluster should address this in its future releases, not very near] Currently, a start of a VM will call 2 glfs_new/glfs_init (which will create glfs object, once for stat, read headers and next to chown) and then will fork qemu process which will call once again (for actual read write IO). Not that all, in case if we are have 4 extra attached disks, then the total calls to glfs_init() and friends will be (4+1)*2 in libvirt and (4+1)*1 in qemu space i.e 15 calls. Since we don't have control over qemu process as that executes in a different process environment, lets do not bother much about it. This patch shrinks these 10 calls (i.e objects from above example) to just one, by maintaining a cache of glfs objects. The glfs object is shared across other only if volume name and all the volfile servers match. In case of hit glfs object takes a ref and only on close unref happens. Thanks to 'Peter Krempa' for the discussion. Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@xxxxxxxxxx> --- WORK IN PROGRESS: (WIP) ---------------- While initially caching the glfs object, i.e. in virStorageBackendGlusterSetPreopened() I have took a ref=2, so on the following virStorageBackendGlusterClosePreopened() --ref will make ref=1, which will help not cleaning up the object which has to be shared with next coming disks (if conditions meat). Given some context, the idea is that on time-out (or after all disks are initiallized) some one should call virStorageBackendGlusterClosePreopened() which will ideally make ref=0, hence cached object will be cleaned/deleted calling glfs_fini() I had a thought of doing the time-out cleanup call in virSecurityManagerSetAllLabel() or similar, but that looks too odd for me? Some help ? Thanks in advance. --- src/storage/storage_backend_gluster.c | 136 ++++++++++++++++++++++++++++++++-- src/storage/storage_backend_gluster.h | 33 ++++++++- 2 files changed, 160 insertions(+), 9 deletions(-) diff --git a/src/storage/storage_backend_gluster.c b/src/storage/storage_backend_gluster.c index 8e86704..4f53ebc 100644 --- a/src/storage/storage_backend_gluster.c +++ b/src/storage/storage_backend_gluster.c @@ -47,19 +47,132 @@ struct _virStorageBackendGlusterState { char *dir; /* dir from URI, or "/"; always starts and ends in '/' */ }; +virGlusterDefPtr ConnCache = {0,}; + typedef struct _virStorageBackendGlusterState virStorageBackendGlusterState; typedef virStorageBackendGlusterState *virStorageBackendGlusterStatePtr; +void +virStorageBackendGlusterSetPreopened(virStorageSourcePtr src, glfs_t *fs) +{ + size_t i; + virStorageBackendGlusterStatePtrPreopened entry = NULL; + + if (ConnCache == NULL && (VIR_ALLOC(ConnCache) < 0)) + return; + + for (i = 0; i < ConnCache->nConn; i++) { + if (STREQ(ConnCache->Conn[i]->volname, src->volume)) + return; + } + + if (VIR_ALLOC(entry) < 0) + goto L1; + + if (VIR_STRDUP(entry->volname, src->volume) < 0) + goto L1; + + entry->nhosts = src->nhosts; + for (i = 0; i < src->nhosts; i++) { + if (VIR_ALLOC_N(entry->hosts[i], strlen(src->hosts[i].name) + 1) < 0) + goto L2; + strcpy(entry->hosts[i], src->hosts[i].name); + } + + entry->fs = fs; + entry->ref = 2; /* persist glfs obj per volume until a final timeout + virStorageBackendGlusterClosePreopened() is called */ + + if (VIR_INSERT_ELEMENT(ConnCache->Conn, -1, ConnCache->nConn, entry) < 0) + goto L2; + + return; + +L2: + for (i = 0; i < entry->nhosts; i++) + VIR_FREE(entry->hosts[i]); +L1: + if(ConnCache->nConn == 0) + VIR_FREE(ConnCache); + VIR_FREE(entry->volname); + VIR_FREE(entry); +} + +glfs_t * +virStorageBackendGlusterFindPreopened(virStorageSourcePtr src) +{ + size_t i, j, k, ret = 0; + size_t min, max; + + if (ConnCache == NULL) + return NULL; + + virStorageBackendGlusterStatePtrPreopened entry; + + for (i = 0; i < ConnCache->nConn; i++) { + entry = ConnCache->Conn[i]; + if (STREQ(entry->volname, src->volume)) { + min = entry->nhosts < src->nhosts ? entry->nhosts : src->nhosts; + max = entry->nhosts >= src->nhosts ? entry->nhosts : src->nhosts; + for (j = 0; j< min; j++) { + if (entry->nhosts == min) { + for (k = 0; k < max; k++) { + if (STREQ(entry->hosts[j], src->hosts[k].name)) { + ret = 1; + break; + } + } + if (!ret) + return NULL; + } else { + for (k = 0; k < max; k++) { + if (STREQ(src->hosts[j].name, entry->hosts[k])) { + ret = 1; + break; + } + } + if (!ret) + return NULL; + } + } + entry->ref++; + return entry->fs; + } + } + return NULL; +} + +int +virStorageBackendGlusterClosePreopened(glfs_t *fs) +{ + size_t i; + int ret = 0; + + if (fs == NULL) + return ret; + + for (i = 0; i < ConnCache->nConn; i++) { + if (ConnCache->Conn[i]->fs == fs) { + if (--ConnCache->Conn[i]->ref) + return ret; + + ret = glfs_fini(ConnCache->Conn[i]->fs); + VIR_FREE(ConnCache->Conn[i]->volname); + VIR_FREE(ConnCache->Conn[i]); + + VIR_DELETE_ELEMENT(ConnCache->Conn, i, ConnCache->nConn); + } + } + return ret; +} + static void virStorageBackendGlusterClose(virStorageBackendGlusterStatePtr state) { if (!state) return; - /* Yuck - glusterfs-api-3.4.1 appears to always return -1 for - * glfs_fini, with errno containing random data, so there's no way - * to tell if it succeeded. 3.4.2 is supposed to fix this.*/ - if (state->vol && glfs_fini(state->vol) < 0) + if (state->vol && virStorageBackendGlusterClosePreopened(state->vol) < 0) VIR_DEBUG("shutdown of gluster volume %s failed with errno %d", state->volname, errno); @@ -556,8 +669,7 @@ virStorageFileBackendGlusterDeinit(virStorageSourcePtr src) src, src->hosts->name, src->hosts->port ? src->hosts->port : "0", src->volume, src->path); - if (priv->vol) - glfs_fini(priv->vol); + virStorageBackendGlusterClosePreopened(priv->vol); VIR_FREE(priv->canonpath); VIR_FREE(priv); @@ -630,11 +742,20 @@ virStorageFileBackendGlusterInit(virStorageSourcePtr src) src, priv, src->volume, src->path, (unsigned int)src->drv->uid, (unsigned int)src->drv->gid); + + priv->vol = virStorageBackendGlusterFindPreopened(src); + if (priv->vol) { + src->drv->priv = priv; + return 0; + } + if (!(priv->vol = glfs_new(src->volume))) { virReportOOMError(); goto error; } + virStorageBackendGlusterSetPreopened(src, priv->vol); + for (i = 0; i < src->nhosts; i++) { if (virStorageFileBackendGlusterInitServer(priv, src->hosts + i) < 0) goto error; @@ -652,8 +773,7 @@ virStorageFileBackendGlusterInit(virStorageSourcePtr src) return 0; error: - if (priv->vol) - glfs_fini(priv->vol); + virStorageBackendGlusterClosePreopened(priv->vol); VIR_FREE(priv); return -1; diff --git a/src/storage/storage_backend_gluster.h b/src/storage/storage_backend_gluster.h index 6796016..a0326aa 100644 --- a/src/storage/storage_backend_gluster.h +++ b/src/storage/storage_backend_gluster.h @@ -22,9 +22,40 @@ #ifndef __VIR_STORAGE_BACKEND_GLUSTER_H__ # define __VIR_STORAGE_BACKEND_GLUSTER_H__ -# include "storage_backend.h" +#include "storage_backend.h" +#include <glusterfs/api/glfs.h> extern virStorageBackend virStorageBackendGluster; extern virStorageFileBackend virStorageFileBackendGluster; + +struct _virStorageBackendGlusterStatePreopened { + char *volname; + size_t nhosts; + char *hosts[1024]; /* FIXME: 1024 ? */ + glfs_t *fs; + int ref; +}; + +typedef struct _virStorageBackendGlusterStatePreopened virStorageBackendGlusterStatePreopened; +typedef virStorageBackendGlusterStatePreopened *virStorageBackendGlusterStatePtrPreopened; + +struct _virGlusterDef { + size_t nConn; + virStorageBackendGlusterStatePtrPreopened *Conn; +}; + +typedef struct _virGlusterDef virGlusterDef; +typedef virGlusterDef *virGlusterDefPtr; + +extern virGlusterDefPtr ConnCache; + +void +virStorageBackendGlusterSetPreopened(virStorageSourcePtr src, glfs_t *fs); + +glfs_t* +virStorageBackendGlusterFindPreopened(virStorageSourcePtr src); + +int +virStorageBackendGlusterClosePreopened(glfs_t *fs); #endif /* __VIR_STORAGE_BACKEND_GLUSTER_H__ */ -- 2.7.4 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list