On Mon, Nov 11, 2013 at 09:19:30PM -0700, Eric Blake wrote: > Actually put gfapi to use, by allowing the creation of a gluster > pool. Right now, all volumes are treated as raw; further patches > will allow peering into files to allow for qcow2 files and backing > chains, and reporting proper volume allocation. > > I've reported a couple of glusterfs bugs; if we were to require a > minimum of (not-yet-released) glusterfs 3.5, we could use the new > glfs_readdir [1] and not worry about the bogus return value of > glfs_fini [2], but for now I'm testing with Fedora 19's glusterfs > 3.4.1. > > [1] http://lists.gnu.org/archive/html/gluster-devel/2013-10/msg00085.html > [2] http://lists.gnu.org/archive/html/gluster-devel/2013-10/msg00086.html > > * src/storage/storage_backend_gluster.c > (virStorageBackendGlusterRefreshPool): Initial implementation. > (virStorageBackendGlusterOpen, virStorageBackendGlusterClose): New > helper functions. > > Signed-off-by: Eric Blake <eblake@xxxxxxxxxx> > --- > src/storage/storage_backend_gluster.c | 220 +++++++++++++++++++++++++++++++++- > 1 file changed, 215 insertions(+), 5 deletions(-) > > diff --git a/src/storage/storage_backend_gluster.c b/src/storage/storage_backend_gluster.c > index 2863c73..bc90de9 100644 > --- a/src/storage/storage_backend_gluster.c > +++ b/src/storage/storage_backend_gluster.c > @@ -23,20 +23,230 @@ > > #include <glusterfs/api/glfs.h> > > -#include "virerror.h" > #include "storage_backend_gluster.h" > #include "storage_conf.h" > +#include "viralloc.h" > +#include "virerror.h" > +#include "virlog.h" > +#include "virstoragefile.h" > +#include "virstring.h" > +#include "viruri.h" > > #define VIR_FROM_THIS VIR_FROM_STORAGE > > +struct _virStorageBackendGlusterState { > + glfs_t *vol; > + > + /* Accept the same URIs as qemu's block/gluster.c: > + * gluster[+transport]://[server[:port]]/vol/[dir/]image[?socket=...] */ > + virURI *uri; > + > + char *volname; /* vol from URI */ > + char *dir; /* dir from URI, or "." 
*/ > +}; > + > +typedef struct _virStorageBackendGlusterState virStorageBackendGlusterState; > +typedef virStorageBackendGlusterState *virStorageBackendGlusterStatePtr; > + > +static void > +virStorageBackendGlusterClose(virStorageBackendGlusterStatePtr state) > +{ > + if (!state) > + return; > + > + /* Yuck - glusterfs-api-3.4.1 appears to always return -1 for > + * glfs_fini, with errno containing random data, so there's no way > + * to tell if it succeeded. 3.4.2 is supposed to fix this.*/ > + if (state->vol && glfs_fini(state->vol) < 0) > + VIR_DEBUG("shutdown of gluster volume %s failed with errno %d", > + state->volname, errno); > + > + virURIFree(state->uri); > + VIR_FREE(state->volname); > + VIR_FREE(state->dir); > + VIR_FREE(state); > +} > + > +static virStorageBackendGlusterStatePtr > +virStorageBackendGlusterOpen(virStoragePoolObjPtr pool) > +{ > + virStorageBackendGlusterStatePtr ret = NULL; > + char *sub; > + const char *name = pool->def->source.name; > + > + if (VIR_ALLOC(ret) < 0) > + return NULL; > + > + if (*name == '/') > + name++; > + if ((sub = strchr(name, '/'))) { > + if (VIR_STRNDUP(ret->volname, name, sub - name) < 0 || > + VIR_STRDUP(ret->dir, sub) < 0) > + goto error; > + } else { > + if (VIR_STRDUP(ret->volname, pool->def->source.name) < 0 || > + VIR_STRDUP(ret->dir, ".") < 0) > + goto error; > + } > + > + /* FIXME: Currently hard-coded to tcp transport; XML needs to be > + * extended to allow alternate transport */ > + if (VIR_ALLOC(ret->uri) < 0 || > + VIR_STRDUP(ret->uri->scheme, "gluster") < 0 || > + VIR_STRDUP(ret->uri->server, pool->def->source.hosts[0].name) < 0 || > + virAsprintf(&ret->uri->path, "%s%s", > + *pool->def->source.name == '/' ? "" : "/", > + pool->def->source.name) < 0) > + goto error; I'm not a huge fan of chaining together statements like this, since some gdb versions are pretty awful at telling you which one of these 4 statements has the problem on segv. 
> + ret->uri->port = pool->def->source.hosts[0].port; > + > + if (!(ret->vol = glfs_new(ret->volname))) { > + virReportOOMError(); > + goto error; > + } > + > + if (glfs_set_volfile_server(ret->vol, "tcp", > + ret->uri->server, ret->uri->port) < 0 || > + glfs_init(ret->vol) < 0) { > + char *uri = virURIFormat(ret->uri); > + > + virReportSystemError(errno, _("failed to connect to %s"), > + NULLSTR(uri)); > + VIR_FREE(uri); > + goto error; > + } > + > + return ret; > + > +error: > + virStorageBackendGlusterClose(ret); > + return NULL; > +} > + > + > +/* Populate *volptr for the given name and stat information, or leave > + * it NULL if the entry should be skipped (such as "."). Return 0 on > + * success, -1 on failure. */ > +static int > +virStorageBackendGlusterRefreshVol(virStorageBackendGlusterStatePtr state, > + const char *pool, > + const char *name, > + struct stat *st, > + virStorageVolDefPtr *volptr) > +{ > + char *tmp; > + int ret = -1; > + virStorageVolDefPtr vol = NULL; > + > + /* Silently skip directories, including '.' and '..'. FIXME: > + * should non-'.' subdirectories be listed as type dir? */ What behaviour does the 'fs' pool have for non-'.' subdirs? Seems we perhaps ought to match that. > + *volptr = NULL; > + if (S_ISDIR(st->st_mode)) > + return 0; > + > + if (VIR_ALLOC(vol) < 0 || > + VIR_STRDUP(vol->name, name) < 0 || > + virAsprintf(&vol->key, "%s%s/%s", > + *pool == '/' ? "" : "/", pool, vol->name) < 0) > + goto cleanup; > + > + /* FIXME - must open files to determine if they are non-raw */ > + vol->type = VIR_STORAGE_VOL_NETWORK; > + vol->target.format = VIR_STORAGE_FILE_RAW; > + vol->capacity = vol->allocation = st->st_size; So gluster has no concept of sparse volumes ? 
> + tmp = state->uri->path; > + state->uri->path = vol->key; > + if (!(vol->target.path = virURIFormat(state->uri))) { > + state->uri->path = tmp; > + goto cleanup; > + } > + state->uri->path = tmp; > + > + *volptr = vol; > + vol = NULL; > + ret = 0; > +cleanup: > + virStorageVolDefFree(vol); > + return ret; > +} > > static int > virStorageBackendGlusterRefreshPool(virConnectPtr conn ATTRIBUTE_UNUSED, > - virStoragePoolObjPtr pool ATTRIBUTE_UNUSED) > + virStoragePoolObjPtr pool) > { > - virReportError(VIR_ERR_NO_SUPPORT, "%s", > - _("gluster pool type not fully supported yet")); > - return -1; > + int ret = -1; > + virStorageBackendGlusterStatePtr state = NULL; > + struct { > + struct dirent ent; > + /* See comment below about readdir_r needing padding */ > + char padding[MAX(1, 256 - (int) (sizeof(struct dirent) > + - offsetof(struct dirent, d_name)))]; > + } de; > + struct dirent *ent; > + glfs_fd_t *dir = NULL; > + struct stat st; > + struct statvfs sb; > + > + if (!(state = virStorageBackendGlusterOpen(pool))) > + goto cleanup; What kind of overhead is there in opening / closing gluster connections ? Is it something we should just do in the pool start/stop methods, instead of on every operation like refresh. > + > + /* Why oh why did glfs 3.4 decide to expose only readdir_r rather > + * than readdir? POSIX admits that readdir_r is inherently a > + * flawed design, because systems are not required to define > + * NAME_MAX: http://austingroupbugs.net/view.php?id=696 > + * http://womble.decadent.org.uk/readdir_r-advisory.html > + * > + * Fortunately, gluster appears to limit its underlying bricks to > + * only use file systems such as XFS that have a NAME_MAX of 255; > + * so we are currently guaranteed that if we provide 256 bytes of > + * tail padding, then we should have enough space to avoid buffer > + * overflow no matter whether the OS used d_name[], d_name[1], or > + * d_name[256] in its 'struct dirent'. 
> + * http://lists.gnu.org/archive/html/gluster-devel/2013-10/msg00083.html > + */ > + > + if (!(dir = glfs_opendir(state->vol, state->dir))) { > + virReportSystemError(errno, _("cannot open path '%s'"), > + pool->def->name); > + goto cleanup; > + } > + while (!(errno = glfs_readdirplus_r(dir, &st, &de.ent, &ent)) && ent) { > + virStorageVolDefPtr vol; > + int okay = virStorageBackendGlusterRefreshVol(state, > + pool->def->source.name, > + ent->d_name, &st, > + &vol); > + if (okay < 0) > + goto cleanup; > + if (vol && VIR_APPEND_ELEMENT(pool->volumes.objs, pool->volumes.count, > + vol) < 0) > + goto cleanup; > + } > + if (errno) { > + virReportSystemError(errno, _("failed to read directory '%s'"), > + pool->def->source.name); > + goto cleanup; > + } > + > + if (glfs_statvfs(state->vol, state->dir, &sb) < 0) { > + virReportSystemError(errno, _("cannot statvfs path '%s'"), pool->def->source.name); Seems you lost the line break here. > + goto cleanup; > + } > + > + pool->def->capacity = ((unsigned long long)sb.f_frsize * > + (unsigned long long)sb.f_blocks); > + pool->def->available = ((unsigned long long)sb.f_bfree * > + (unsigned long long)sb.f_frsize); > + pool->def->allocation = pool->def->capacity - pool->def->available; > + > + ret = 0; > +cleanup: > + if (dir) > + glfs_closedir(dir); > + virStorageBackendGlusterClose(state); > + if (ret < 0) > + virStoragePoolObjClearVols(pool); > + return ret; > } > Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :| -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list