The iterator interface is modeled on that for references, though no vtable is necessary because there is (so far?) only one type of dir_iterator. There are obviously a lot of features that could easily be added to this class: * Skip/include directory paths in the iteration * Shallow/deep iteration * Letting the caller decide which subdirectories to recurse into (e.g., via a dir_iterator_advance_into() function) * Option to iterate in sorted order * Option to iterate over directory paths before vs. after their contents But these are not needed for the current patch series, so I refrain. Signed-off-by: Michael Haggerty <mhagger@xxxxxxxxxxxx> --- Makefile | 1 + dir-iterator.c | 180 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ dir-iterator.h | 86 +++++++++++++++++++++++++++ 3 files changed, 267 insertions(+) create mode 100644 dir-iterator.c create mode 100644 dir-iterator.h diff --git a/Makefile b/Makefile index ac8f365..b4ffc11 100644 --- a/Makefile +++ b/Makefile @@ -722,6 +722,7 @@ LIB_OBJS += diff-lib.o LIB_OBJS += diff-no-index.o LIB_OBJS += diff.o LIB_OBJS += dir.o +LIB_OBJS += dir-iterator.o LIB_OBJS += editor.o LIB_OBJS += entry.o LIB_OBJS += environment.o diff --git a/dir-iterator.c b/dir-iterator.c new file mode 100644 index 0000000..e1d60f0 --- /dev/null +++ b/dir-iterator.c @@ -0,0 +1,180 @@ +#include "cache.h" +#include "dir.h" +#include "iterator.h" +#include "dir-iterator.h" + +struct dir_iterator_level { + int initialized; + + DIR *dir; + + /* + * The length of the directory part of refname at this level + * (including the trailing '/'): + */ + size_t prefix_len; + + /* + * The last action that has been taken with the current entry + * (needed for directories, which have to be included in the + * iteration and also iterated into): + */ + enum { + DIR_STATE_ITER, + DIR_STATE_RECURSE + } dir_state; +}; + +/* + * The full data structure used to manage the internal directory + * iteration state. It includes members that are not part of the + * public interface. + */ +struct dir_iterator_int { + struct dir_iterator base; + + /* + * The number of levels currently on the stack. This is always + * at least 1, because when it becomes zero the iteration is + * ended and this struct is freed. + */ + size_t levels_nr; + + /* The number of levels that have been allocated on the stack */ + size_t levels_alloc; + + /* + * A stack of levels. levels[0] is the uppermost directory + * that will be included in this iteration. + */ + struct dir_iterator_level *levels; +}; + +int dir_iterator_advance(struct dir_iterator *dir_iterator) +{ + struct dir_iterator_int *iter = + (struct dir_iterator_int *)dir_iterator; + + while (1) { + struct dir_iterator_level *level = + &iter->levels[iter->levels_nr - 1]; + struct dirent *de; + + if (!level->initialized) { + if (!is_dir_sep(iter->base.path.buf[iter->base.path.len - 1])) + strbuf_addch(&iter->base.path, '/'); + level->prefix_len = iter->base.path.len; + + /* opendir() errors are handled below */ + level->dir = opendir(iter->base.path.buf); + + level->initialized = 1; + } else if (S_ISDIR(iter->base.st.st_mode)) { + if (level->dir_state == DIR_STATE_ITER) { + /* + * The directory was just iterated + * over; now prepare to iterate into + * it. + */ + level->dir_state = DIR_STATE_RECURSE; + ALLOC_GROW(iter->levels, iter->levels_nr + 1, + iter->levels_alloc); + level = &iter->levels[iter->levels_nr++]; + level->initialized = 0; + continue; + } else { + /* + * The directory has already been + * iterated over and iterated into; + * we're done with it. + */ + } + } + + if (!level->dir) { + /* + * This level is exhausted (or wasn't opened + * successfully); pop up a level. + */ + if (--iter->levels_nr == 0) { + return dir_iterator_abort(dir_iterator); + } + continue; + } + + /* + * Loop until we find an entry that we can give back + * to the caller: + */ + while (1) { + strbuf_setlen(&iter->base.path, level->prefix_len); + de = readdir(level->dir); + + if (!de) { + /* This level is exhausted; pop up a level. */ + closedir(level->dir); + level->dir = NULL; + if (--iter->levels_nr == 0) + return dir_iterator_abort(dir_iterator); + break; + } + + if (is_dot_or_dotdot(de->d_name)) + continue; + + strbuf_addstr(&iter->base.path, de->d_name); + if (lstat(iter->base.path.buf, &iter->base.st) < 0) + continue; /* silently skip */ + + /* + * We have to set these each time because + * the path strbuf might have been realloc()ed. + */ + + iter->base.relative_path = + iter->base.path.buf + iter->levels[0].prefix_len; + iter->base.basename = + iter->base.path.buf + level->prefix_len; + level->dir_state = DIR_STATE_ITER; + + return ITER_OK; + } + } +} + +int dir_iterator_abort(struct dir_iterator *dir_iterator) +{ + struct dir_iterator_int *iter = (struct dir_iterator_int *)dir_iterator; + + while (iter->levels_nr) { + struct dir_iterator_level *level = + &iter->levels[--iter->levels_nr]; + + if (level->dir) + closedir(level->dir); + } + + free(iter->levels); + strbuf_release(&iter->base.path); + free(iter); + return ITER_DONE; +} + +struct dir_iterator *dir_iterator_begin(const char *path) +{ + struct dir_iterator_int *iter = xcalloc(1, sizeof(*iter)); + struct dir_iterator *dir_iterator = &iter->base; + + if (!path || !*path) + die("BUG: empty path passed to dir_iterator_begin()"); + + strbuf_init(&iter->base.path, PATH_MAX); + strbuf_addstr(&iter->base.path, path); + + ALLOC_GROW(iter->levels, 10, iter->levels_alloc); + + iter->levels_nr = 1; + iter->levels[0].initialized = 0; + + return dir_iterator; +} diff --git a/dir-iterator.h b/dir-iterator.h new file mode 100644 index 0000000..8eb1f4c --- /dev/null +++ b/dir-iterator.h @@ -0,0 +1,86 @@ +#ifndef DIR_ITERATOR_H +#define DIR_ITERATOR_H + +/* + * Iterate over a directory tree. + * + * Iterate over a directory tree, recursively, including paths of all + * types and hidden paths. Skip "." and ".." entries and don't follow + * symlinks except for the original path. + * + * Every time dir_iterator_advance() is called, update the members of + * the dir_iterator structure to reflect the next path in the + * iteration. The order that paths are iterated over within a + * directory is undefined, but directory paths are always iterated + * over before the subdirectory contents. + * + * A typical iteration looks like this: + * + * int ok; + * struct iterator *iter = dir_iterator_begin(path); + * + * while ((ok = dir_iterator_advance(iter)) == ITER_OK) { + * if (want_to_stop_iteration()) { + * ok = dir_iterator_abort(iter); + * break; + * } + * + * // Access information about the current path: + * if (S_ISDIR(iter->st.st_mode)) + * printf("%s is a directory\n", iter->relative_path); + * } + * + * if (ok != ITER_DONE) + * handle_error(); + * + * Callers are allowed to modify iter->path while they are working, + * but they must restore it to its original contents before calling + * dir_iterator_advance() again. + */ + +struct dir_iterator { + /* The current path: */ + struct strbuf path; + + /* + * The current path relative to the starting path. This part + * of the path always uses "/" characters to separate path + * components: + */ + const char *relative_path; + + /* The current basename: */ + const char *basename; + + /* The result of calling lstat() on path: */ + struct stat st; +}; + +/* + * Start a directory iteration over path. Return a dir_iterator that + * holds the internal state of the iteration. + * + * The iteration includes all paths under path, not including path + * itself and not including "." or ".." entries. + * + * path is the starting directory. An internal copy will be made. + */ +struct dir_iterator *dir_iterator_begin(const char *path); + +/* + * Advance the iterator to the first or next item and return ITER_OK. + * If the iteration is exhausted, free the resources associated with + * the iterator and return ITER_DONE. On error, return ITER_ERROR. It + * is a bug to use iterator or call this function again after it has + * returned false. + */ +int dir_iterator_advance(struct dir_iterator *iterator); + +/* + * End the iteration before it has been exhausted. Free the reference + * iterator and any associated resources and return ITER_DONE. Return + * ITER_ERROR on error. + */ +int dir_iterator_abort(struct dir_iterator *iterator); + +#endif -- 2.8.1 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html