Gah, I just realized I failed to correct refs/files-backend.c's behavior and kept 0 instead of DIR_ITERATOR_PRE_ORDER_TRAVERSAL as its flag. I'll correct this on a v7, but I'll wait for the rest of your reviews before sending that revision. On Sun, Apr 2, 2017 at 1:35 AM, Daniel Ferreira <bnmvco@xxxxxxxxx> wrote: > Perform major refactor of dir_iterator_advance(). dir_iterator has > ceased to rely on a convoluted state machine mechanism of two loops and > two state variables (level.initialized and level.dir_state). This serves > to ease comprehension of the iterator mechanism and ease addition of new > features to the iterator. > > Create an option for the dir_iterator API to iterate over subdirectories > only after having iterated through their contents. This feature was > predicted, although not implemented by 0fe5043 ("dir_iterator: new API > for iterating over a directory tree", 2016-06-18). This is useful for > recursively removing a directory and calling rmdir() on a directory only > after all of its contents have been wiped. > > Add an option for the dir_iterator API to iterate over the root > directory (the one it was initialized with) as well. > > Add the "flags" parameter to dir_iterator_create, allowing for the > aforementioned new features to be enabled. The new default behavior > (i.e. flags set to 0) does not iterate over directories. Flag > DIR_ITERATOR_PRE_ORDER_TRAVERSAL iterates over a directory before doing > so over its contents. DIR_ITERATOR_POST_ORDER_TRAVERSAL iterates over a > directory after doing so over its contents. DIR_ITERATOR_LIST_ROOT_DIR > iterates over the root directory. These flags do not conflict with each > other and may be used simultaneously. > > Amend a call to dir_iterator_begin() in refs/files-backend.c to pass > the flags parameter introduced. > > Improve t/t0065-dir-iterator.sh and t/helper/test-dir-iterator.c to > test "post-order" and "iterate-over-root" modes. > > Signed-off-by: Daniel Ferreira <bnmvco@xxxxxxxxx> > --- > dir-iterator.c | 149 +++++++++++++++++++++++++++---------------- > dir-iterator.h | 28 ++++++-- > refs/files-backend.c | 2 +- > t/helper/test-dir-iterator.c | 6 +- > t/t0065-dir-iterator.sh | 61 +++++++++++++++++- > 5 files changed, 179 insertions(+), 67 deletions(-) > > diff --git a/dir-iterator.c b/dir-iterator.c > index ce8bf81..4b23889 100644 > --- a/dir-iterator.c > +++ b/dir-iterator.c > @@ -4,8 +4,6 @@ > #include "dir-iterator.h" > > struct dir_iterator_level { > - int initialized; > - > DIR *dir; > > /* > @@ -20,8 +18,11 @@ struct dir_iterator_level { > * iteration and also iterated into): > */ > enum { > - DIR_STATE_ITER, > - DIR_STATE_RECURSE > + DIR_STATE_PUSHED, > + DIR_STATE_PRE_ITERATION, > + DIR_STATE_ITERATING, > + DIR_STATE_POST_ITERATION, > + DIR_STATE_EXHAUSTED > } dir_state; > }; > > @@ -48,15 +49,20 @@ struct dir_iterator_int { > * that will be included in this iteration. > */ > struct dir_iterator_level *levels; > + > + /* Holds the flags passed to dir_iterator_begin(). */ > + unsigned flags; > }; > > static inline void push_dir_level(struct dir_iterator_int *iter, struct dir_iterator_level *level) > { > - level->dir_state = DIR_STATE_RECURSE; > ALLOC_GROW(iter->levels, iter->levels_nr + 1, > iter->levels_alloc); > + > + /* Push a new level */ > level = &iter->levels[iter->levels_nr++]; > - level->initialized = 0; > + level->dir = NULL; > + level->dir_state = DIR_STATE_PUSHED; > } > > static inline int pop_dir_level(struct dir_iterator_int *iter) > @@ -75,18 +81,35 @@ static inline int set_iterator_data(struct dir_iterator_int *iter, struct dir_it > } > > /* > - * We have to set these each time because > - * the path strbuf might have been realloc()ed. > + * Check if we are dealing with the root directory as an > + * item that's being iterated through. > */ > - iter->base.relative_path = > - iter->base.path.buf + iter->levels[0].prefix_len; > + if (level->dir_state != DIR_STATE_ITERATING && > + iter->levels_nr == 1) { > + iter->base.relative_path = "."; > + } > + else { > + iter->base.relative_path = > + iter->base.path.buf + iter->levels[0].prefix_len; > + } > iter->base.basename = > iter->base.path.buf + level->prefix_len; > - level->dir_state = DIR_STATE_ITER; > > return 0; > } > > +/* > + * This function uses a state machine with the following states: > + * -> DIR_STATE_PUSHED: the directory has been pushed to the > + * iterator traversal tree. > + * -> DIR_STATE_PRE_ITERATION: the directory is *NOT* initialized. The > + * dirpath has already been returned if pre-order traversal is set. > + * -> DIR_STATE_ITERATING: the directory is initialized. We are traversing > + * through it. > + * -> DIR_STATE_POST_ITERATION: the directory has been iterated through. > + * We are ready to close it. > + * -> DIR_STATE_EXHAUSTED: the directory is closed and ready to be popped. > + */ > int dir_iterator_advance(struct dir_iterator *dir_iterator) > { > struct dir_iterator_int *iter = > @@ -97,7 +120,18 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator) > &iter->levels[iter->levels_nr - 1]; > struct dirent *de; > > - if (!level->initialized) { > + if (level->dir_state == DIR_STATE_PUSHED) { > + level->dir_state = DIR_STATE_PRE_ITERATION; > + > + if (iter->flags & DIR_ITERATOR_PRE_ORDER_TRAVERSAL) { > + /* We may not want the root directory to be iterated over */ > + if (iter->levels_nr != 1 || > + (iter->flags & DIR_ITERATOR_LIST_ROOT_DIR)) { > + set_iterator_data(iter, level); > + return ITER_OK; > + } > + } > + } else if (level->dir_state == DIR_STATE_PRE_ITERATION) { > /* > * Note: dir_iterator_begin() ensures that > * path is not the empty string. > @@ -108,63 +142,32 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator) > > level->dir = opendir(iter->base.path.buf); > if (!level->dir && errno != ENOENT) { > + /* > + * This level wasn't opened sucessfully; pretend we > + * iterated through it already. > + */ > warning("error opening directory %s: %s", > iter->base.path.buf, strerror(errno)); > - /* Popping the level is handled below */ > - } > > - level->initialized = 1; > - } else if (S_ISDIR(iter->base.st.st_mode)) { > - if (level->dir_state == DIR_STATE_ITER) { > - /* > - * The directory was just iterated > - * over; now prepare to iterate into > - * it. > - */ > - push_dir_level(iter, level); > + level->dir_state = DIR_STATE_POST_ITERATION; > continue; > - } else { > - /* > - * The directory has already been > - * iterated over and iterated into; > - * we're done with it. > - */ > } > - } > > - if (!level->dir) { > - /* > - * This level is exhausted (or wasn't opened > - * successfully); pop up a level. > - */ > - if (pop_dir_level(iter) == 0) > - return dir_iterator_abort(dir_iterator); > - > - continue; > - } > - > - /* > - * Loop until we find an entry that we can give back > - * to the caller: > - */ > - while (1) { > + level->dir_state = DIR_STATE_ITERATING; > + } else if (level->dir_state == DIR_STATE_ITERATING) { > strbuf_setlen(&iter->base.path, level->prefix_len); > errno = 0; > de = readdir(level->dir); > > if (!de) { > - /* This level is exhausted; pop up a level. */ > + /* In case of readdir() error */ > if (errno) { > warning("error reading directory %s: %s", > iter->base.path.buf, strerror(errno)); > - } else if (closedir(level->dir)) > - warning("error closing directory %s: %s", > - iter->base.path.buf, strerror(errno)); > + } > > - level->dir = NULL; > - if (pop_dir_level(iter) == 0) > - return dir_iterator_abort(dir_iterator); > - break; > + level->dir_state = DIR_STATE_POST_ITERATION; > + continue; > } > > if (is_dot_or_dotdot(de->d_name)) > @@ -175,7 +178,38 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator) > if (set_iterator_data(iter, level)) > continue; > > + if (S_ISDIR(iter->base.st.st_mode)) { > + push_dir_level(iter, level); > + continue; > + } > + > return ITER_OK; > + } else if (level->dir_state == DIR_STATE_POST_ITERATION) { > + if (level->dir != NULL && closedir(level->dir)) { > + warning("error closing directory %s: %s", > + iter->base.path.buf, strerror(errno)); > + } > + level->dir_state = DIR_STATE_EXHAUSTED; > + > + strbuf_setlen(&iter->base.path, level->prefix_len); > + /* > + * Since we are iterating through the dirpath > + * after we have gone through it, we still need > + * to get rid of the trailing slash we appended. > + */ > + strbuf_strip_suffix(&iter->base.path, "/"); > + > + if (iter->flags & DIR_ITERATOR_POST_ORDER_TRAVERSAL) { > + /* We may not want the root directory to be iterated over */ > + if (iter->levels_nr != 1 || > + (iter->flags & DIR_ITERATOR_LIST_ROOT_DIR)) { > + set_iterator_data(iter, level); > + return ITER_OK; > + } > + } > + } else if (level->dir_state == DIR_STATE_EXHAUSTED) { > + if (pop_dir_level(iter) == 0) > + return dir_iterator_abort(dir_iterator); > } > } > } > @@ -201,7 +235,7 @@ int dir_iterator_abort(struct dir_iterator *dir_iterator) > return ITER_DONE; > } > > -struct dir_iterator *dir_iterator_begin(const char *path) > +struct dir_iterator *dir_iterator_begin(const char *path, unsigned flags) > { > struct dir_iterator_int *iter = xcalloc(1, sizeof(*iter)); > struct dir_iterator *dir_iterator = &iter->base; > @@ -209,13 +243,16 @@ struct dir_iterator *dir_iterator_begin(const char *path) > if (!path || !*path) > die("BUG: empty path passed to dir_iterator_begin()"); > > + iter->flags = flags; > + > strbuf_init(&iter->base.path, PATH_MAX); > strbuf_addstr(&iter->base.path, path); > > ALLOC_GROW(iter->levels, 10, iter->levels_alloc); > > iter->levels_nr = 1; > - iter->levels[0].initialized = 0; > + iter->levels[0].dir = NULL; > + iter->levels[0].dir_state = DIR_STATE_PUSHED; > > return dir_iterator; > } > diff --git a/dir-iterator.h b/dir-iterator.h > index 27739e6..0e82f36 100644 > --- a/dir-iterator.h > +++ b/dir-iterator.h > @@ -11,13 +11,12 @@ > * Every time dir_iterator_advance() is called, update the members of > * the dir_iterator structure to reflect the next path in the > * iteration. The order that paths are iterated over within a > - * directory is undefined, but directory paths are always iterated > - * over before the subdirectory contents. > + * directory is undefined. > * > * A typical iteration looks like this: > * > * int ok; > - * struct iterator *iter = dir_iterator_begin(path); > + * struct iterator *iter = dir_iterator_begin(path, flags); > * > * while ((ok = dir_iterator_advance(iter)) == ITER_OK) { > * if (want_to_stop_iteration()) { > @@ -38,6 +37,22 @@ > * dir_iterator_advance() again. > */ > > +/* > + * Possible flags for dir_iterator_begin(). > + * > + * -> DIR_ITERATOR_PRE_ORDER_TRAVERSAL: the iterator shall return > + * a dirpath it has found before iterating through that directory's > + * contents. > + * -> DIR_ITERATOR_POST_ORDER_TRAVERSAL: the iterator shall return > + * a dirpath it has found after iterating through that directory's > + * contents. > + * -> DIR_ITERATOR_LIST_ROOT_DIR: the iterator shall return the dirpath > + * of the root directory it is iterating through. > + */ > +#define DIR_ITERATOR_PRE_ORDER_TRAVERSAL (1 << 0) > +#define DIR_ITERATOR_POST_ORDER_TRAVERSAL (1 << 1) > +#define DIR_ITERATOR_LIST_ROOT_DIR (1 << 2) > + > struct dir_iterator { > /* The current path: */ > struct strbuf path; > @@ -57,15 +72,16 @@ struct dir_iterator { > }; > > /* > - * Start a directory iteration over path. Return a dir_iterator that > - * holds the internal state of the iteration. > + * Start a directory iteration over path, with options specified in > + * 'flags'. Return a dir_iterator that holds the internal state of > + * the iteration. > * > * The iteration includes all paths under path, not including path > * itself and not including "." or ".." entries. > * > * path is the starting directory. An internal copy will be made. > */ > -struct dir_iterator *dir_iterator_begin(const char *path); > +struct dir_iterator *dir_iterator_begin(const char *path, unsigned flags); > > /* > * Advance the iterator to the first or next item and return ITER_OK. > diff --git a/refs/files-backend.c b/refs/files-backend.c > index 50188e9..b4bba74 100644 > --- a/refs/files-backend.c > +++ b/refs/files-backend.c > @@ -3346,7 +3346,7 @@ static struct ref_iterator *files_reflog_iterator_begin(struct ref_store *ref_st > files_downcast(ref_store, 0, "reflog_iterator_begin"); > > base_ref_iterator_init(ref_iterator, &files_reflog_iterator_vtable); > - iter->dir_iterator = dir_iterator_begin(git_path("logs")); > + iter->dir_iterator = dir_iterator_begin(git_path("logs"), 0); > return ref_iterator; > } > > diff --git a/t/helper/test-dir-iterator.c b/t/helper/test-dir-iterator.c > index 06f03fc..a1b8c78 100644 > --- a/t/helper/test-dir-iterator.c > +++ b/t/helper/test-dir-iterator.c > @@ -6,6 +6,7 @@ > int cmd_main(int argc, const char **argv) { > struct strbuf path = STRBUF_INIT; > struct dir_iterator *diter; > + unsigned flag = DIR_ITERATOR_PRE_ORDER_TRAVERSAL; > > if (argc < 2) { > return 1; > @@ -13,7 +14,10 @@ int cmd_main(int argc, const char **argv) { > > strbuf_add(&path, argv[1], strlen(argv[1])); > > - diter = dir_iterator_begin(path.buf); > + if (argc == 3) > + flag = atoi(argv[2]); > + > + diter = dir_iterator_begin(path.buf, flag); > > while (dir_iterator_advance(diter) == ITER_OK) { > if (S_ISDIR(diter->st.st_mode)) > diff --git a/t/t0065-dir-iterator.sh b/t/t0065-dir-iterator.sh > index b857c07..ade3ee0 100755 > --- a/t/t0065-dir-iterator.sh > +++ b/t/t0065-dir-iterator.sh > @@ -28,12 +28,28 @@ test_expect_success 'dir-iterator should iterate through all files' ' > >dir/a/e && > >dir/d/e/d/a && > > - test-dir-iterator ./dir | sort >./actual-pre-order-sorted-output && > + test-dir-iterator ./dir 1 | sort >./actual-pre-order-sorted-output && > rm -rf dir && > > test_cmp expect-sorted-output actual-pre-order-sorted-output > ' > > +test_expect_success 'dir-iterator should iterate through all files on post-order mode' ' > + mkdir -p dir && > + mkdir -p dir/a/b/c/ && > + >dir/b && > + >dir/c && > + mkdir -p dir/d/e/d/ && > + >dir/a/b/c/d && > + >dir/a/e && > + >dir/d/e/d/a && > + > + test-dir-iterator ./dir 2 | sort >actual-post-order-sorted-output && > + rm -rf dir && > + > + test_cmp expect-sorted-output actual-post-order-sorted-output > +' > + > cat >expect-pre-order-output <<-\EOF && > [d] (a) ./dir/a > [d] (a/b) ./dir/a/b > @@ -41,14 +57,53 @@ cat >expect-pre-order-output <<-\EOF && > [f] (a/b/c/d) ./dir/a/b/c/d > EOF > > -test_expect_success 'dir-iterator should list files in the correct order' ' > +test_expect_success 'dir-iterator should list files properly on pre-order mode' ' > mkdir -p dir/a/b/c/ && > >dir/a/b/c/d && > > - test-dir-iterator ./dir >actual-pre-order-output && > + test-dir-iterator ./dir 1 >actual-pre-order-output && > rm -rf dir && > > test_cmp expect-pre-order-output actual-pre-order-output > ' > > +cat >expect-post-order-output <<-\EOF && > +[f] (a/b/c/d) ./dir/a/b/c/d > +[d] (a/b/c) ./dir/a/b/c > +[d] (a/b) ./dir/a/b > +[d] (a) ./dir/a > +EOF > + > +test_expect_success 'dir-iterator should list files properly on post-order mode' ' > + mkdir -p dir/a/b/c/ && > + >dir/a/b/c/d && > + > + test-dir-iterator ./dir 2 >actual-post-order-output && > + rm -rf dir && > + > + test_cmp expect-post-order-output actual-post-order-output > +' > + > +cat >expect-pre-order-post-order-root-dir-output <<-\EOF && > +[d] (.) ./dir > +[d] (a) ./dir/a > +[d] (a/b) ./dir/a/b > +[d] (a/b/c) ./dir/a/b/c > +[f] (a/b/c/d) ./dir/a/b/c/d > +[d] (a/b/c) ./dir/a/b/c > +[d] (a/b) ./dir/a/b > +[d] (a) ./dir/a > +[d] (.) ./dir > +EOF > + > +test_expect_success 'dir-iterator should list files properly on pre-order + post-order + root-dir mode' ' > + mkdir -p dir/a/b/c/ && > + >dir/a/b/c/d && > + > + test-dir-iterator ./dir 7 >actual-pre-order-post-order-root-dir-output && > + rm -rf dir && > + > + test_cmp expect-pre-order-post-order-root-dir-output actual-pre-order-post-order-root-dir-output > +' > + > test_done > -- > 2.7.4 (Apple Git-66) >