This patchset contains: - tests to the dir-iterator API; - dir-iterator refactoring to make its state machine simpler and feature adding with tests; - a replacement of explicit recursive dir iteration at copy_or_link_directory for the dir-iterator API; - some refactoring and behavior changes at local clone, mainly to take care of symlinks and hidden files at .git/objects, together with tests for these types of files. Changes since v7[1]: - Applied some style fixes at tests, as suggested by SZEDER - Removed the code to find circular symlinks as suggested in this[2] thread. The way it was previously implemented wouldn't work on Windows. So Dscho suggested me to remove this section until we come up with a more portable implementation. [1]: https://public-inbox.org/git/cover.1560898723.git.matheus.bernardino@xxxxxx/ [2]: https://public-inbox.org/git/nycvar.QRO.7.76.6.1907041136530.44@xxxxxxxxxxxxxxxxx/ travis build: https://travis-ci.org/matheustavares/git/builds/557047597 Daniel Ferreira (1): dir-iterator: add tests for dir-iterator API Matheus Tavares (8): clone: better handle symlinked files at .git/objects/ dir-iterator: use warning_errno when possible dir-iterator: refactor state machine model dir-iterator: add flags parameter to dir_iterator_begin clone: copy hidden paths at local clone clone: extract function from copy_or_link_directory clone: use dir-iterator to avoid explicit dir traversal clone: replace strcmp by fspathcmp Ævar Arnfjörð Bjarmason (1): clone: test for our behavior on odd objects/* content Makefile | 1 + builtin/clone.c | 75 +++++----- dir-iterator.c | 263 ++++++++++++++++++++--------------- dir-iterator.h | 64 +++++++-- refs/files-backend.c | 17 ++- t/helper/test-dir-iterator.c | 58 ++++++++ t/helper/test-tool.c | 1 + t/helper/test-tool.h | 1 + t/t0066-dir-iterator.sh | 148 ++++++++++++++++++++ t/t5604-clone-reference.sh | 133 ++++++++++++++++++ 10 files changed, 597 insertions(+), 164 deletions(-) create mode 100644 t/helper/test-dir-iterator.c create mode 100755 t/t0066-dir-iterator.sh Range-diff against v7: 1: 437b1eb1c7 ! 1: a2016d9d3b clone: test for our behavior on odd objects/* content @@ -98,7 +98,7 @@ + mv $last_loose a-loose-dir && + ln -s a-loose-dir $last_loose && + find . -type f | sort >../../../T.objects-files.raw && -+ echo unknown_content> unknown_file ++ echo unknown_content >unknown_file + ) && + git -C T fsck && + git -C T rev-list --all --objects >T.objects 2: 108bea2652 ! 2: 47a4f9b31c clone: better handle symlinked files at .git/objects/ @@ -80,7 +80,7 @@ + cd ../ && find . -type f | sort >../../../T.objects-files.raw && + find . -type l | sort >../../../T.objects-symlinks.raw && - echo unknown_content> unknown_file + echo unknown_content >unknown_file ) && git -C T fsck && @@ 3: 2c0232be6c ! 3: bbce6a601b dir-iterator: add tests for dir-iterator API @@ -129,7 +129,7 @@ + EOF + + test-tool dir-iterator ./dir >out && -+ sort <out >./actual-iteration-sorted-output && ++ sort out >./actual-iteration-sorted-output && + + test_cmp expected-iteration-sorted-output actual-iteration-sorted-output +' 4: 0b76044165 = 4: 0cc5f1f0b4 dir-iterator: use warning_errno when possible 5: 44c47d579c ! 5: f871b5d3f4 dir-iterator: refactor state machine model @@ -340,14 +340,14 @@ * A typical iteration looks like this: * * int ok; - * struct iterator *iter = dir_iterator_begin(path); - * +- * struct iterator *iter = dir_iterator_begin(path); ++ * struct dir_iterator *iter = dir_iterator_begin(path); ++ * + * if (!iter) + * goto error_handler; -+ * + * * while ((ok = dir_iterator_advance(iter)) == ITER_OK) { * if (want_to_stop_iteration()) { - * ok = dir_iterator_abort(iter); @@ }; 6: 86fc04ad0e ! 6: fe838d7eb4 dir-iterator: add flags parameter to dir_iterator_begin @@ -22,16 +22,6 @@ diff --git a/dir-iterator.c b/dir-iterator.c --- a/dir-iterator.c +++ b/dir-iterator.c -@@ - struct dir_iterator_level { - DIR *dir; - -+ /* The inode number of this level's directory. */ -+ ino_t ino; -+ - /* - * The length of the directory part of path at this level - * (including a trailing '/'): @@ * that will be included in this iteration. */ @@ -51,10 +41,6 @@ static int push_level(struct dir_iterator_int *iter) { @@ - if (!is_dir_sep(iter->base.path.buf[iter->base.path.len - 1])) - strbuf_addch(&iter->base.path, '/'); - level->prefix_len = iter->base.path.len; -+ level->ino = iter->base.st.st_ino; level->dir = opendir(iter->base.path.buf); if (!level->dir) { @@ -96,33 +82,17 @@ + err = stat(iter->base.path.buf, &iter->base.st); + else + err = lstat(iter->base.path.buf, &iter->base.st); -+ + +- return 0; + saved_errno = errno; + if (err && errno != ENOENT) + warning_errno("failed to stat '%s'", iter->base.path.buf); + + errno = saved_errno; + return err; -+} -+ -+/* -+ * Look for a recursive symlink at iter->base.path pointing to any directory on -+ * the previous stack levels. If it is found, return 1. If not, return 0. -+ */ -+static int find_recursive_symlinks(struct dir_iterator_int *iter) -+{ -+ int i; -+ -+ if (!(iter->flags & DIR_ITERATOR_FOLLOW_SYMLINKS) || -+ !S_ISDIR(iter->base.st.st_mode)) -+ return 0; - -+ for (i = 0; i < iter->levels_nr; ++i) -+ if (iter->base.st.st_ino == iter->levels[i].ino) -+ return 1; - return 0; } + int dir_iterator_advance(struct dir_iterator *dir_iterator) @@ struct dir_iterator_int *iter = (struct dir_iterator_int *)dir_iterator; @@ -165,12 +135,6 @@ + if (errno != ENOENT && iter->flags & DIR_ITERATOR_PEDANTIC) + goto error_out; continue; -+ } -+ -+ if (find_recursive_symlinks(iter)) { -+ warning("ignoring recursive symlink at '%s'", -+ iter->base.path.buf); -+ continue; + } return ITER_OK; @@ -207,7 +171,7 @@ * A typical iteration looks like this: * * int ok; -- * struct iterator *iter = dir_iterator_begin(path); +- * struct dir_iterator *iter = dir_iterator_begin(path); + * unsigned int flags = DIR_ITERATOR_PEDANTIC; + * struct dir_iterator *iter = dir_iterator_begin(path, flags); * @@ -230,9 +194,12 @@ + * - DIR_ITERATOR_FOLLOW_SYMLINKS: make dir-iterator follow symlinks. + * i.e., linked directories' contents will be iterated over and + * iter->base.st will contain information on the referred files, -+ * not the symlinks themselves, which is the default behavior. -+ * Recursive symlinks are skipped with a warning and broken symlinks -+ * are ignored. ++ * not the symlinks themselves, which is the default behavior. Broken ++ * symlinks are ignored. ++ * ++ * Warning: circular symlinks are also followed when ++ * DIR_ITERATOR_FOLLOW_SYMLINKS is set. The iteration may end up with ++ * an ELOOP if they happen and DIR_ITERATOR_PEDANTIC is set. + */ +#define DIR_ITERATOR_PEDANTIC (1 << 0) +#define DIR_ITERATOR_FOLLOW_SYMLINKS (1 << 1) @@ -383,7 +350,7 @@ + EOF + + mkdir -p dir3/a && -+ > dir3/a/b && ++ >dir3/a/b && + chmod 0 dir3/a && + + test-tool dir-iterator ./dir3 >actual-no-permissions-output && @@ -399,7 +366,7 @@ + EOF + + mkdir -p dir3/a && -+ > dir3/a/b && ++ >dir3/a/b && + chmod 0 dir3/a && + + test_must_fail test-tool dir-iterator --pedantic ./dir3 \ @@ -435,7 +402,7 @@ + EOF + + test-tool dir-iterator ./dir4 >out && -+ sort <out >actual-no-follow-sorted-output && ++ sort out >actual-no-follow-sorted-output && + + test_cmp expected-no-follow-sorted-output actual-no-follow-sorted-output +' @@ -452,24 +419,9 @@ + EOF + + test-tool dir-iterator --follow-symlinks ./dir4 >out && -+ sort <out >actual-follow-sorted-output && ++ sort out >actual-follow-sorted-output && + + test_cmp expected-follow-sorted-output actual-follow-sorted-output +' -+ -+ -+test_expect_success SYMLINKS 'dir-iterator should ignore recursive symlinks w/ follow flag' ' -+ cat >expected-rec-symlinks-sorted-output <<-EOF && -+ [d] (a) [a] ./dir5/a -+ [d] (a/b) [b] ./dir5/a/b -+ [d] (a/b/d) [d] ./dir5/a/b/d -+ [d] (a/c) [c] ./dir5/a/c -+ EOF -+ -+ test-tool dir-iterator --follow-symlinks ./dir5 >out && -+ sort <out >actual-rec-symlinks-sorted-output && -+ -+ test_cmp expected-rec-symlinks-sorted-output actual-rec-symlinks-sorted-output -+' + test_done 7: 17685057cd = 7: 3da6408e04 clone: copy hidden paths at local clone 8: c7f3a8640e = 8: af7430eb2c clone: extract function from copy_or_link_directory 9: 7934036d30 ! 9: e8308c7408 clone: use dir-iterator to avoid explicit dir traversal @@ -11,11 +11,7 @@ error on readdir or stat inside dir_iterator_advance. Previously it would just print a warning for errors on stat and ignore errors on readdir, which isn't nice because a local git clone could succeed even - though the .git/objects copy didn't fully succeed. Also, with the - dir-iterator API, recursive symlinks will be detected and skipped. This - is another behavior improvement, since the current version would - continue to copy the same content over and over until stat() returned an - ELOOP error. + though the .git/objects copy didn't fully succeed. Signed-off-by: Matheus Tavares <matheus.bernardino@xxxxxx> 10: 2e25c03c07 = 10: 782ca07eed clone: replace strcmp by fspathcmp -- 2.22.0