While reading the source code of git ls-files, I learned that git ls-files may print duplicate file names when there are unmerged paths during a branch merge, or when different options are used at the same time. Users may be confused when they see these duplicate file names. As Junio C Hamano said, this is odd behaviour. Therefore, we can provide an additional option to git ls-files to suppress these duplicate entries. This fixes https://github.com/gitgitgadget/git/issues/198 Thanks! ZheNing Hu (3): ls_files.c: bugfix for --deleted and --modified ls_files.c: consolidate two for loops into one ls-files.c: add --deduplicate option Documentation/git-ls-files.txt | 5 ++ builtin/ls-files.c | 85 ++++++++++++++++++++-------------- t/t3012-ls-files-dedup.sh | 66 ++++++++++++++++++++++++++ 3 files changed, 121 insertions(+), 35 deletions(-) create mode 100755 t/t3012-ls-files-dedup.sh base-commit: 6d3ef5b467eccd2769f1aa1c555d317d3c8dc707 Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-832%2Fadlternative%2Fls-files-dedup-v6 Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-832/adlternative/ls-files-dedup-v6 Pull-Request: https://github.com/gitgitgadget/git/pull/832 Range-diff vs v5: 1: ec9464f6094 ! 
1: fbc38ce9075 ls_files.c: bugfix for --deleted and --modified @@ Commit message ## builtin/ls-files.c ## @@ builtin/ls-files.c: static void show_files(struct repository *repo, struct dir_struct *dir) + for (i = 0; i < repo->index->cache_nr; i++) { + const struct cache_entry *ce = repo->index->cache[i]; + struct stat st; +- int err; ++ int stat_err; + + construct_fullname(&fullname, repo, ce); + +@@ builtin/ls-files.c: static void show_files(struct repository *repo, struct dir_struct *dir) + continue; if (ce_skip_worktree(ce)) continue; - err = lstat(fullname.buf, &st); +- err = lstat(fullname.buf, &st); - if (show_deleted && err) -- show_ce(repo, dir, ce, fullname.buf, tag_removed); ++ stat_err = lstat(fullname.buf, &st); ++ if (stat_err && (errno != ENOENT && errno != ENOTDIR)) ++ error_errno("cannot lstat '%s'", fullname.buf); ++ if (stat_err && show_deleted) + show_ce(repo, dir, ce, fullname.buf, tag_removed); - if (show_modified && ie_modified(repo->index, ce, &st, 0)) -+ if (err) { -+ if (errno != ENOENT && errno != ENOTDIR) -+ error_errno("cannot lstat '%s'", fullname.buf); -+ if (show_deleted) -+ show_ce(repo, dir, ce, fullname.buf, tag_removed); -+ if (show_modified) +- show_ce(repo, dir, ce, fullname.buf, tag_modified); ++ if (show_modified && ++ (stat_err || ie_modified(repo->index, ce, &st, 0))) + show_ce(repo, dir, ce, fullname.buf, tag_modified); -+ } else if (show_modified && ie_modified(repo->index, ce, &st, 0)) - show_ce(repo, dir, ce, fullname.buf, tag_modified); } } + 2: 802ff802be8 ! 2: 3997d390883 ls_files.c: consolidate two for loops into one @@ builtin/ls-files.c: static void show_files(struct repository *repo, struct dir_s - if (show_cached || show_stage) { - for (i = 0; i < repo->index->cache_nr; i++) { - const struct cache_entry *ce = repo->index->cache[i]; -+ if (! 
(show_cached || show_stage || show_deleted || show_modified)) -+ return; -+ for (i = 0; i < repo->index->cache_nr; i++) { -+ const struct cache_entry *ce = repo->index->cache[i]; -+ struct stat st; -+ int err; - +- - construct_fullname(&fullname, repo, ce); -+ construct_fullname(&fullname, repo, ce); - +- - if ((dir->flags & DIR_SHOW_IGNORED) && - !ce_excluded(dir, repo->index, fullname.buf, ce)) - continue; @@ builtin/ls-files.c: static void show_files(struct repository *repo, struct dir_s - ce_stage(ce) ? tag_unmerged : - (ce_skip_worktree(ce) ? tag_skip_worktree : - tag_cached)); -+ if ((dir->flags & DIR_SHOW_IGNORED) && -+ !ce_excluded(dir, repo->index, fullname.buf, ce)) -+ continue; -+ if (ce->ce_flags & CE_UPDATE) -+ continue; -+ if (show_cached || show_stage) { -+ if (!show_unmerged || ce_stage(ce)) -+ show_ce(repo, dir, ce, fullname.buf, -+ ce_stage(ce) ? tag_unmerged : -+ (ce_skip_worktree(ce) ? tag_skip_worktree : -+ tag_cached)); - } +- } - } - if (show_deleted || show_modified) { - for (i = 0; i < repo->index->cache_nr; i++) { - const struct cache_entry *ce = repo->index->cache[i]; - struct stat st; -- int err; -- +- int stat_err; ++ if (! 
(show_cached || show_stage || show_deleted || show_modified)) ++ return; ++ for (i = 0; i < repo->index->cache_nr; i++) { ++ const struct cache_entry *ce = repo->index->cache[i]; ++ struct stat st; ++ int stat_err; + - construct_fullname(&fullname, repo, ce); -- ++ construct_fullname(&fullname, repo, ce); + - if ((dir->flags & DIR_SHOW_IGNORED) && - !ce_excluded(dir, repo->index, fullname.buf, ce)) - continue; @@ builtin/ls-files.c: static void show_files(struct repository *repo, struct dir_s - continue; - if (ce_skip_worktree(ce)) - continue; -- err = lstat(fullname.buf, &st); -- if (err) { -- if (errno != ENOENT && errno != ENOTDIR) -- error_errno("cannot lstat '%s'", fullname.buf); -- if (show_deleted) -- show_ce(repo, dir, ce, fullname.buf, tag_removed); -- if (show_modified) +- stat_err = lstat(fullname.buf, &st); +- if (stat_err && (errno != ENOENT && errno != ENOTDIR)) +- error_errno("cannot lstat '%s'", fullname.buf); +- if (stat_err && show_deleted) +- show_ce(repo, dir, ce, fullname.buf, tag_removed); +- if (show_modified && +- (stat_err || ie_modified(repo->index, ce, &st, 0))) - show_ce(repo, dir, ce, fullname.buf, tag_modified); -- } else if (show_modified && ie_modified(repo->index, ce, &st, 0)) ++ if ((dir->flags & DIR_SHOW_IGNORED) && ++ !ce_excluded(dir, repo->index, fullname.buf, ce)) ++ continue; ++ if (ce->ce_flags & CE_UPDATE) ++ continue; ++ if (show_cached || show_stage) { ++ if (!show_unmerged || ce_stage(ce)) ++ show_ce(repo, dir, ce, fullname.buf, ++ ce_stage(ce) ? tag_unmerged : ++ (ce_skip_worktree(ce) ? 
tag_skip_worktree : ++ tag_cached)); + } ++ if (!show_deleted && !show_modified) ++ continue; + if (ce_skip_worktree(ce)) + continue; -+ err = lstat(fullname.buf, &st); -+ if (err) { -+ if (errno != ENOENT && errno != ENOTDIR) -+ error_errno("cannot lstat '%s'", fullname.buf); -+ if (show_deleted) -+ show_ce(repo, dir, ce, fullname.buf, tag_removed); -+ if (show_modified) - show_ce(repo, dir, ce, fullname.buf, tag_modified); -- } -+ } else if (show_modified && ie_modified(repo->index, ce, &st, 0)) -+ show_ce(repo, dir, ce, fullname.buf, tag_modified); ++ stat_err = lstat(fullname.buf, &st); ++ if (stat_err && (errno != ENOENT && errno != ENOTDIR)) ++ error_errno("cannot lstat '%s'", fullname.buf); ++ if (stat_err && show_deleted) ++ show_ce(repo, dir, ce, fullname.buf, tag_removed); ++ if (show_modified && ++ (stat_err || ie_modified(repo->index, ce, &st, 0))) ++ show_ce(repo, dir, ce, fullname.buf, tag_modified); } strbuf_release(&fullname); 3: e9c53186706 ! 3: 07b603fd97c ls-files.c: add --deduplicate option @@ builtin/ls-files.c: static int line_terminator = '\n'; static const char *prefix; static int max_prefix_len; @@ builtin/ls-files.c: static void show_files(struct repository *repo, struct dir_struct *dir) - { - int i; - struct strbuf fullname = STRBUF_INIT; -+ const struct cache_entry *last_shown_ce; - - /* For cached/deleted files we don't need to even do the readdir */ - if (show_others || show_killed) { -@@ builtin/ls-files.c: static void show_files(struct repository *repo, struct dir_struct *dir) - } - if (! 
(show_cached || show_stage || show_deleted || show_modified)) - return; -+ last_shown_ce = NULL; - for (i = 0; i < repo->index->cache_nr; i++) { - const struct cache_entry *ce = repo->index->cache[i]; - struct stat st; -@@ builtin/ls-files.c: static void show_files(struct repository *repo, struct dir_struct *dir) - - construct_fullname(&fullname, repo, ce); - -+ if (skipping_duplicates && last_shown_ce && -+ !strcmp(last_shown_ce->name,ce->name)) -+ continue; - if ((dir->flags & DIR_SHOW_IGNORED) && - !ce_excluded(dir, repo->index, fullname.buf, ce)) continue; if (ce->ce_flags & CE_UPDATE) continue; - if (show_cached || show_stage) { -+ if (skipping_duplicates && last_shown_ce && -+ !strcmp(last_shown_ce->name,ce->name)) -+ continue; - if (!show_unmerged || ce_stage(ce)) +- if (show_cached || show_stage) { +- if (!show_unmerged || ce_stage(ce)) ++ if ((show_cached || show_stage) && ++ (!show_unmerged || ce_stage(ce))) { show_ce(repo, dir, ce, fullname.buf, ce_stage(ce) ? tag_unmerged : (ce_skip_worktree(ce) ? 
tag_skip_worktree : tag_cached)); -+ if (show_cached && skipping_duplicates) -+ last_shown_ce = ce; ++ if (skipping_duplicates) ++ goto skip_to_next_name; } - if (ce_skip_worktree(ce)) + if (!show_deleted && !show_modified) continue; -+ if (skipping_duplicates && last_shown_ce && -+ !strcmp(last_shown_ce->name,ce->name)) -+ continue; - err = lstat(fullname.buf, &st); - if (err) { -- if (errno != ENOENT && errno != ENOTDIR) -- error_errno("cannot lstat '%s'", fullname.buf); -- if (show_deleted) -+ if (skipping_duplicates && show_deleted && show_modified) - show_ce(repo, dir, ce, fullname.buf, tag_removed); -- if (show_modified) -- show_ce(repo, dir, ce, fullname.buf, tag_modified); -+ else { -+ if (errno != ENOENT && errno != ENOTDIR) -+ error_errno("cannot lstat '%s'", fullname.buf); -+ if (show_deleted) -+ show_ce(repo, dir, ce, fullname.buf, tag_removed); -+ if (show_modified) -+ show_ce(repo, dir, ce, fullname.buf, tag_modified); -+ } - } else if (show_modified && ie_modified(repo->index, ce, &st, 0)) - show_ce(repo, dir, ce, fullname.buf, tag_modified); -+ last_shown_ce = ce; +@@ builtin/ls-files.c: static void show_files(struct repository *repo, struct dir_struct *dir) + stat_err = lstat(fullname.buf, &st); + if (stat_err && (errno != ENOENT && errno != ENOTDIR)) + error_errno("cannot lstat '%s'", fullname.buf); +- if (stat_err && show_deleted) ++ if (stat_err && show_deleted) { + show_ce(repo, dir, ce, fullname.buf, tag_removed); ++ if (skipping_duplicates) ++ goto skip_to_next_name; ++ } + if (show_modified && +- (stat_err || ie_modified(repo->index, ce, &st, 0))) ++ (stat_err || ie_modified(repo->index, ce, &st, 0))) { + show_ce(repo, dir, ce, fullname.buf, tag_modified); ++ if (skipping_duplicates) ++ goto skip_to_next_name; ++ } ++ continue; ++skip_to_next_name: ++ { ++ int j; ++ struct cache_entry **cache = repo->index->cache; ++ for (j = i + 1; j < repo->index->cache_nr; j++) ++ if (strcmp(ce->name, cache[j]->name)) ++ break; ++ i = j - 1; /* compensate 
for outer for loop */ ++ } } strbuf_release(&fullname); -- gitgitgadget