The index might be aware that a file hasn't modified via fsmonitor, but unpack-trees did not pay attention to it and checked via ie_match_stat which can be inefficient on certain filesystems. This significantly slows down commands that run oneway_merge, like checkout and reset --hard. This patch makes oneway_merge check whether a file is considered unchanged through fsmonitor and skips ie_match_stat on it. unpack-trees also now correctly copies over fsmonitor validity state from the source index. Finally, for correctness, we force a refresh of fsmonitor state in tweak_fsmonitor. After this change, commands like stash (that use reset --hard internally) go from 8s or more to ~2s on a 250k file repository on a mac. Signed-off-by: Utsav Shah utsav@xxxxxxxxxxx [utsav@xxxxxxxxxxx] Utsav Shah (1): unpack-trees: skip stat on fsmonitor-valid files fsmonitor.c | 20 +++++++++++--------- t/t7113-post-index-change-hook.sh | 3 --- t/t7519-status-fsmonitor.sh | 9 +++++++-- unpack-trees.c | 6 +++++- 4 files changed, 23 insertions(+), 15 deletions(-) base-commit: 566a1439f6f56c2171b8853ddbca0ad3f5098770 Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-424%2FUtsav2%2Fskip-lstat-v2 Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-424/Utsav2/skip-lstat-v2 Pull-Request: https://github.com/gitgitgadget/git/pull/424 Range-diff vs v1: 1: 609c7c5047 ! 1: f76ba554ed unpack-trees: skip lstat based on fsmonitor @@ -1,24 +1,147 @@ Author: Utsav Shah <utsav@xxxxxxxxxxx> - unpack-trees: skip lstat based on fsmonitor + unpack-trees: skip stat on fsmonitor-valid files - git stash runs git reset --hard as its final step, which can be fairly slow on a large repository. - This change lets us skip that if fsmonitor thinks those files aren't modified. + The index might be aware that a file hasn't modified via fsmonitor, but + unpack-trees did not pay attention to it and checked via ie_match_stat + which can be inefficient on certain filesystems. This significantly slows + down commands that run oneway_merge, like checkout and reset --hard. - git stash goes from ~8s -> 2s on my repository after this change. + This patch makes oneway_merge check whether a file is considered + unchanged through fsmonitor and skips ie_match_stat on it. unpack-trees + also now correctly copies over fsmonitor validity state from the source + index. Finally, for correctness, we force a refresh of fsmonitor state in + tweak_fsmonitor. + + After this change, commands like stash (that use reset --hard + internally) go from 8s or more to ~2s on a 250k file repository on a + mac. Signed-off-by: Utsav Shah <utsav@xxxxxxxxxxx> + diff --git a/fsmonitor.c b/fsmonitor.c + --- a/fsmonitor.c + +++ b/fsmonitor.c +@@ + } + istate->fsmonitor_dirty = fsmonitor_dirty; + +- if (istate->fsmonitor_dirty->bit_size > istate->cache_nr) +- BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %u)", +- (uintmax_t)istate->fsmonitor_dirty->bit_size, istate->cache_nr); ++ if (!istate->split_index && istate->fsmonitor_dirty->bit_size > istate->cache_nr) ++ BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %"PRIuMAX")", ++ (uintmax_t)istate->fsmonitor_dirty->bit_size, (uintmax_t)istate->cache_nr); ++ + + trace_printf_key(&trace_fsmonitor, "read fsmonitor extension successful"); + return 0; +@@ + uint32_t ewah_size = 0; + int fixup = 0; + +- if (istate->fsmonitor_dirty->bit_size > istate->cache_nr) +- BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %u)", +- (uintmax_t)istate->fsmonitor_dirty->bit_size, istate->cache_nr); ++ if (!istate->split_index && istate->fsmonitor_dirty->bit_size > istate->cache_nr) ++ BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %"PRIuMAX")", ++ (uintmax_t)istate->fsmonitor_dirty->bit_size, (uintmax_t)istate->cache_nr); + + put_be32(&hdr_version, INDEX_EXTENSION_VERSION); + strbuf_add(sb, &hdr_version, sizeof(uint32_t)); +@@ + } + if (bol < query_result.len) + fsmonitor_refresh_callback(istate, buf + bol); ++ ++ if (istate->untracked) ++ istate->untracked->use_fsmonitor = 1; + } else { + /* Mark all entries invalid */ + for (i = 0; i < istate->cache_nr; i++) +@@ + (uintmax_t)istate->fsmonitor_dirty->bit_size, istate->cache_nr); + ewah_each_bit(istate->fsmonitor_dirty, fsmonitor_ewah_callback, istate); + +- /* Now mark the untracked cache for fsmonitor usage */ +- if (istate->untracked) +- istate->untracked->use_fsmonitor = 1; ++ refresh_fsmonitor(istate); + } + + ewah_free(istate->fsmonitor_dirty); + + diff --git a/t/t7113-post-index-change-hook.sh b/t/t7113-post-index-change-hook.sh + --- a/t/t7113-post-index-change-hook.sh + +++ b/t/t7113-post-index-change-hook.sh +@@ + git checkout -- dir1/file1.txt && + test_path_is_file testsuccess && rm -f testsuccess && + test_path_is_missing testfailure && +- git update-index && +- test_path_is_missing testsuccess && +- test_path_is_missing testfailure && + git reset --soft && + test_path_is_missing testsuccess && + test_path_is_missing testfailure + + diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh + --- a/t/t7519-status-fsmonitor.sh + +++ b/t/t7519-status-fsmonitor.sh +@@ + + # test that "update-index --fsmonitor-valid" sets the fsmonitor valid bit + test_expect_success 'update-index --fsmonitor-valid" sets the fsmonitor valid bit' ' ++ write_script .git/hooks/fsmonitor-test<<-\EOF && ++ EOF + git update-index --fsmonitor && + git update-index --fsmonitor-valid dir1/modified && + git update-index --fsmonitor-valid dir2/modified && +@@ + + # test that newly added files are marked valid + test_expect_success 'newly added files are marked valid' ' ++ write_script .git/hooks/fsmonitor-test<<-\EOF && ++ EOF + git add new && + git add dir1/new && + git add dir2/new && +@@ + # Ensure commands that call refresh_index() to move the index back in time + # properly invalidate the fsmonitor cache + test_expect_success 'refresh_index() invalidates fsmonitor cache' ' +- write_script .git/hooks/fsmonitor-test<<-\EOF && +- EOF + clean_repo && ++ write_integration_script && + dirty_repo && + git add . && ++ write_script .git/hooks/fsmonitor-test<<-\EOF && ++ EOF + git commit -m "to reset" && + git reset HEAD~1 && + git status >actual && + diff --git a/unpack-trees.c b/unpack-trees.c --- a/unpack-trees.c +++ b/unpack-trees.c @@ + o->merge_size = len; + mark_all_ce_unused(o->src_index); + ++ if (o->src_index->fsmonitor_last_update) ++ o->result.fsmonitor_last_update = o->src_index->fsmonitor_last_update; ++ + /* + * Sparse checkout loop #1: set NEW_SKIP_WORKTREE on existing entries + */ +@@ if (old && same(old, a)) { int update = 0; - if (o->reset && o->update && !ce_uptodate(old) && !ce_skip_worktree(old)) { + if (o->reset && o->update && !ce_uptodate(old) && !ce_skip_worktree(old) && -+ !(old->ce_flags & CE_FSMONITOR_VALID)) { ++ !(old->ce_flags & CE_FSMONITOR_VALID)) { struct stat st; if (lstat(old->name, &st) || ie_match_stat(o->src_index, old, &st, CE_MATCH_IGNORE_VALID|CE_MATCH_IGNORE_SKIP_WORKTREE)) -- gitgitgadget