[PATCH v2 0/1] unpack-trees: skip stat on fsmonitor-valid files

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The index might be aware that a file hasn't modified via fsmonitor, but
unpack-trees did not pay attention to it and checked via ie_match_stat which
can be inefficient on certain filesystems. This significantly slows down
commands that run oneway_merge, like checkout and reset --hard.

This patch makes oneway_merge check whether a file is considered unchanged
through fsmonitor and skips ie_match_stat on it. unpack-trees also now
correctly copies over fsmonitor validity state from the source index.
Finally, for correctness, we force a refresh of fsmonitor state in
tweak_fsmonitor.

After this change, commands like stash (that use reset --hard internally) go
from 8s or more to ~2s on a 250k file repository on a mac.

Signed-off-by: Utsav Shah utsav@xxxxxxxxxxx [utsav@xxxxxxxxxxx]

Utsav Shah (1):
  unpack-trees: skip stat on fsmonitor-valid files

 fsmonitor.c                       | 20 +++++++++++---------
 t/t7113-post-index-change-hook.sh |  3 ---
 t/t7519-status-fsmonitor.sh       |  9 +++++++--
 unpack-trees.c                    |  6 +++++-
 4 files changed, 23 insertions(+), 15 deletions(-)


base-commit: 566a1439f6f56c2171b8853ddbca0ad3f5098770
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-424%2FUtsav2%2Fskip-lstat-v2
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-424/Utsav2/skip-lstat-v2
Pull-Request: https://github.com/gitgitgadget/git/pull/424

Range-diff vs v1:

 1:  609c7c5047 ! 1:  f76ba554ed unpack-trees: skip lstat based on fsmonitor
     @@ -1,24 +1,147 @@
      Author: Utsav Shah <utsav@xxxxxxxxxxx>
      
     -    unpack-trees: skip lstat based on fsmonitor
     +    unpack-trees: skip stat on fsmonitor-valid files
      
     -    git stash runs git reset --hard as its final step, which can be fairly slow on a large repository.
     -    This change lets us skip that if fsmonitor thinks those files aren't modified.
     +    The index might be aware that a file hasn't modified via fsmonitor, but
     +    unpack-trees did not pay attention to it and checked via ie_match_stat
     +    which can be inefficient on certain filesystems. This significantly slows
     +    down commands that run oneway_merge, like checkout and reset --hard.
      
     -    git stash goes from ~8s -> 2s on my repository after this change.
     +    This patch makes oneway_merge check whether a file is considered
     +    unchanged through fsmonitor and skips ie_match_stat on it. unpack-trees
     +    also now correctly copies over fsmonitor validity state from the source
     +    index. Finally, for correctness, we force a refresh of fsmonitor state in
     +    tweak_fsmonitor.
     +
     +    After this change, commands like stash (that use reset --hard
     +    internally) go from 8s or more to ~2s on a 250k file repository on a
     +    mac.
      
          Signed-off-by: Utsav Shah <utsav@xxxxxxxxxxx>
      
     + diff --git a/fsmonitor.c b/fsmonitor.c
     + --- a/fsmonitor.c
     + +++ b/fsmonitor.c
     +@@
     + 	}
     + 	istate->fsmonitor_dirty = fsmonitor_dirty;
     + 
     +-	if (istate->fsmonitor_dirty->bit_size > istate->cache_nr)
     +-		BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %u)",
     +-		    (uintmax_t)istate->fsmonitor_dirty->bit_size, istate->cache_nr);
     ++	if (!istate->split_index && istate->fsmonitor_dirty->bit_size > istate->cache_nr)
     ++		BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %"PRIuMAX")",
     ++		    (uintmax_t)istate->fsmonitor_dirty->bit_size, (uintmax_t)istate->cache_nr);
     ++
     + 
     + 	trace_printf_key(&trace_fsmonitor, "read fsmonitor extension successful");
     + 	return 0;
     +@@
     + 	uint32_t ewah_size = 0;
     + 	int fixup = 0;
     + 
     +-	if (istate->fsmonitor_dirty->bit_size > istate->cache_nr)
     +-		BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %u)",
     +-		    (uintmax_t)istate->fsmonitor_dirty->bit_size, istate->cache_nr);
     ++	if (!istate->split_index && istate->fsmonitor_dirty->bit_size > istate->cache_nr)
     ++		BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %"PRIuMAX")",
     ++		    (uintmax_t)istate->fsmonitor_dirty->bit_size, (uintmax_t)istate->cache_nr);
     + 
     + 	put_be32(&hdr_version, INDEX_EXTENSION_VERSION);
     + 	strbuf_add(sb, &hdr_version, sizeof(uint32_t));
     +@@
     + 		}
     + 		if (bol < query_result.len)
     + 			fsmonitor_refresh_callback(istate, buf + bol);
     ++
     ++		if (istate->untracked)
     ++			istate->untracked->use_fsmonitor = 1;
     + 	} else {
     + 		/* Mark all entries invalid */
     + 		for (i = 0; i < istate->cache_nr; i++)
     +@@
     + 				    (uintmax_t)istate->fsmonitor_dirty->bit_size, istate->cache_nr);
     + 			ewah_each_bit(istate->fsmonitor_dirty, fsmonitor_ewah_callback, istate);
     + 
     +-			/* Now mark the untracked cache for fsmonitor usage */
     +-			if (istate->untracked)
     +-				istate->untracked->use_fsmonitor = 1;
     ++			refresh_fsmonitor(istate);
     + 		}
     + 
     + 		ewah_free(istate->fsmonitor_dirty);
     +
     + diff --git a/t/t7113-post-index-change-hook.sh b/t/t7113-post-index-change-hook.sh
     + --- a/t/t7113-post-index-change-hook.sh
     + +++ b/t/t7113-post-index-change-hook.sh
     +@@
     + 	git checkout -- dir1/file1.txt &&
     + 	test_path_is_file testsuccess && rm -f testsuccess &&
     + 	test_path_is_missing testfailure &&
     +-	git update-index &&
     +-	test_path_is_missing testsuccess &&
     +-	test_path_is_missing testfailure &&
     + 	git reset --soft &&
     + 	test_path_is_missing testsuccess &&
     + 	test_path_is_missing testfailure
     +
     + diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh
     + --- a/t/t7519-status-fsmonitor.sh
     + +++ b/t/t7519-status-fsmonitor.sh
     +@@
     + 
     + # test that "update-index --fsmonitor-valid" sets the fsmonitor valid bit
     + test_expect_success 'update-index --fsmonitor-valid" sets the fsmonitor valid bit' '
     ++	write_script .git/hooks/fsmonitor-test<<-\EOF &&
     ++	EOF
     + 	git update-index --fsmonitor &&
     + 	git update-index --fsmonitor-valid dir1/modified &&
     + 	git update-index --fsmonitor-valid dir2/modified &&
     +@@
     + 
     + # test that newly added files are marked valid
     + test_expect_success 'newly added files are marked valid' '
     ++	write_script .git/hooks/fsmonitor-test<<-\EOF &&
     ++	EOF
     + 	git add new &&
     + 	git add dir1/new &&
     + 	git add dir2/new &&
     +@@
     + # Ensure commands that call refresh_index() to move the index back in time
     + # properly invalidate the fsmonitor cache
     + test_expect_success 'refresh_index() invalidates fsmonitor cache' '
     +-	write_script .git/hooks/fsmonitor-test<<-\EOF &&
     +-	EOF
     + 	clean_repo &&
     ++	write_integration_script &&
     + 	dirty_repo &&
     + 	git add . &&
     ++	write_script .git/hooks/fsmonitor-test<<-\EOF &&
     ++	EOF
     + 	git commit -m "to reset" &&
     + 	git reset HEAD~1 &&
     + 	git status >actual &&
     +
       diff --git a/unpack-trees.c b/unpack-trees.c
       --- a/unpack-trees.c
       +++ b/unpack-trees.c
      @@
     + 	o->merge_size = len;
     + 	mark_all_ce_unused(o->src_index);
     + 
     ++	if (o->src_index->fsmonitor_last_update)
     ++		o->result.fsmonitor_last_update = o->src_index->fsmonitor_last_update;
     ++
     + 	/*
     + 	 * Sparse checkout loop #1: set NEW_SKIP_WORKTREE on existing entries
     + 	 */
     +@@
       
       	if (old && same(old, a)) {
       		int update = 0;
      -		if (o->reset && o->update && !ce_uptodate(old) && !ce_skip_worktree(old)) {
      +		if (o->reset && o->update && !ce_uptodate(old) && !ce_skip_worktree(old) &&
     -+		    !(old->ce_flags & CE_FSMONITOR_VALID)) {
     ++			!(old->ce_flags & CE_FSMONITOR_VALID)) {
       			struct stat st;
       			if (lstat(old->name, &st) ||
       			    ie_match_stat(o->src_index, old, &st, CE_MATCH_IGNORE_VALID|CE_MATCH_IGNORE_SKIP_WORKTREE))

-- 
gitgitgadget



[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux