Shaoxuan Yuan wrote: > Originally, moving a <source> directory which is not on-disk due > to its existence outside of sparse-checkout cone, "git mv" command > errors out with "bad source". > > Add a helper check_dir_in_index() function to see if a directory > name exists in the index. Also add a SKIP_WORKTREE_DIR bit to mark > such directories. > > Change the checking logic, so that such <source> directory makes > "git mv" command warns with "advise_on_updating_sparse_paths()" > instead of "bad source"; also user now can supply a "--sparse" flag so > this operation can be carried out successfully. > > Signed-off-by: Shaoxuan Yuan <shaoxuan.yuan02@xxxxxxxxx> > --- > builtin/mv.c | 49 +++++++++++++++++++++++++++++++---- > t/t7002-mv-sparse-checkout.sh | 4 +-- > 2 files changed, 46 insertions(+), 7 deletions(-) > > diff --git a/builtin/mv.c b/builtin/mv.c > index 7ce7992d6c..cb3441c7cb 100644 > --- a/builtin/mv.c > +++ b/builtin/mv.c > @@ -123,6 +123,37 @@ static int index_range_of_same_dir(const char *src, int length, > return last - first; > } > > +/* > + * Check if an out-of-cone directory should be in the index. Imagine this case > + * that all the files under a directory are marked with 'CE_SKIP_WORKTREE' bit > + * and thus the directory is sparsified. > + * > + * Return 0 if such directory exist (i.e. with any of its contained files not > + * marked with CE_SKIP_WORKTREE, the directory would be present in working tree). > + * Return 1 otherwise. > + */ This explanation is helpful in clarifying that you don't mean *sparse directories* (that is, directory entries in a sparse index), you mean directories whose contents are all sparse. It's a tricky distinction, but you handled it nicely here. 
> +static int check_dir_in_index(const char *name, int namelen) > +{ > + int ret = 1; > + const char *with_slash = add_slash(name); > + int length = namelen + 1; > + > + int pos = cache_name_pos(with_slash, length); > + const struct cache_entry *ce; > + > + if (pos < 0) { > + pos = -pos - 1; > + if (pos >= the_index.cache_nr) > + return ret; > + ce = active_cache[pos]; > + if (strncmp(with_slash, ce->name, length)) > + return ret; > + if (ce_skip_worktree(ce)) > + return ret = 0; > + } > + return ret; The way 'ret' is handled here is a bit difficult to follow. Would you be opposed to returning hardcoded '0' or '1', rather than changing the value of 'ret' throughout? Something like: static int check_dir_in_index(const char *name, int namelen) { int pos, length = namelen + 1; const struct cache_entry *ce; const char *with_slash = add_slash(name); pos = cache_name_pos(with_slash, length); if (pos < 0) { pos = -pos - 1; if (pos >= the_index.cache_nr) return 1; ce = active_cache[pos]; if (strncmp(with_slash, ce->name, length)) return 1; if (ce_skip_worktree(ce)) return 0; } return 1; } > +} > + > int cmd_mv(int argc, const char **argv, const char *prefix) > { > int i, flags, gitmodules_modified = 0; > @@ -184,7 +215,7 @@ int cmd_mv(int argc, const char **argv, const char *prefix) > /* Checking */ > for (i = 0; i < argc; i++) { > const char *src = source[i], *dst = destination[i]; > - int length, src_is_dir; > + int length; > const char *bad = NULL; > int skip_sparse = 0; > > @@ -198,12 +229,17 @@ int cmd_mv(int argc, const char **argv, const char *prefix) > > pos = cache_name_pos(src, length); > if (pos < 0) { > + const char *src_w_slash = add_slash(src); > + if (!check_dir_in_index(src, length) && > + !path_in_sparse_checkout(src_w_slash, &the_index)) { In checks like these, the less "expensive" one should come first (so that if it returns 'false', we completely skip the more expensive one). 
Since 'check_dir_in_index()' requires binary searching the index, it's likely to be more expensive than 'path_in_sparse_checkout()', so the condition order should be flipped: if (!path_in_sparse_checkout(src_w_slash, &the_index) && !check_dir_in_index(src, length)) { Also nit: alignment (more details on why/how in my last message [1]). [1] https://lore.kernel.org/git/01b39c63-5652-4293-0424-ff99b6f9f7d2@xxxxxxxxxx/ > + modes[i] |= SKIP_WORKTREE_DIR; > + goto dir_check; > + } > /* only error if existence is expected. */ > if (!(modes[i] & SPARSE)) > bad = _("bad source"); > goto act_on_entry; > } > - > ce = active_cache[pos]; > if (!ce_skip_worktree(ce)) { > bad = _("bad source"); > @@ -230,14 +266,17 @@ int cmd_mv(int argc, const char **argv, const char *prefix) > bad = _("can not move directory into itself"); > goto act_on_entry; > } > - if ((src_is_dir = S_ISDIR(st.st_mode)) > + if (S_ISDIR(st.st_mode) > && lstat(dst, &st) == 0) { > bad = _("cannot move directory over file"); > goto act_on_entry; > } > - if (src_is_dir) { > + > +dir_check: > + if (S_ISDIR(st.st_mode)) { > int j, dst_len, n; > - int first = cache_name_pos(src, length), last; > + int first, last; > + first = cache_name_pos(src, length); Super-nit: why did this line change? It looks like it just rearranges the lines for no functional purpose. 
> > if (first >= 0) { > prepare_move_submodule(src, first, > diff --git a/t/t7002-mv-sparse-checkout.sh b/t/t7002-mv-sparse-checkout.sh > index 5b61fbad5f..30e13b9979 100755 > --- a/t/t7002-mv-sparse-checkout.sh > +++ b/t/t7002-mv-sparse-checkout.sh > @@ -219,7 +219,7 @@ test_expect_success 'refuse to move file to non-skip-worktree sparse path' ' > test_cmp expect stderr > ' > > -test_expect_failure 'refuse to move out-of-cone directory without --sparse' ' > +test_expect_success 'refuse to move out-of-cone directory without --sparse' ' > test_when_finished "cleanup_sparse_checkout" && > setup_sparse_checkout && > > @@ -230,7 +230,7 @@ test_expect_failure 'refuse to move out-of-cone directory without --sparse' ' > test_cmp expect stderr > ' > > -test_expect_failure 'can move out-of-cone directory with --sparse' ' > +test_expect_success 'can move out-of-cone directory with --sparse' ' > test_when_finished "cleanup_sparse_checkout" && > setup_sparse_checkout && >