At a checkpoint, fast-import resets all branches and tags on disk to the OID that we have in memory. If --force is not given, only branch resets that result in fast forwards with respect to the state on disk are allowed. With this approach, even for refs that fast-import has not been commanded to change for a long time (or ever), at each checkpoint, we will systematically reset them to the last state this process knows about, a state which may have been set before the previous checkpoint. Any changes made to these refs by a different process since the last checkpoint will be overwritten. 1> is one process, 2> is another: 1> $ git fast-import --force 1> reset refs/heads/master 1> from $A 1> checkpoint 2> $ git branch -f refs/heads/master $B 1> reset refs/heads/tip 1> from $C 1> checkpoint At this point master points again to $A. This problem is mitigated somewhat for branches when --force is not specified (the default), as requiring a fast forward means in practice that concurrent external changes are likely to be preserved. But it's not foolproof either: 1> $ git fast-import 1> reset refs/heads/master 1> from $A 1> checkpoint 2> $ git branch -f refs/heads/master refs/heads/master~1 1> reset refs/heads/tip 1> from $C 1> checkpoint At this point, master points again to $A, not $A~1 as requested by the second process. We now keep track of whether branches and tags have been changed by this fast-import process since our last checkpoint (or startup). At the next checkpoint, only refs and tags that new commands have changed are written to disk. --- fast-import.c | 14 +++++++++++ t/t9300-fast-import.sh | 53 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) Please let me know what you think of the change itself, before I spend some quality time bashing out a way to avoid using sleep(1) in the tests. Thanks. diff --git a/fast-import.c b/fast-import.c index 95600c78e048..d25be5c83110 100644 --- a/fast-import.c +++ b/fast-import.c @@ -250,6 +250,7 @@ struct branch { uintmax_t num_notes; unsigned active : 1; unsigned delete : 1; + unsigned changed : 1; unsigned pack_id : PACK_ID_BITS; struct object_id oid; }; @@ -257,6 +258,7 @@ struct branch { struct tag { struct tag *next_tag; const char *name; + unsigned changed : 1; unsigned int pack_id; struct object_id oid; }; @@ -728,6 +730,7 @@ static struct branch *new_branch(const char *name) b->branch_tree.versions[1].mode = S_IFDIR; b->num_notes = 0; b->active = 0; + b->changed = 0; b->pack_id = MAX_PACK_ID; branch_table[hc] = b; branch_count++; @@ -1715,6 +1718,10 @@ static int update_branch(struct branch *b) struct object_id old_oid; struct strbuf err = STRBUF_INIT; + if (!b->changed) + return 0; + b->changed = 0; + if (is_null_oid(&b->oid)) { if (b->delete) delete_ref(NULL, b->name, NULL, 0); @@ -1780,6 +1787,10 @@ static void dump_tags(void) goto cleanup; } for (t = first_tag; t; t = t->next_tag) { + if (!t->changed) + continue; + t->changed = 0; + strbuf_reset(&ref_name); strbuf_addf(&ref_name, "refs/tags/%s", t->name); @@ -2813,6 +2824,7 @@ static void parse_new_commit(const char *arg) if (!store_object(OBJ_COMMIT, &new_data, NULL, &b->oid, next_mark)) b->pack_id = pack_id; + b->changed = 1; b->last_commit = object_count_by_type[OBJ_COMMIT]; } @@ -2894,6 +2906,7 @@ static void parse_new_tag(const char *arg) t->pack_id = MAX_PACK_ID; else t->pack_id = pack_id; + t->changed = 1; } static void parse_reset_branch(const char *arg) @@ -2914,6 +2927,7 @@ static void parse_reset_branch(const char *arg) b = new_branch(arg); read_next_command(); parse_from(b); + b->changed = 1; if (command_buf.len > 0) unread_command_buf = 1; } diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh index 40fe7e49767a..548994dfbeb3 100755 --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ -3123,6 +3123,9 @@ test_expect_success 'U: validate root delete result' ' # The commands in input_file should not produce any output on the file # descriptor set with --cat-blob-fd (or stdout if unspecified). +# +# If input_file2 is specified, sleep 2 seconds before writing it to fast-import +# stdin. # # To make sure you're observing the side effects of checkpoint *before* # fast-import terminates (and thus writes out its state), check that the @@ -3131,6 +3134,7 @@ test_expect_success 'U: validate root delete result' ' background_import_then_checkpoint () { options=$1 input_file=$2 + input_file2=$3 mkfifo V.input exec 8<>V.input @@ -3155,6 +3159,7 @@ background_import_then_checkpoint () { # because there is no reader on &9 yet. ( cat "$input_file" + [ -n "$input_file2" ] && sleep 2 && cat "$input_file2" echo "checkpoint" echo "progress checkpoint" ) >&8 & @@ -3262,4 +3267,52 @@ test_expect_success PIPE 'V: checkpoint updates tags after tag' ' background_import_still_running ' +test_expect_success PIPE 'V: checkpoint only resets changed branches' ' + cat >input <<-INPUT_END && + reset refs/heads/V3 + from refs/heads/U + + checkpoint + INPUT_END + + cat >input2 <<-INPUT_END && + reset refs/heads/V4 + from refs/heads/U + + INPUT_END + + background_import_then_checkpoint "" input input2 && + sleep 1 && + git branch -f V3 refs/heads/V && + sleep 2 && + test "$(git rev-parse --verify V3)" = "$(git rev-parse --verify V)" && + background_import_still_running +' + +test_expect_success PIPE 'V: checkpoint only updates changed tags' ' + cat >input <<-INPUT_END && + tag Vtag2 + from refs/heads/U + tagger $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + data 0 + + checkpoint + INPUT_END + + cat >input2 <<-INPUT_END && + tag Vtag3 + from refs/heads/U + tagger $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + data 0 + + INPUT_END + + background_import_then_checkpoint "" input input2 && + sleep 1 && + git tag -f Vtag2 refs/heads/V && + sleep 2 && + test "$(git rev-parse --verify Vtag2)" = "$(git rev-parse --verify V)" && + background_import_still_running +' + test_done -- 2.19.1