In a partial clone, whenever Git needs a missing object, it will fetch it from the repo's promisor remote(s), sometimes as part of a bulk prefetch. Currently, if the server handling such fetches does not accept requests for objects unless the objects are reachable, it needs to compute reachability from all refs. This can be improved by sending the commit from which the prefetch request came - a server that understands this would then only need to check that this commit is reachable, and check that the object is reachable from that commit. Therefore, teach the partial clone fetching mechanism to support a "provenance" argument, and plumb the commit provenance from checkout to the partial clone fetching mechanism. In the future, other commands can be similarly upgraded. Other possible future improvements include better diagnostic messages when a prefetch fails. Signed-off-by: Jonathan Tan <jonathantanmy@xxxxxxxxxx> --- Sending a commit hint in this situation is something we've been discussing at $DAYJOB, so here's a preliminary version - the client side is done, but neither the server side nor the exact protocol details are finished. Protocol-wise, in this patch, I'm just sending the provenance commit as a server option. This essentially splits reachability-of-blob, which almost certainly requires loading a bitmap, into 2 parts: reachability-of-commit (which, from my limited experience, can be more quickly done using a regular object walk) and reachability-of-blob-from-commit (which, at worst, requires fewer bitmaps to be loaded). I don't have timings for how it works in practice, though. I'm also not sure if the final version should have the client declare that all blobs are reachable from the root tree(s) of the provenance commit(s), or merely that all blobs are reachable from the provenance commit(s) (that is, including their ancestors). I'm sending this out early in the hope that other people using partial clone will chime in. --- builtin/checkout.c | 4 ++++ builtin/index-pack.c | 2 +- builtin/pack-objects.c | 2 +- diff.c | 2 +- diffcore-rename.c | 2 +- promisor-remote.c | 12 +++++++++--- promisor-remote.h | 3 ++- sha1-file.c | 2 +- t/t5616-partial-clone.sh | 7 +++++-- unpack-trees.c | 3 ++- unpack-trees.h | 7 +++++++ 11 files changed, 34 insertions(+), 12 deletions(-) diff --git a/builtin/checkout.c b/builtin/checkout.c index 9b82119129..7f9fd65808 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -634,6 +634,8 @@ static int reset_tree(struct tree *tree, const struct checkout_opts *o, opts.verbose_update = o->show_progress; opts.src_index = &the_index; opts.dst_index = &the_index; + opts.provenance_commit = + info->commit ? &info->commit->object.oid : NULL; init_checkout_metadata(&opts.meta, info->refname, info->commit ? &info->commit->object.oid : &null_oid, NULL); @@ -724,6 +726,8 @@ static int merge_working_tree(const struct checkout_opts *opts, topts.quiet = opts->merge && old_branch_info->commit; topts.verbose_update = opts->show_progress; topts.fn = twoway_merge; + topts.provenance_commit = new_branch_info->commit ? + &new_branch_info->commit->object.oid : NULL; init_checkout_metadata(&topts.meta, new_branch_info->refname, new_branch_info->commit ? &new_branch_info->commit->object.oid : diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 4b8d86e0ad..83801826d6 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1402,7 +1402,7 @@ static void fix_unresolved_deltas(struct hashfile *f) oid_array_append(&to_fetch, &d->oid); } promisor_remote_get_direct(the_repository, - to_fetch.oid, to_fetch.nr); + to_fetch.oid, to_fetch.nr, NULL); oid_array_clear(&to_fetch); } diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 5617c01b5a..741dcc9a24 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1720,7 +1720,7 @@ static void prefetch_to_pack(uint32_t object_index_start) { oid_array_append(&to_fetch, &entry->idx.oid); } promisor_remote_get_direct(the_repository, - to_fetch.oid, to_fetch.nr); + to_fetch.oid, to_fetch.nr, NULL); oid_array_clear(&to_fetch); } diff --git a/diff.c b/diff.c index 643f4f3f6d..68491fc398 100644 --- a/diff.c +++ b/diff.c @@ -6622,7 +6622,7 @@ void diff_queued_diff_prefetch(void *repository) /* * NEEDSWORK: Consider deduplicating the OIDs sent. */ - promisor_remote_get_direct(repo, to_fetch.oid, to_fetch.nr); + promisor_remote_get_direct(repo, to_fetch.oid, to_fetch.nr, NULL); oid_array_clear(&to_fetch); } diff --git a/diffcore-rename.c b/diffcore-rename.c index d367a6d244..ebc65fa272 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -161,7 +161,7 @@ static void prefetch(void *prefetch_options) diff_add_if_missing(options->repo, &to_fetch, rename_src[i].p->one); } - promisor_remote_get_direct(options->repo, to_fetch.oid, to_fetch.nr); + promisor_remote_get_direct(options->repo, to_fetch.oid, to_fetch.nr, NULL); oid_array_clear(&to_fetch); } diff --git a/promisor-remote.c b/promisor-remote.c index 3c572b1c81..ee44d1334e 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -14,7 +14,8 @@ void set_repository_format_partial_clone(char *partial_clone) static int fetch_objects(const char *remote_name, const struct object_id *oids, - int oid_nr) + int oid_nr, + struct object_id *provenance_commit) { struct child_process child = CHILD_PROCESS_INIT; int i; @@ -26,6 +27,9 @@ static int fetch_objects(const char *remote_name, "fetch", remote_name, "--no-tags", "--no-write-fetch-head", "--recurse-submodules=no", "--filter=blob:none", "--stdin", NULL); + if (provenance_commit) + strvec_pushf(&child.args, "--server-option=provenance=%s", + oid_to_hex(provenance_commit)); if (start_command(&child)) die(_("promisor-remote: unable to fork off fetch subprocess")); child_in = xfdopen(child.in, "w"); @@ -224,7 +228,8 @@ static int remove_fetched_oids(struct repository *repo, int promisor_remote_get_direct(struct repository *repo, const struct object_id *oids, - int oid_nr) + int oid_nr, + struct object_id *provenance_commit) { struct promisor_remote *r; struct object_id *remaining_oids = (struct object_id *)oids; @@ -238,7 +243,8 @@ int promisor_remote_get_direct(struct repository *repo, promisor_remote_init(); for (r = promisors; r; r = r->next) { - if (fetch_objects(r->name, remaining_oids, remaining_nr) < 0) { + if (fetch_objects(r->name, remaining_oids, remaining_nr, + provenance_commit) < 0) { if (remaining_nr == 1) continue; remaining_nr = remove_fetched_oids(repo, &remaining_oids, diff --git a/promisor-remote.h b/promisor-remote.h index c7a14063c5..44d1b152e6 100644 --- a/promisor-remote.h +++ b/promisor-remote.h @@ -30,7 +30,8 @@ int has_promisor_remote(void); */ int promisor_remote_get_direct(struct repository *repo, const struct object_id *oids, - int oid_nr); + int oid_nr, + struct object_id *provenance_commit); /* * This should be used only once from setup.c to set the value we got diff --git a/sha1-file.c b/sha1-file.c index c3c49d2fa5..51ed339a0c 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -1534,7 +1534,7 @@ static int do_oid_object_info_extended(struct repository *r, * promisor_remote_get_direct(), such that arbitrary * repositories work. */ - promisor_remote_get_direct(r, real, 1); + promisor_remote_get_direct(r, real, 1, NULL); already_retried = 1; continue; } diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 2ea66205cf..4b290060f5 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -56,11 +56,14 @@ test_expect_success 'verify that .promisor file contains refs fetched' ' # checkout master to force dynamic object fetch of blobs at HEAD. test_expect_success 'verify checkout with dynamic object fetch' ' + rm -f trace && git -C pc1 rev-list --quiet --objects --missing=print HEAD >observed && test_line_count = 4 observed && - git -C pc1 checkout master && + GIT_TRACE_PACKET="$(pwd)/trace" git -C pc1 checkout master && git -C pc1 rev-list --quiet --objects --missing=print HEAD >observed && - test_line_count = 0 observed + test_line_count = 0 observed && + HEAD_HASH="$(git -C pc1 rev-parse HEAD)" && + grep "server-option=provenance=$HEAD_HASH" trace ' # create new commits in "src" repo to establish a blame history on file.1.txt diff --git a/unpack-trees.c b/unpack-trees.c index 323280dd48..d5dc06a70a 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -457,7 +457,8 @@ static int check_updates(struct unpack_trees_options *o, oid_array_append(&to_fetch, &ce->oid); } promisor_remote_get_direct(the_repository, - to_fetch.oid, to_fetch.nr); + to_fetch.oid, to_fetch.nr, + o->provenance_commit); oid_array_clear(&to_fetch); } for (i = 0; i < index->cache_nr; i++) { diff --git a/unpack-trees.h b/unpack-trees.h index 2e87875b15..a6e126c39d 100644 --- a/unpack-trees.h +++ b/unpack-trees.h @@ -89,6 +89,13 @@ struct unpack_trees_options { struct pattern_list *pl; /* for internal use */ struct checkout_metadata meta; + + /* + * Optional: The commit that this request comes from. Only used when + * this repository is a partial clone; this is a hint to the server when + * prefetching is needed. + */ + struct object_id *provenance_commit; }; int unpack_trees(unsigned n, struct tree_desc *t, -- 2.29.2.684.gfbc64c5ab5-goog