When fetching into a repository, a connectivity check is first made by check_exist_and_connected() in builtin/fetch.c that runs: git rev-list --objects --stdin --not --all --quiet <(list of objects) If the client repository has many refs, this command can be slow, regardless of the nature of the server repository or what is being fetched. A profiler reveals that most of the time is spent in setup_revisions() (approx. 60/63), and of the time spent in setup_revisions(), most of it is spent in parse_object() (approx. 49/60). This is because setup_revisions() parses the target of every ref (from "--all"), and parse_object() reads the buffer of the object. Reading the buffer is unnecessary if the repository has a commit graph and if the ref points to a commit (which is typically the case). This patch uses the commit graph wherever possible; on my computer, when I run the above command with a list of 1 object on a many-ref repository, I get a speedup from 1.8s to 1.0s. Another way to accomplish this effect would be to modify parse_object() to use the commit graph if possible; however, I did not want to change parse_object()'s current behavior of always checking the object signature of the returned object. Signed-off-by: Jonathan Tan <jonathantanmy@xxxxxxxxxx> --- This patch is now on master. v2 makes use of the optimization Stolee describes in [1], except that I have arranged the functions slightly differently. In particular, I didn't want to add even more ways to obtain objects, so I let parse_commit_in_graph() be able to take in either a commit shell or an OID, and did not create the parse_probably_commit() function he suggested. But I'm not really attached to this design choice, and can change it if requested. [1] https://public-inbox.org/git/aa0cd481-c135-47aa-2a69-e3dc71661caa@xxxxxxxxx/ --- commit-graph.c | 38 ++++++++++++++++++++++++++++---------- commit-graph.h | 12 ++++++++---- commit.c | 2 +- revision.c | 5 ++++- t/helper/test-repository.c | 4 ++-- 5 files changed, 43 insertions(+), 18 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 40c855f185..a571b523b7 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -286,7 +286,8 @@ void close_commit_graph(struct repository *r) r->objects->commit_graph = NULL; } -static int bsearch_graph(struct commit_graph *g, struct object_id *oid, uint32_t *pos) +static int bsearch_graph(struct commit_graph *g, const struct object_id *oid, + uint32_t *pos) { return bsearch_hash(oid->hash, g->chunk_oid_fanout, g->chunk_oid_lookup, g->hash_len, pos); @@ -374,24 +375,41 @@ static int find_commit_in_graph(struct commit *item, struct commit_graph *g, uin } } -static int parse_commit_in_graph_one(struct commit_graph *g, struct commit *item) +static struct commit *parse_commit_in_graph_one(struct repository *r, + struct commit_graph *g, + struct commit *shell, + const struct object_id *oid) { uint32_t pos; - if (item->object.parsed) - return 1; + if (shell && shell->object.parsed) + return shell; - if (find_commit_in_graph(item, g, &pos)) - return fill_commit_in_graph(item, g, pos); + if (shell && shell->graph_pos != COMMIT_NOT_FROM_GRAPH) { + pos = shell->graph_pos; + } else if (bsearch_graph(g, shell ? &shell->object.oid : oid, &pos)) { + /* bsearch_graph sets pos */ + } else { + return NULL; + } - return 0; + if (!shell) { + shell = lookup_commit(r, oid); + if (!shell) + return NULL; + } + + fill_commit_in_graph(shell, g, pos); + return shell; } -int parse_commit_in_graph(struct repository *r, struct commit *item) +struct commit *parse_commit_in_graph(struct repository *r, struct commit *shell, + const struct object_id *oid) { if (!prepare_commit_graph(r)) return 0; - return parse_commit_in_graph_one(r->objects->commit_graph, item); + return parse_commit_in_graph_one(r, r->objects->commit_graph, shell, + oid); } void load_commit_graph_info(struct repository *r, struct commit *item) @@ -1025,7 +1043,7 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) } graph_commit = lookup_commit(r, &cur_oid); - if (!parse_commit_in_graph_one(g, graph_commit)) + if (!parse_commit_in_graph_one(r, g, graph_commit, NULL)) graph_report("failed to parse %s from commit-graph", oid_to_hex(&cur_oid)); } diff --git a/commit-graph.h b/commit-graph.h index 9db40b4d3a..8b7b5985dc 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -13,16 +13,20 @@ struct commit; char *get_commit_graph_filename(const char *obj_dir); /* - * Given a commit struct, try to fill the commit struct info, including: + * If the given commit (identified by shell->object.oid or oid) is in the + * commit graph, returns a commit struct (reusing shell if it is not NULL) + * including the following info: * 1. tree object * 2. date * 3. parents. * - * Returns 1 if and only if the commit was found in the packed graph. + * If not, returns NULL. See parse_commit_buffer() for the fallback after this + * call. * - * See parse_commit_buffer() for the fallback after this call. + * Either shell or oid must be non-NULL. If both are non-NULL, oid is ignored. */ -int parse_commit_in_graph(struct repository *r, struct commit *item); +struct commit *parse_commit_in_graph(struct repository *r, struct commit *shell, + const struct object_id *oid); /* * It is possible that we loaded commit contents from the commit buffer, diff --git a/commit.c b/commit.c index d13a7bc374..88eb580c5a 100644 --- a/commit.c +++ b/commit.c @@ -456,7 +456,7 @@ int parse_commit_internal(struct commit *item, int quiet_on_missing, int use_com return -1; if (item->object.parsed) return 0; - if (use_commit_graph && parse_commit_in_graph(the_repository, item)) + if (use_commit_graph && parse_commit_in_graph(the_repository, item, NULL)) return 0; buffer = read_object_file(&item->object.oid, &type, &size); if (!buffer) diff --git a/revision.c b/revision.c index 13e0519c02..05fddb5880 100644 --- a/revision.c +++ b/revision.c @@ -213,7 +213,10 @@ static struct object *get_reference(struct rev_info *revs, const char *name, { struct object *object; - object = parse_object(revs->repo, oid); + object = (struct object *) parse_commit_in_graph(revs->repo, NULL, oid); + if (!object) + object = parse_object(revs->repo, oid); + if (!object) { if (revs->ignore_missing) return object; diff --git a/t/helper/test-repository.c b/t/helper/test-repository.c index 6a84a53efb..63b928a883 100644 --- a/t/helper/test-repository.c +++ b/t/helper/test-repository.c @@ -22,7 +22,7 @@ static void test_parse_commit_in_graph(const char *gitdir, const char *worktree, c = lookup_commit(&r, commit_oid); - if (!parse_commit_in_graph(&r, c)) + if (!parse_commit_in_graph(&r, c, NULL)) die("Couldn't parse commit"); printf("%"PRItime, c->date); @@ -52,7 +52,7 @@ static void test_get_commit_tree_in_graph(const char *gitdir, * get_commit_tree_in_graph does not automatically parse the commit, so * parse it first. */ - if (!parse_commit_in_graph(&r, c)) + if (!parse_commit_in_graph(&r, c, NULL)) die("Couldn't parse commit"); tree = get_commit_tree_in_graph(&r, c); if (!tree) -- 2.19.0.271.gfe8321ec05.dirty