For large repositories, enumerating the list of all promisor objects (in order to exclude them from a rev-list walk) can take a significant amount of time). When --exclude-promisor-objects is passed to rev-list, don't enumerate the promisor objects. Instead, filter them (and any children objects) during the actual graph walk. Remove the mark_uninteresting() function as it's not used anywhere else. When testing against a large repo [1], this patch reduces the connectivity check runtime from 3 minutes to ~7 seconds. [1]: https://android.googlesource.com/platform/frameworks/base/ Helped-By: Jonathan Tan <jonathantanmy@xxxxxxxxxx> Helped-By: Jeff King <peff@xxxxxxxx> Helped-By: Jonathan Nieder <jrnieder@xxxxxxxxx> Signed-off-by: Josh Steadmon <steadmon@xxxxxxxxxx> Signed-off-by: Josh Steadmon <steadmon@xxxxxxxxxx> --- Changes since V2: * Pulled the "OK to skip?" logic into a separate function. Changes since V1: * Switched to alternate approach, we now do the regular rev-list walk but skip promisor objects at walk time, rather than pre-excluding them. Range-diff against v2: 1: 9f327d6d8d ! 1: 9856e7fc74 rev-list: exclude promisor objects at walk time @@ -10,9 +10,15 @@ the promisor objects. Instead, filter them (and any children objects) during the actual graph walk. + When testing against a large repo [1], this reduces the connectivity + check runtime from 3 minutes to ~7 seconds. + + [1]: https://android.googlesource.com/platform/frameworks/base/ + Helped-By: Jonathan Tan <jonathantanmy@xxxxxxxxxx> Helped-By: Jeff King <peff@xxxxxxxx> Helped-By: Jonathan Nieder <jrnieder@xxxxxxxxx> + Signed-off-by: Josh Steadmon <steadmon@xxxxxxxxxx> @@ -20,78 +26,55 @@ --- a/list-objects.c +++ b/list-objects.c @@ - struct object *obj = &blob->object; - size_t pathlen; - enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW; -+ struct object_info oi = OBJECT_INFO_INIT; + void *filter_data; + }; - if (!ctx->revs->blob_objects) - return; ++static int should_skip_promisor_object(const struct rev_info *revs, ++ const struct object_id *oid) ++{ ++ struct object_info oi = OBJECT_INFO_INIT; ++ return (revs->exclude_promisor_objects && ++ !oid_object_info_extended(the_repository, oid, &oi, 0) && ++ oi.whence == OI_PACKED && ++ oi.u.packed.pack->pack_promisor); ++} ++ + static void process_blob(struct traversal_context *ctx, + struct blob *blob, + struct strbuf *path, @@ die("bad blob object"); if (obj->flags & (UNINTERESTING | SEEN)) return; -+ if (ctx->revs->exclude_promisor_objects && -+ !oid_object_info_extended(the_repository, &obj->oid, &oi, 0) && -+ oi.whence == OI_PACKED && -+ oi.u.packed.pack->pack_promisor) ++ if (should_skip_promisor_object(ctx->revs, &obj->oid)) + return; /* * Pre-filter known-missing objects when explicitly requested. -@@ - int baselen = base->len; - enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW; - int failed_parse; -+ struct object_info oi = OBJECT_INFO_INIT; - - if (!revs->tree_objects) - return; @@ die("bad tree object"); if (obj->flags & (UNINTERESTING | SEEN)) return; -+ if (ctx->revs->exclude_promisor_objects && -+ !oid_object_info_extended(the_repository, &obj->oid, &oi, 0) && -+ oi.whence == OI_PACKED && -+ oi.u.packed.pack->pack_promisor) ++ if (should_skip_promisor_object(ctx->revs, &obj->oid)) + return; failed_parse = parse_tree_gently(tree, 1); if (failed_parse) { -@@ - struct strbuf *base) - { - int i; -+ struct object_info oi = OBJECT_INFO_INIT; - - assert(base->len == 0); - @@ struct object *obj = pending->item; const char *name = pending->name; const char *path = pending->path; -+ if (ctx->revs->exclude_promisor_objects && -+ !oid_object_info_extended(the_repository, &obj->oid, &oi, 0) && -+ oi.whence == OI_PACKED && -+ oi.u.packed.pack->pack_promisor) ++ if (should_skip_promisor_object(ctx->revs, &obj->oid)) + continue; + if (obj->flags & (UNINTERESTING | SEEN)) continue; if (obj->type == OBJ_TAG) { @@ - { - struct commit *commit; - struct strbuf csp; /* callee's scratch pad */ -+ struct object_info oi = OBJECT_INFO_INIT; strbuf_init(&csp, PATH_MAX); while ((commit = get_revision(ctx->revs)) != NULL) { -+ if (ctx->revs->exclude_promisor_objects && -+ !oid_object_info_extended(the_repository, &commit->object.oid, &oi, 0) && -+ oi.whence == OI_PACKED && -+ oi.u.packed.pack->pack_promisor) ++ if (should_skip_promisor_object(ctx->revs, &commit->object.oid)) + continue; + /* list-objects.c | 20 ++++++++++++++++++++ revision.c | 16 ---------------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/list-objects.c b/list-objects.c index dc77361e11..c153ee5dfb 100644 --- a/list-objects.c +++ b/list-objects.c @@ -22,6 +22,16 @@ struct traversal_context { void *filter_data; }; +static int should_skip_promisor_object(const struct rev_info *revs, + const struct object_id *oid) +{ + struct object_info oi = OBJECT_INFO_INIT; + return (revs->exclude_promisor_objects && + !oid_object_info_extended(the_repository, oid, &oi, 0) && + oi.whence == OI_PACKED && + oi.u.packed.pack->pack_promisor); +} + static void process_blob(struct traversal_context *ctx, struct blob *blob, struct strbuf *path, @@ -37,6 +47,8 @@ static void process_blob(struct traversal_context *ctx, die("bad blob object"); if (obj->flags & (UNINTERESTING | SEEN)) return; + if (should_skip_promisor_object(ctx->revs, &obj->oid)) + return; /* * Pre-filter known-missing objects when explicitly requested. @@ -156,6 +168,8 @@ static void process_tree(struct traversal_context *ctx, die("bad tree object"); if (obj->flags & (UNINTERESTING | SEEN)) return; + if (should_skip_promisor_object(ctx->revs, &obj->oid)) + return; failed_parse = parse_tree_gently(tree, 1); if (failed_parse) { @@ -326,6 +340,9 @@ static void traverse_trees_and_blobs(struct traversal_context *ctx, struct object *obj = pending->item; const char *name = pending->name; const char *path = pending->path; + if (should_skip_promisor_object(ctx->revs, &obj->oid)) + continue; + if (obj->flags & (UNINTERESTING | SEEN)) continue; if (obj->type == OBJ_TAG) { @@ -356,6 +373,9 @@ static void do_traverse(struct traversal_context *ctx) strbuf_init(&csp, PATH_MAX); while ((commit = get_revision(ctx->revs)) != NULL) { + if (should_skip_promisor_object(ctx->revs, &commit->object.oid)) + continue; + /* * an uninteresting boundary commit may not have its tree * parsed yet, but we are not going to show them anyway diff --git a/revision.c b/revision.c index eb8e51bc63..85974e941d 100644 --- a/revision.c +++ b/revision.c @@ -3067,17 +3067,6 @@ void reset_revision_walk(void) clear_object_flags(SEEN | ADDED | SHOWN); } -static int mark_uninteresting(const struct object_id *oid, - struct packed_git *pack, - uint32_t pos, - void *cb) -{ - struct rev_info *revs = cb; - struct object *o = parse_object(revs->repo, oid); - o->flags |= UNINTERESTING | SEEN; - return 0; -} - define_commit_slab(indegree_slab, int); define_commit_slab(author_date_slab, timestamp_t); @@ -3316,11 +3305,6 @@ int prepare_revision_walk(struct rev_info *revs) (revs->limited && limiting_can_increase_treesame(revs))) revs->treesame.name = "treesame"; - if (revs->exclude_promisor_objects) { - for_each_packed_object(mark_uninteresting, revs, - FOR_EACH_OBJECT_PROMISOR_ONLY); - } - if (revs->no_walk != REVISION_WALK_NO_WALK_UNSORTED) commit_list_sort_by_date(&revs->commits); if (revs->no_walk) -- 2.21.0.392.gf8f6787159e-goog