When trying to get a list of remote tags to see if we need to fetch any we were doing a linear search for the matching tag ref for the tag^{} commit entries. This proves to be incredibly slow for large numbers of tags. Rewrite the function so that we build up a string_list of refs to fetch and then process that instead. As an extreme example, for a repository with 50000 tags (and just a single commit on a single branch), a fetch that does nothing goes from ~1m50s to ~4.1s. Signed-off-by: Julian Phillips <julian@xxxxxxxxxxxxxxxxx> --- Ok, so here it is ... Sometimes I forget just how much we git users value our time and resources. ;) builtin-fetch.c | 98 ++++++++++++++++++++++++++++++++++++------------------ 1 files changed, 65 insertions(+), 33 deletions(-) diff --git a/builtin-fetch.c b/builtin-fetch.c index cb48c57..acb08e4 100644 --- a/builtin-fetch.c +++ b/builtin-fetch.c @@ -504,57 +504,89 @@ static int will_fetch(struct ref **head, const unsigned char *sha1) return 0; } +struct tag_data { + struct ref **head; + struct ref ***tail; +}; + +static int add_to_tail(struct string_list_item *item, void *cb_data) +{ + struct tag_data *data = (struct tag_data *)cb_data; + struct ref *rm = NULL; + + /* We have already decided to ignore this item */ + if (!item->util) + return 0; + + rm = alloc_ref(item->string); + rm->peer_ref = alloc_ref(item->string); + hashcpy(rm->old_sha1, item->util); + + **data->tail = rm; + *data->tail = &rm->next; + + return 0; +} + static void find_non_local_tags(struct transport *transport, struct ref **head, struct ref ***tail) { struct string_list existing_refs = { NULL, 0, 0, 0 }; - struct string_list new_refs = { NULL, 0, 0, 1 }; - char *ref_name; - int ref_name_len; - const unsigned char *ref_sha1; - const struct ref *tag_ref; - struct ref *rm = NULL; + struct string_list remote_refs = { NULL, 0, 0, 0 }; + struct tag_data data = {head, tail}; const struct ref *ref; + struct string_list_item *item = NULL; for_each_ref(add_existing, 
&existing_refs); for (ref = transport_get_remote_refs(transport); ref; ref = ref->next) { if (prefixcmp(ref->name, "refs/tags")) continue; - ref_name = xstrdup(ref->name); - ref_name_len = strlen(ref_name); - ref_sha1 = ref->old_sha1; - - if (!strcmp(ref_name + ref_name_len - 3, "^{}")) { - ref_name[ref_name_len - 3] = 0; - tag_ref = transport_get_remote_refs(transport); - while (tag_ref) { - if (!strcmp(tag_ref->name, ref_name)) { - ref_sha1 = tag_ref->old_sha1; - break; - } - tag_ref = tag_ref->next; - } + /* the peeled ref always follows the matching base ref, so if we + * see a peeled ref that we don't want to fetch then we can mark + * the ref entry in the list as one to ignore by setting util to + * NULL. */ + if (!strcmp(ref->name + strlen(ref->name) - 3, "^{}")) { + if (item && !has_sha1_file(ref->old_sha1) && + !will_fetch(head, ref->old_sha1) && + !has_sha1_file(item->util) && + !will_fetch(head, item->util) ) + item->util = NULL; + item = NULL; + continue; } - if (!string_list_has_string(&existing_refs, ref_name) && - !string_list_has_string(&new_refs, ref_name) && - (has_sha1_file(ref->old_sha1) || - will_fetch(head, ref->old_sha1))) { - string_list_insert(ref_name, &new_refs); + /* If item is non-NULL here, then we previously saw a ref not + * followed by a peeled reference, so we need to check if it is + * a lightweight tag that we want to fetch */ + if (item && !has_sha1_file(item->util) && + !will_fetch(head, item->util) ) + item->util = NULL; - rm = alloc_ref(ref_name); - rm->peer_ref = alloc_ref(ref_name); - hashcpy(rm->old_sha1, ref_sha1); + item = NULL; - **tail = rm; - *tail = &rm->next; - } - free(ref_name); + /* skip duplicates and refs that we already have */ + if (string_list_has_string(&remote_refs, ref->name) || + string_list_has_string(&existing_refs, ref->name)) + continue; + + item = string_list_insert(ref->name, &remote_refs); + item->util = (void *)ref->old_sha1; } string_list_clear(&existing_refs, 0); - string_list_clear(&new_refs, 
0); + + /* We may have a final lightweight tag that needs to be checked to see + * if it needs fetching. */ + if (item && !has_sha1_file(item->util) && + !will_fetch(head, item->util) ) + item->util = NULL; + + /* For all the tags in the remote_refs string list, call add_to_tail to + * add them to the list of refs to be fetched */ + for_each_string_list(add_to_tail, &remote_refs, &data); + + string_list_clear(&remote_refs, 0); } static void check_not_current_branch(struct ref *ref_map) -- 1.6.4.2 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html