When trying to get a list of remote tags to see if we need to fetch any we were doing a linear search for the matching tag ref for the tag^{} commit entries. This proves to be incredibly slow for large numbers of tags. Rewrite the function so that we can do lookup in string_lists instead. For a repository with 50000 tags (and just a single commit on a single branch), a fetch that does nothing goes from ~ 1m50s to ~4.2s. Signed-off-by: Julian Phillips <julian@xxxxxxxxxxxxxxxxx> --- Not only does this not require a custom hash table, it is also slightly faster than the last version (~4.2s vs ~4.5s). If nothing else, having rewritten it completely, at least I now understand what the old find_non_local_tags function was actually doing ... ;) builtin-fetch.c | 85 ++++++++++++++++++++++++++++++++++++++----------------- 1 files changed, 59 insertions(+), 26 deletions(-) diff --git a/builtin-fetch.c b/builtin-fetch.c index cb48c57..c9a2563 100644 --- a/builtin-fetch.c +++ b/builtin-fetch.c @@ -504,57 +504,90 @@ static int will_fetch(struct ref **head, const unsigned char *sha1) return 0; } +struct tag_data { + struct ref **head; + struct ref ***tail; + struct string_list *refs; +}; + +static int add_to_tail(struct string_list_item *item, void *cb_data) +{ + struct tag_data *data = (struct tag_data *)cb_data; + unsigned char *commit = (unsigned char *)item->util; + struct ref *rm = NULL; + struct string_list_item *sli; + + /* Tag objects will have the commit sha1 associated with the peeled + * ref, if there is not a peeled ref then the ref is probably a + * lightweight tag and so refers to a commit directly */ + sli = string_list_lookup(item->string, data->refs); + if (sli) + commit = sli->util; + + /* skip over tags that we don't have the commits for. */ + if (!has_sha1_file(commit) && !will_fetch(data->head, commit)) + return 0; + + rm = alloc_ref(item->string); + rm->peer_ref = alloc_ref(item->string); + hashcpy(rm->old_sha1, item->util); + + **data->tail = rm; + *data->tail = &rm->next; + + return 0; +} + static void find_non_local_tags(struct transport *transport, struct ref **head, struct ref ***tail) { struct string_list existing_refs = { NULL, 0, 0, 0 }; - struct string_list new_refs = { NULL, 0, 0, 1 }; + struct string_list peeled_refs = { NULL, 0, 0, 1 }; + struct string_list remote_refs = { NULL, 0, 0, 1 }; + struct tag_data data = {head, tail, &peeled_refs}; + struct string_list *refs; char *ref_name; int ref_name_len; - const unsigned char *ref_sha1; - const struct ref *tag_ref; - struct ref *rm = NULL; const struct ref *ref; + struct string_list_item *item; for_each_ref(add_existing, &existing_refs); for (ref = transport_get_remote_refs(transport); ref; ref = ref->next) { if (prefixcmp(ref->name, "refs/tags")) continue; + /* skip duplicates */ + if (string_list_lookup(ref->name, &remote_refs)) + continue; + + refs = &remote_refs; ref_name = xstrdup(ref->name); ref_name_len = strlen(ref_name); - ref_sha1 = ref->old_sha1; + /* we want to store peeled refs by the base ref name in the + * peeled_refs string list */ if (!strcmp(ref_name + ref_name_len - 3, "^{}")) { ref_name[ref_name_len - 3] = 0; - tag_ref = transport_get_remote_refs(transport); - while (tag_ref) { - if (!strcmp(tag_ref->name, ref_name)) { - ref_sha1 = tag_ref->old_sha1; - break; - } - tag_ref = tag_ref->next; - } + refs = &peeled_refs; } - if (!string_list_has_string(&existing_refs, ref_name) && - !string_list_has_string(&new_refs, ref_name) && - (has_sha1_file(ref->old_sha1) || - will_fetch(head, ref->old_sha1))) { - string_list_insert(ref_name, &new_refs); - - rm = alloc_ref(ref_name); - rm->peer_ref = alloc_ref(ref_name); - hashcpy(rm->old_sha1, ref_sha1); - - **tail = rm; - *tail = &rm->next; + /* ignore refs that we already have */ + if (!string_list_has_string(&existing_refs, ref_name)) { + item = string_list_insert(ref_name, refs); + item->util = (void *)ref->old_sha1; } + free(ref_name); } + + /* For all the tags in the remote_refs string list, call add_to_tail to + * add them to the list of refs to be fetched */ + for_each_string_list(add_to_tail, &remote_refs, &data); + string_list_clear(&existing_refs, 0); - string_list_clear(&new_refs, 0); + string_list_clear(&peeled_refs, 0); + string_list_clear(&remote_refs, 0); } static void check_not_current_branch(struct ref *ref_map) -- 1.6.4.2 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html