[RFC/PATCH v2] fetch: Speed up fetch by rewriting find_non_local_tags

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



When trying to get a list of remote tags to see if we need to fetch
any we were doing a linear search for the matching tag ref for the
tag^{} commit entries.  This proves to be incredibly slow for large
numbers of tags.  Rewrite the function so that we can do lookup in
string_lists instead.

For a repository with 50000 tags (and just a single commit on a single
branch), a fetch that does nothing goes from ~ 1m50s to ~4.2s.

Signed-off-by: Julian Phillips <julian@xxxxxxxxxxxxxxxxx>
---

Not only does this not require a custom hash table, it is also slightly
faster than the last version (~4.2s vs ~4.5s).

If nothing else, having rewritten it completely, at least I now
understand what the old find_non_local_tags function was actually doing
... ;)

 builtin-fetch.c |   85 ++++++++++++++++++++++++++++++++++++++-----------------
 1 files changed, 59 insertions(+), 26 deletions(-)

diff --git a/builtin-fetch.c b/builtin-fetch.c
index cb48c57..c9a2563 100644
--- a/builtin-fetch.c
+++ b/builtin-fetch.c
@@ -504,57 +504,90 @@ static int will_fetch(struct ref **head, const unsigned char *sha1)
 	return 0;
 }
 
+struct tag_data {
+	struct ref **head;
+	struct ref ***tail;
+	struct string_list *refs;
+};
+
+static int add_to_tail(struct string_list_item *item, void *cb_data)
+{
+	struct tag_data *data = (struct tag_data *)cb_data;
+	unsigned char *commit = (unsigned char *)item->util;
+	struct ref *rm = NULL;
+	struct string_list_item *sli;
+
+	/* Tag objects will have the commit sha1 associated with the peeled
+	 * ref, if there is not a peeled ref then the ref is probably a
+	 * lightweight tag and so refers to a commit directly */
+	sli = string_list_lookup(item->string, data->refs);
+	if (sli)
+		commit = sli->util;
+
+	/* skip over tags that we don't have the commits for. */
+	if (!has_sha1_file(commit) && !will_fetch(data->head, commit))
+		return 0;
+
+	rm = alloc_ref(item->string);
+	rm->peer_ref = alloc_ref(item->string);
+	hashcpy(rm->old_sha1, item->util);
+
+	**data->tail = rm;
+	*data->tail = &rm->next;
+
+	return 0;
+}
+
 static void find_non_local_tags(struct transport *transport,
 			struct ref **head,
 			struct ref ***tail)
 {
 	struct string_list existing_refs = { NULL, 0, 0, 0 };
-	struct string_list new_refs = { NULL, 0, 0, 1 };
+	struct string_list peeled_refs = { NULL, 0, 0, 1 };
+	struct string_list remote_refs = { NULL, 0, 0, 1 };
+	struct tag_data data = {head, tail, &peeled_refs};
+	struct string_list *refs;
 	char *ref_name;
 	int ref_name_len;
-	const unsigned char *ref_sha1;
-	const struct ref *tag_ref;
-	struct ref *rm = NULL;
 	const struct ref *ref;
+	struct string_list_item *item;
 
 	for_each_ref(add_existing, &existing_refs);
 	for (ref = transport_get_remote_refs(transport); ref; ref = ref->next) {
 		if (prefixcmp(ref->name, "refs/tags"))
 			continue;
 
+		/* skip duplicates */
+		if (string_list_lookup(ref->name, &remote_refs))
+			continue;
+
+		refs = &remote_refs;
 		ref_name = xstrdup(ref->name);
 		ref_name_len = strlen(ref_name);
-		ref_sha1 = ref->old_sha1;
 
+		/* we want to store peeled refs by the base ref name in the
+		 * peeled_refs string list */
 		if (!strcmp(ref_name + ref_name_len - 3, "^{}")) {
 			ref_name[ref_name_len - 3] = 0;
-			tag_ref = transport_get_remote_refs(transport);
-			while (tag_ref) {
-				if (!strcmp(tag_ref->name, ref_name)) {
-					ref_sha1 = tag_ref->old_sha1;
-					break;
-				}
-				tag_ref = tag_ref->next;
-			}
+			refs = &peeled_refs;
 		}
 
-		if (!string_list_has_string(&existing_refs, ref_name) &&
-		    !string_list_has_string(&new_refs, ref_name) &&
-		    (has_sha1_file(ref->old_sha1) ||
-		     will_fetch(head, ref->old_sha1))) {
-			string_list_insert(ref_name, &new_refs);
-
-			rm = alloc_ref(ref_name);
-			rm->peer_ref = alloc_ref(ref_name);
-			hashcpy(rm->old_sha1, ref_sha1);
-
-			**tail = rm;
-			*tail = &rm->next;
+		/* ignore refs that we already have */
+		if (!string_list_has_string(&existing_refs, ref_name)) {
+			item = string_list_insert(ref_name, refs);
+			item->util = (void *)ref->old_sha1;
 		}
+
 		free(ref_name);
 	}
+
+	/* For all the tags in the remote_refs string list, call add_to_tail to
+	 * add them to the list of refs to be fetched */
+	for_each_string_list(add_to_tail, &remote_refs, &data);
+
 	string_list_clear(&existing_refs, 0);
-	string_list_clear(&new_refs, 0);
+	string_list_clear(&peeled_refs, 0);
+	string_list_clear(&remote_refs, 0);
 }
 
 static void check_not_current_branch(struct ref *ref_map)
-- 
1.6.4.2

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]