Remotes are stored as an array, so looking one up or adding one without duplication is an O(n) operation. Reading an entire config file full of remotes is O(n^2) in the number of remotes. For a repository with tens of thousands of remotes, the running time can hit multiple minutes. Hash tables are way faster. So we add a hashmap from remote name to struct remote and use it for all lookups. The time to add a new remote to a repo that already has 50,000 remotes drops from ~2 minutes to < 1 second. We retain the old array of remotes so iterators proceed in config-file order. Signed-off-by: Patrick Reynolds <patrick.reynolds@xxxxxxxxxx> --- mark function with no arguments as taking void remote.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++----------------- remote.h | 3 +++ 2 files changed, 49 insertions(+), 17 deletions(-) diff --git a/remote.c b/remote.c index a0701f6..52533e4 100644 --- a/remote.c +++ b/remote.c @@ -42,6 +42,7 @@ struct rewrites { static struct remote **remotes; static int remotes_alloc; static int remotes_nr; +static struct hashmap remotes_hash; static struct branch **branches; static int branches_alloc; @@ -136,26 +137,51 @@ static void add_url_alias(struct remote *remote, const char *url) add_pushurl_alias(remote, url); } +struct remotes_hash_key { + const char *str; + int len; +}; + +static int remotes_hash_cmp(const struct remote *a, const struct remote *b, const struct remotes_hash_key *key) +{ + if (key) + return strncmp(a->name, key->str, key->len) || a->name[key->len]; + else + return strcmp(a->name, b->name); +} + +static inline void init_remotes_hash(void) +{ + if (!remotes_hash.cmpfn) + hashmap_init(&remotes_hash, (hashmap_cmp_fn)remotes_hash_cmp, 0); +} + static struct remote *make_remote(const char *name, int len) { - struct remote *ret; - int i; + struct remote *ret, *replaced; + struct remotes_hash_key lookup; + struct hashmap_entry lookup_entry; - for (i = 0; i < remotes_nr; i++) { - if (len ? (!strncmp(name, remotes[i]->name, len) && - !remotes[i]->name[len]) : - !strcmp(name, remotes[i]->name)) - return remotes[i]; - } + if (!len) + len = strlen(name); + + init_remotes_hash(); + lookup.str = name; + lookup.len = len; + hashmap_entry_init(&lookup_entry, memhash(name, len)); + + if ((ret = hashmap_get(&remotes_hash, &lookup_entry, &lookup)) != NULL) + return ret; ret = xcalloc(1, sizeof(struct remote)); ret->prune = -1; /* unspecified */ ALLOC_GROW(remotes, remotes_nr + 1, remotes_alloc); remotes[remotes_nr++] = ret; - if (len) - ret->name = xstrndup(name, len); - else - ret->name = xstrdup(name); + ret->name = xstrndup(name, len); + + hashmap_entry_init(ret, lookup_entry.hash); + replaced = hashmap_put(&remotes_hash, ret); + assert(replaced == NULL); /* no previous entry overwritten */ return ret; } @@ -722,13 +748,16 @@ struct remote *pushremote_get(const char *name) int remote_is_configured(const char *name) { - int i; + struct remotes_hash_key lookup; + struct hashmap_entry lookup_entry; read_config(); - for (i = 0; i < remotes_nr; i++) - if (!strcmp(name, remotes[i]->name)) - return 1; - return 0; + init_remotes_hash(); + lookup.str = name; + lookup.len = strlen(name); + hashmap_entry_init(&lookup_entry, memhash(name, lookup.len)); + + return hashmap_get(&remotes_hash, &lookup_entry, &lookup) != NULL; } int for_each_remote(each_remote_fn fn, void *priv) diff --git a/remote.h b/remote.h index 917d383..81cb5ff 100644 --- a/remote.h +++ b/remote.h @@ -1,6 +1,7 @@ #ifndef REMOTE_H #define REMOTE_H +#include "hashmap.h" #include "parse-options.h" enum { @@ -10,6 +11,8 @@ enum { }; struct remote { + struct hashmap_entry ent; /* must be first */ + const char *name; int origin; -- 2.0.0.rc4