get_ref_cache used a linear list, which obviously is O(n^2). Use a fixed bucket hash which just takes a factor of 100000 (~ 317^2) out of the n^2 - which is enough. Signed-off-by: Andreas Krey <a.krey@xxxxxx> --- This brings 'git clean -ndx' times down from 17 minutes to 11 seconds on one of our workspaces (which accumulated a lot of ignored directories). Actuallly using adaptive hashing or other structures seems overkill. refs.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/refs.c b/refs.c index e23542b..8198d9e 100644 --- a/refs.c +++ b/refs.c @@ -982,6 +982,8 @@ struct packed_ref_cache { struct stat_validity validity; }; +#define REF_CACHE_HASH 317 + /* * Future: need to be in "struct repository" * when doing a full libification. @@ -996,7 +998,7 @@ static struct ref_cache { * is initialized correctly. */ char name[1]; -} ref_cache, *submodule_ref_caches; +} ref_cache, *submodule_ref_caches[REF_CACHE_HASH]; /* Lock used for the main packed-refs file: */ static struct lock_file packlock; @@ -1065,18 +1067,19 @@ static struct ref_cache *create_ref_cache(const char *submodule) */ static struct ref_cache *get_ref_cache(const char *submodule) { - struct ref_cache *refs; + struct ref_cache *refs, **bucketp; + bucketp = submodule_ref_caches + strhash(submodule) % REF_CACHE_HASH; if (!submodule || !*submodule) return &ref_cache; - for (refs = submodule_ref_caches; refs; refs = refs->next) + for (refs = *bucketp; refs; refs = refs->next) if (!strcmp(submodule, refs->name)) return refs; refs = create_ref_cache(submodule); - refs->next = submodule_ref_caches; - submodule_ref_caches = refs; + refs->next = *bucketp; + *bucketp = refs; return refs; } -- 2.3.2.223.g7a9409c -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html