When iterating over references, reference priming is used to make sure that loose references are read into the ref-cache before packed references, to avoid races. It used to be that the prefix passed to reference iterators almost always ended in `/`, for example `refs/heads/`. In that case, the priming code would read all loose references under `find_containing_dir("refs/heads/")`, which is "refs/heads/". That's just what we want. But now that `ref-filter` knows how to pass refname prefixes to `for_each_fullref_in()`, the prefix might come from user input; for example, git for-each-ref refs/heads Since the argument doesn't include a trailing slash, the reference iteration code would prime all of the loose references under `find_containing_dir("refs/heads")`, which is "refs/". Thus we would unnecessarily read tags, remote-tracking references, etc., when the user is only interested in branches. It is a bit awkward to get around this problem. We can't just append a slash to the argument, because we don't know ab initio whether an argument like `refs/tags/release` corresponds to a single tag or to a directory containing tags. Moreover, until now a `prefix_ref_iterator` was used to make the final decision about which references fall within the prefix (the `cache_ref_iterator` only did a rough cut). This is also inefficient, because the `prefix_ref_iterator` can't know, for example, that while you are in a subdirectory that is completely within the prefix, you don't have to do the prefix check. So: * Move the responsibility for doing the prefix check directly to `cache_ref_iterator`. This means that `cache_ref_iterator_begin()` never has to wrap its return value in a `prefix_ref_iterator`. * Teach `cache_ref_iterator_begin()` (and `prime_ref_dir()`) to be stricter about what they iterate over and what directories they prime. * Teach `cache_ref_iterator` to keep track of whether the current `cache_ref_iterator_level` is fully within the prefix. If so, skip the prefix checks entirely. The main benefit of these optimizations is for loose references, since packed references are always read all at once. Note that after this change, `prefix_ref_iterator` is only ever used for its trimming feature and not for its "prefix" feature. But I'm not ripping out the latter yet, because it might be useful for another patch series that I'm working on. Signed-off-by: Michael Haggerty <mhagger@xxxxxxxxxxxx> --- refs/ref-cache.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 84 insertions(+), 10 deletions(-) diff --git a/refs/ref-cache.c b/refs/ref-cache.c index ec97f3a38a..fda3942dbe 100644 --- a/refs/ref-cache.c +++ b/refs/ref-cache.c @@ -312,11 +312,42 @@ static void sort_ref_dir(struct ref_dir *dir) dir->sorted = dir->nr = i; } +enum prefix_state { + /* All refs within the directory would match prefix: */ + PREFIX_CONTAINS_DIR, + + /* Some, but not all, refs within the directory might match prefix: */ + PREFIX_WITHIN_DIR, + + /* No refs within the directory could possibly match prefix: */ + PREFIX_EXCLUDES_DIR +}; + /* - * Load all of the refs from `dir` (recursively) into our in-memory - * cache. + * Return a `prefix_state` constant describing the relationship + * between the directory with the specified `dirname` and `prefix`. */ -static void prime_ref_dir(struct ref_dir *dir) +static enum prefix_state overlaps_prefix(const char *dirname, + const char *prefix) +{ + while (*prefix && *dirname == *prefix) { + dirname++; + prefix++; + } + if (!*prefix) + return PREFIX_CONTAINS_DIR; + else if (!*dirname) + return PREFIX_WITHIN_DIR; + else + return PREFIX_EXCLUDES_DIR; +} + +/* + * Load all of the refs from `dir` (recursively) that could possibly + * contain references matching `prefix` into our in-memory cache. If + * `prefix` is NULL, prime unconditionally. + */ +static void prime_ref_dir(struct ref_dir *dir, const char *prefix) { /* * The hard work of loading loose refs is done by get_ref_dir(), so we @@ -327,8 +358,29 @@ static void prime_ref_dir(struct ref_dir *dir) int i; for (i = 0; i < dir->nr; i++) { struct ref_entry *entry = dir->entries[i]; - if (entry->flag & REF_DIR) - prime_ref_dir(get_ref_dir(entry)); + if (!(entry->flag & REF_DIR)) { + /* Not a directory; no need to recurse. */ + } else if (!prefix) { + /* Recurse in any case: */ + prime_ref_dir(get_ref_dir(entry), NULL); + } else { + switch (overlaps_prefix(entry->name, prefix)) { + case PREFIX_CONTAINS_DIR: + /* + * Recurse, and from here down we + * don't have to check the prefix + * anymore: + */ + prime_ref_dir(get_ref_dir(entry), NULL); + break; + case PREFIX_WITHIN_DIR: + prime_ref_dir(get_ref_dir(entry), prefix); + break; + case PREFIX_EXCLUDES_DIR: + /* No need to prime this directory. */ + break; + } + } } } @@ -343,6 +395,8 @@ struct cache_ref_iterator_level { */ struct ref_dir *dir; + enum prefix_state prefix_state; + /* * The index of the current entry within dir (which might * itself be a directory). If index == -1, then the iteration @@ -369,6 +423,13 @@ struct cache_ref_iterator { /* The number of levels that have been allocated on the stack */ size_t levels_alloc; + /* + * Only include references with this prefix in the iteration. + * The prefix is matched textually, without regard for path + * component boundaries. + */ + const char *prefix; + /* * A stack of levels. levels[0] is the uppermost level that is * being iterated over in this iteration. (This is not @@ -390,6 +451,7 @@ static int cache_ref_iterator_advance(struct ref_iterator *ref_iterator) &iter->levels[iter->levels_nr - 1]; struct ref_dir *dir = level->dir; struct ref_entry *entry; + enum prefix_state entry_prefix_state; if (level->index == -1) sort_ref_dir(dir); @@ -404,6 +466,14 @@ static int cache_ref_iterator_advance(struct ref_iterator *ref_iterator) entry = dir->entries[level->index]; + if (level->prefix_state == PREFIX_WITHIN_DIR) { + entry_prefix_state = overlaps_prefix(entry->name, iter->prefix); + if (entry_prefix_state == PREFIX_EXCLUDES_DIR) + continue; + } else { + entry_prefix_state = level->prefix_state; + } + if (entry->flag & REF_DIR) { /* push down a level */ ALLOC_GROW(iter->levels, iter->levels_nr + 1, @@ -411,6 +481,7 @@ static int cache_ref_iterator_advance(struct ref_iterator *ref_iterator) level = &iter->levels[iter->levels_nr++]; level->dir = get_ref_dir(entry); + level->prefix_state = entry_prefix_state; level->index = -1; } else { iter->base.refname = entry->name; @@ -496,10 +567,10 @@ struct ref_iterator *cache_ref_iterator_begin(struct ref_cache *cache, dir = find_containing_dir(dir, prefix, 0); if (!dir) /* There's nothing to iterate over. */ - return empty_ref_iterator_begin(); + return empty_ref_iterator_begin(); if (prime_dir) - prime_ref_dir(dir); + prime_ref_dir(dir, prefix); iter = xcalloc(1, sizeof(*iter)); ref_iterator = &iter->base; @@ -511,9 +582,12 @@ struct ref_iterator *cache_ref_iterator_begin(struct ref_cache *cache, level->index = -1; level->dir = dir; - if (prefix && *prefix) - ref_iterator = prefix_ref_iterator_begin(ref_iterator, - prefix, 0); + if (prefix && *prefix) { + iter->prefix = xstrdup(prefix); + level->prefix_state = PREFIX_WITHIN_DIR; + } else { + level->prefix_state = PREFIX_CONTAINS_DIR; + } return ref_iterator; } -- 2.11.0