David Turner <dturner@xxxxxxxxxxxxxxxx> writes: > Instead of a linear search over common_list to check whether > a path is common, use a trie. The trie search operates on > path prefixes, and handles excludes. > > Signed-off-by: David Turner <dturner@xxxxxxxxxxxxxxxx> > --- > path.c | 226 ++++++++++++++++++++++++++++++++++++++++++++++---- > t/t0060-path-utils.sh | 1 + > 2 files changed, 213 insertions(+), 14 deletions(-) > > diff --git a/path.c b/path.c > index d24bfa2..c7a4c40 100644 > --- a/path.c > +++ b/path.c > @@ -121,25 +121,223 @@ struct common_dir common_list[] = { > { 0, 0, 0, NULL } > }; > > -static void update_common_dir(struct strbuf *buf, int git_dir_len) > +/* > + * A compressed trie. A trie node consists of zero or more characters that > + * are common to all elements with this prefix, optionally followed by some > + * children. If value is not NULL, the trie node is a terminal node. > + * > + * For example, consider the following set of strings: > + * abc > + * def > + * definite > + * definition > + * > + * The trie would look look like: > + * root: len = 0, value = (something), children a and d non-NULL. "value = NULL", as there is no empty string registered in the trie? > + * a: len = 2, contents = bc "value = NULL" here, too (just showing I am following along, not just skimming)? > + * d: len = 2, contents = ef, children i non-NULL, value = (something) > + * i: len = 3, contents = nit, children e and i non-NULL, value = NULL > + * e: len = 0, children all NULL, value = (something) > + * i: len = 2, contents = on, children all NULL, value = (something) > + */ > +struct trie { > + struct trie *children[256]; > + int len; > + char *contents; > + void *value; > +}; > + > +static struct trie *make_trie_node(const char *key, void *value) > { > - char *base = buf->buf + git_dir_len; > - const struct common_dir *p; > + struct trie *new_node = xcalloc(1, sizeof(*new_node)); > + new_node->len = strlen(key); > + if (new_node->len) { > + new_node->contents = xmalloc(new_node->len); > + memcpy(new_node->contents, key, new_node->len); > + } > + new_node->value = value; > + return new_node; > +} > > - if (is_dir_file(base, "logs", "HEAD") || > - is_dir_file(base, "info", "sparse-checkout")) > - return; /* keep this in $GIT_DIR */ > - for (p = common_list; p->dirname; p++) { > - const char *path = p->dirname; > - if (p->is_dir && dir_prefix(base, path)) { > - replace_dir(buf, git_dir_len, get_git_common_dir()); > - return; > +/* > + * Add a key/value pair to a trie. The key is assumed to be \0-terminated. > + * If there was an existing value for this key, return it. > + */ > +static void *add_to_trie(struct trie *root, const char *key, void *value) > +{ > + struct trie *child; > + void *old; > + int i; > + > + if (!*key) { > + /* we have reached the end of the key */ > + old = root->value; > + root->value = value; > + return old; > + } > + > + for (i = 0; i < root->len; ++i) { > + if (root->contents[i] == key[i]) > + continue; > + > + /* > + * Split this node: child will contain this node's > + * existing children. > + */ > + child = malloc(sizeof(*child)); > + memcpy(child->children, root->children, sizeof(root->children)); > + > + child->len = root->len - i - 1; > + if (child->len) { > + child->contents = strndup(root->contents + i + 1, > + child->len); > } > - if (!p->is_dir && !strcmp(base, path)) { > - replace_dir(buf, git_dir_len, get_git_common_dir()); > - return; > + child->value = root->value; > + root->value = NULL; > + root->len = i; > + > + memset(root->children, 0, sizeof(root->children)); > + root->children[(unsigned char)root->contents[i]] = child; > + > + /* This is the newly-added child. */ > + root->children[(unsigned char)key[i]] = > + make_trie_node(key + i + 1, value); > + return NULL; > + } > + > + /* We have matched the entire compressed section */ > + if (key[i]) { > + child = root->children[(unsigned char)key[root->len]]; > + if (child) { > + return add_to_trie(child, key + root->len + 1, value); > + } else { > + child = make_trie_node(key + root->len + 1, value); > + root->children[(unsigned char)key[root->len]] = child; > + return NULL; > } > } > + > + old = root->value; > + root->value = value; > + return old; > +} > + > +typedef int (*match_fn)(const char *unmatched, void *data, void *baton); > + > +/* > + * Search a trie for some key. Find the longest /-or-\0-terminated > + * prefix of the key for which the trie contains a value. Call fn > + * with the unmatched portion of the key and the found value, and > + * return its return value. If there is no such prefix, return -1. > + * > + * The key is partially normalized: consecutive slashes are skipped. > + * > + * For example, consider the trie containing only [refs, > + * refs/worktree] (both with values). > + * > + * | key | unmatched | val from node | return value | > + * |-----------------|------------|---------------|--------------| > + * | a | not called | n/a | -1 | > + * | refs | \0 | refs | as per fn | > + * | refs/ | / | refs | as per fn | > + * | refs/w | /w | refs | as per fn | > + * | refs/worktree | \0 | refs/worktree | as per fn | > + * | refs/worktree/ | / | refs/worktree | as per fn | > + * | refs/worktree/a | /a | refs/worktree | as per fn | > + * |-----------------|------------|---------------|--------------| > + * > + */ > +static int trie_find(struct trie *root, const char *key, match_fn fn, > + void *baton) > +{ > + int i; > + int result; > + struct trie *child; > + > + if (!*key) { > + /* we have reached the end of the key */ > + if (root->value && !root->len) > + return fn(key, root->value, baton); > + else > + return -1; > + } > + > + for (i = 0; i < root->len; ++i) { > + /* Partial path normalization: skip consecutive slashes. */ > + if (key[i] == '/' && key[i+1] == '/') { > + key++; > + continue; > + } > + if (root->contents[i] != key[i]) > + return -1; > + } > + > + /* Matched the entire compressed section */ > + key += i; > + if (!*key) > + /* End of key */ > + return fn(key, root->value, baton); > + > + /* Partial path normalization: skip consecutive slashes */ > + while (key[0] == '/' && key[1] == '/') > + key ++; These "skipping consecutive slashes" inside this function is cute. I would have expected me to react "Eek, Ugly" to it, but somehow I didn't find them ugly. > + child = root->children[(unsigned char)*key]; > + if (child) > + result = trie_find(child, key + 1, fn, baton); > + else > + result = -1; > + > + if (result >= 0 || (*key != '/' && *key != 0)) > + return result; > + if (root->value) > + return fn(key, root->value, baton); > + else > + return -1; > +} > + > +static struct trie common_trie; > +static int common_trie_done_setup; > + > +static void init_common_trie(void) > +{ > + struct common_dir *p; > + > + if (common_trie_done_setup) > + return; > + > + for (p = common_list; p->dirname; p++) > + add_to_trie(&common_trie, p->dirname, p); > + > + common_trie_done_setup = 1; > +} I somehow wonder if we'd want to precomute the trie at the build time, though ;-) > +/* > + * Helper function for update_common_dir: returns 1 if the dir > + * prefix is common. > + */ > +static int check_common(const char *unmatched, void *value, void *baton) > +{ > + struct common_dir *dir = value; > + > + if (!dir) > + return 0; > + > + if (dir->is_dir && (unmatched[0] == 0 || unmatched[0] == '/')) > + return !dir->exclude; > + > + if (!dir->is_dir && unmatched[0] == 0) > + return !dir->exclude; > + > + return 0; > +} > + > +static void update_common_dir(struct strbuf *buf, int git_dir_len) > +{ > + char *base = buf->buf + git_dir_len; > + init_common_trie(); > + if (trie_find(&common_trie, base, check_common, NULL) > 0) > + replace_dir(buf, git_dir_len, get_git_common_dir()); > } Thanks for a pleasant read. > > void report_linked_checkout_garbage(void) > diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh > index 93605f4..1ca49e1 100755 > --- a/t/t0060-path-utils.sh > +++ b/t/t0060-path-utils.sh > @@ -271,6 +271,7 @@ test_git_path GIT_COMMON_DIR=bar objects/bar bar/objects/bar > test_git_path GIT_COMMON_DIR=bar info/exclude bar/info/exclude > test_git_path GIT_COMMON_DIR=bar info/grafts bar/info/grafts > test_git_path GIT_COMMON_DIR=bar info/sparse-checkout .git/info/sparse-checkout > +test_git_path GIT_COMMON_DIR=bar info//sparse-checkout .git/info//sparse-checkout > test_git_path GIT_COMMON_DIR=bar remotes/bar bar/remotes/bar > test_git_path GIT_COMMON_DIR=bar branches/bar bar/branches/bar > test_git_path GIT_COMMON_DIR=bar logs/refs/heads/master bar/logs/refs/heads/master -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html