Change calculation of changed submodule paths to read revisions config. We now read the submodule name for every changed submodule path in a commit. We then use the submodules names for fetching instead of the submodule paths. We lazily read all configurations of changed submodule path into a cache which can then be used to lookup the configuration for a certain revision and path or name. It is expected that .gitmodules files do not change often between commits. Thats why we lookup the .gitmodules sha1 and use a submodule configuration cache to lazily lookup the parsed result for a submodule stored by its name. This cache could be reused for other purposes which need knowledge about submodule configurations. For example automatic clone on fetch of a new submodule if it is configured to be automatically initialized. The submodule configuration in the current worktree can be stored using the null sha1. Signed-off-by: Heiko Voigt <hvoigt@xxxxxxxxxx> --- Makefile | 1 + submodule-config-cache.c | 96 ++++++++++++++++++++++ submodule-config-cache.h | 34 ++++++++ submodule.c | 196 ++++++++++++++++++++++++++++++++++++++------ t/t5526-fetch-submodules.sh | 31 +++++++ 5 files changed, 335 insertions(+), 23 deletions(-) create mode 100644 submodule-config-cache.c create mode 100644 submodule-config-cache.h diff --git a/Makefile b/Makefile index 98da708..2e1d83b 100644 --- a/Makefile +++ b/Makefile @@ -858,6 +858,7 @@ LIB_OBJS += strbuf.o LIB_OBJS += streaming.o LIB_OBJS += string-list.o LIB_OBJS += submodule.o +LIB_OBJS += submodule-config-cache.o LIB_OBJS += symlinks.o LIB_OBJS += tag.o LIB_OBJS += trace.o diff --git a/submodule-config-cache.c b/submodule-config-cache.c new file mode 100644 index 0000000..8ea5ac4 --- /dev/null +++ b/submodule-config-cache.c @@ -0,0 +1,96 @@ +#include "cache.h" +#include "submodule-config-cache.h" +#include "strbuf.h" +#include "hash.h" + +void submodule_config_cache_init(struct submodule_config_cache *cache) +{ + init_hash(&cache->for_name); + init_hash(&cache->for_path); +} + +static int free_one_submodule_config(void *ptr, void *data) +{ + struct submodule_config *entry = ptr; + + strbuf_release(&entry->path); + strbuf_release(&entry->name); + free(entry); + + return 0; +} + +void submodule_config_cache_free(struct submodule_config_cache *cache) +{ + /* NOTE: its important to iterate over the name hash here + * since paths might have multiple entries */ + for_each_hash(&cache->for_name, free_one_submodule_config, NULL); + free_hash(&cache->for_path); + free_hash(&cache->for_name); +} + +static unsigned int hash_sha1_string(const unsigned char *sha1, const char *string) +{ + int c; + unsigned int hash, string_hash = 5381; + memcpy(&hash, sha1, sizeof(hash)); + + /* djb2 hash */ + while ((c = *string++)) + string_hash = ((string_hash << 5) + hash) + c; /* hash * 33 + c */ + + return hash + string_hash; +} + +void submodule_config_cache_update_path(struct submodule_config_cache *cache, + struct submodule_config *config) +{ + void **pos; + int hash = hash_sha1_string(config->gitmodule_sha1, config->path.buf); + pos = insert_hash(hash, config, &cache->for_path); + if (pos) { + config->next = *pos; + *pos = config; + } +} + +void submodule_config_cache_insert(struct submodule_config_cache *cache, struct submodule_config *config) +{ + unsigned int hash; + void **pos; + + hash = hash_sha1_string(config->gitmodule_sha1, config->name.buf); + pos = insert_hash(hash, config, &cache->for_name); + if (pos) { + config->next = *pos; + *pos = config; + } +} + +struct submodule_config *submodule_config_cache_lookup_path(struct submodule_config_cache *cache, + const unsigned char *gitmodule_sha1, const char *path) +{ + unsigned int hash = hash_sha1_string(gitmodule_sha1, path); + struct submodule_config *config = lookup_hash(hash, &cache->for_path); + + while (config && + hashcmp(config->gitmodule_sha1, gitmodule_sha1) && + strcmp(path, config->path.buf)) + config = config->next; + + return config; +} + +struct submodule_config *submodule_config_cache_lookup_name(struct submodule_config_cache *cache, + const unsigned char *gitmodule_sha1, const char *name) +{ + unsigned int hash = hash_sha1_string(gitmodule_sha1, name); + struct submodule_config *config = lookup_hash(hash, &cache->for_name); + + while (config && + hashcmp(config->gitmodule_sha1, gitmodule_sha1) && + strcmp(name, config->name.buf)) + config = config->next; + + return config; +} diff --git a/submodule-config-cache.h b/submodule-config-cache.h new file mode 100644 index 0000000..2b48c27 --- /dev/null +++ b/submodule-config-cache.h @@ -0,0 +1,34 @@ +#ifndef SUBMODULE_CONFIG_CACHE_H +#define SUBMODULE_CONFIG_CACHE_H + +#include "hash.h" +#include "strbuf.h" + +struct submodule_config_cache { + struct hash_table for_path; + struct hash_table for_name; +}; + +/* one submodule_config_cache entry */ +struct submodule_config { + struct strbuf path; + struct strbuf name; + unsigned char gitmodule_sha1[20]; + int fetch_recurse_submodules; + struct submodule_config *next; +}; + +void submodule_config_cache_init(struct submodule_config_cache *cache); +void submodule_config_cache_free(struct submodule_config_cache *cache); + +void submodule_config_cache_update_path(struct submodule_config_cache *cache, + struct submodule_config *config); +void submodule_config_cache_insert(struct submodule_config_cache *cache, + struct submodule_config *config); + +struct submodule_config *submodule_config_cache_lookup_path(struct submodule_config_cache *cache, + const unsigned char *gitmodule_sha1, const char *path); +struct submodule_config *submodule_config_cache_lookup_name(struct submodule_config_cache *cache, + const unsigned char *gitmodule_sha1, const char *name); + +#endif /* SUBMODULE_CONFIG_CACHE_H */ diff --git a/submodule.c b/submodule.c index 9ba1496..b603000 100644 --- a/submodule.c +++ b/submodule.c @@ -1,5 +1,6 @@ #include "cache.h" #include "submodule.h" +#include "submodule-config-cache.h" #include "dir.h" #include "diff.h" #include "commit.h" @@ -11,11 +12,12 @@ #include "sha1-array.h" #include "argv-array.h" +struct submodule_config_cache submodule_config_cache; static struct string_list config_name_for_path; static struct string_list config_fetch_recurse_submodules_for_name; static struct string_list config_ignore_for_name; static int config_fetch_recurse_submodules = RECURSE_SUBMODULES_ON_DEMAND; -static struct string_list changed_submodule_paths; +static struct string_list changed_submodule_names; static int initialized_fetch_ref_tips; static struct sha1_array ref_tips_before_fetch; static struct sha1_array ref_tips_after_fetch; @@ -487,34 +489,177 @@ static int is_submodule_commit_present(const char *path, unsigned char sha1[20]) return is_present; } +static int read_sha1_strbuf(struct strbuf *s, const unsigned char *sha1, + enum object_type *type) +{ + unsigned long size; + void *sha1_data; + + sha1_data = read_sha1_file(sha1, type, &size); + if (!sha1_data) + return 0; + + strbuf_attach(s, sha1_data, size, size); + + return size; +} + +struct parse_submodule_config_parameter { + unsigned char *gitmodule_sha1; + struct submodule_config_cache *cache; +}; + +static int name_and_item_from_var(const char *var, struct strbuf *name, struct strbuf *item) +{ + /* find the name and add it */ + strbuf_addstr(name, var + strlen("submodule.")); + char *end = strrchr(name->buf, '.'); + if (!end) { + strbuf_release(name); + return 0; + } + *end = '\0'; + if (((end + 1) - name->buf) < name->len) + strbuf_addstr(item, end + 1); + + return 1; +} + +static struct submodule_config *lookup_or_create_by_name(struct submodule_config_cache *cache, + unsigned char *gitmodule_sha1, const char *name) +{ + struct submodule_config *config; + config = submodule_config_cache_lookup_name(cache, gitmodule_sha1, name); + if (config) + return config; + + config = xmalloc(sizeof(*config)); + + strbuf_init(&config->name, 1024); + strbuf_addstr(&config->name, name); + + strbuf_init(&config->path, 1024); + + hashcpy(config->gitmodule_sha1, gitmodule_sha1); + config->fetch_recurse_submodules = RECURSE_SUBMODULES_DEFAULT; + config->next = NULL; + + submodule_config_cache_insert(cache, config); + + return config; +} + +static void warn_multiple_config(struct submodule_config *config, const char *option) +{ + warning("%s:.gitmodules, multiple configurations found for submodule.%s.%s. " + "Skipping second one!", sha1_to_hex(config->gitmodule_sha1), + option, config->name.buf); +} + +static int parse_submodule_config_into_cache(const char *var, const char *value, void *data) +{ + struct parse_submodule_config_parameter *me = data; + struct submodule_config *submodule_config; + + /* We only read submodule.<name> entries */ + if (prefixcmp(var, "submodule.")) + return 0; + + struct strbuf name = STRBUF_INIT, item = STRBUF_INIT; + if (!name_and_item_from_var(var, &name, &item)) + return 0; + + submodule_config = lookup_or_create_by_name(me->cache, me->gitmodule_sha1, name.buf); + + if (!suffixcmp(var, ".path")) { + if (*submodule_config->path.buf != '\0') { + warn_multiple_config(submodule_config, "path"); + return 0; + } + strbuf_addstr(&submodule_config->path, value); + submodule_config_cache_update_path(me->cache, submodule_config); + } + + if (!suffixcmp(var, ".fetchrecursesubmodules")) { + if (submodule_config->fetch_recurse_submodules != RECURSE_SUBMODULES_DEFAULT) { + warn_multiple_config(submodule_config, "fetchrecursesubmodules"); + return 0; + } + submodule_config->fetch_recurse_submodules = + parse_fetch_recurse_submodules_arg(var, value); + } + + strbuf_release(&name); + strbuf_release(&item); + + return 0; +} + +static struct submodule_config *get_submodule_config_for_commit_path(struct submodule_config_cache *cache, + const unsigned char *commit_sha1, const char *path) +{ + struct strbuf rev = STRBUF_INIT; + struct strbuf config = STRBUF_INIT; + unsigned char sha1[20]; + enum object_type type; + struct submodule_config *submodule_config; + struct parse_submodule_config_parameter parameter; + + strbuf_addf(&rev, "%s:.gitmodules", sha1_to_hex(commit_sha1)); + if (get_sha1(rev.buf, sha1) < 0) { + strbuf_release(&rev); + return NULL; + } + strbuf_release(&rev); + + submodule_config = submodule_config_cache_lookup_path(cache, sha1, path); + if (submodule_config) + return submodule_config; + + if (!read_sha1_strbuf(&config, sha1, &type)) + return NULL; + + if (type != OBJ_BLOB) { + strbuf_release(&config); + return NULL; + } + + /* fill the submodule config into the cache */ + parameter.cache = cache; + parameter.gitmodule_sha1 = sha1; + git_config_from_strbuf(parse_submodule_config_into_cache, &config, ¶meter); + strbuf_release(&config); + + return submodule_config_cache_lookup_path(cache, sha1, path); +} + static void submodule_collect_changed_cb(struct diff_queue_struct *q, struct diff_options *options, void *data) { + const unsigned char *commit_sha1 = data; int i; for (i = 0; i < q->nr; i++) { struct diff_filepair *p = q->queue[i]; + struct string_list_item *name_item; + struct submodule_config *submodule_config; + if (!S_ISGITLINK(p->two->mode)) continue; - if (S_ISGITLINK(p->one->mode)) { - /* NEEDSWORK: We should honor the name configured in - * the .gitmodules file of the commit we are examining - * here to be able to correctly follow submodules - * being moved around. */ - struct string_list_item *path; - path = unsorted_string_list_lookup(&changed_submodule_paths, p->two->path); - if (!path && !is_submodule_commit_present(p->two->path, p->two->sha1)) - string_list_append(&changed_submodule_paths, xstrdup(p->two->path)); - } else { - /* Submodule is new or was moved here */ - /* NEEDSWORK: When the .git directories of submodules - * live inside the superprojects .git directory some - * day we should fetch new submodules directly into - * that location too when config or options request - * that so they can be checked out from there. */ + submodule_config = get_submodule_config_for_commit_path(&submodule_config_cache, + commit_sha1, p->two->path); + if (!submodule_config) continue; - } + + name_item = unsorted_string_list_lookup(&changed_submodule_names, submodule_config->name.buf); + if (name_item) + continue; + + if (is_submodule_commit_present(p->two->path, p->two->sha1)) + continue; + + string_list_append(&changed_submodule_names, xstrdup(submodule_config->name.buf)); } } @@ -550,6 +695,8 @@ static void calculate_changed_submodule_paths(void) if (!config_name_for_path.nr) return; + submodule_config_cache_init(&submodule_config_cache); + init_revisions(&rev, NULL); argv_array_push(&argv, "--"); /* argv[0] program name */ sha1_array_for_each_unique(&ref_tips_after_fetch, @@ -563,7 +710,7 @@ static void calculate_changed_submodule_paths(void) /* * Collect all submodules (whether checked out or not) for which new - * commits have been recorded upstream in "changed_submodule_paths". + * commits have been recorded upstream in "changed_submodule_names". */ while ((commit = get_revision(&rev))) { struct commit_list *parent = commit->parents; @@ -573,6 +720,7 @@ static void calculate_changed_submodule_paths(void) DIFF_OPT_SET(&diff_opts, RECURSIVE); diff_opts.output_format |= DIFF_FORMAT_CALLBACK; diff_opts.format_callback = submodule_collect_changed_cb; + diff_opts.format_callback_data = commit->object.sha1; diff_setup_done(&diff_opts); diff_tree_sha1(parent->item->object.sha1, commit->object.sha1, "", &diff_opts); diffcore_std(&diff_opts); @@ -585,6 +733,8 @@ static void calculate_changed_submodule_paths(void) sha1_array_clear(&ref_tips_before_fetch); sha1_array_clear(&ref_tips_after_fetch); initialized_fetch_ref_tips = 0; + + submodule_config_cache_free(&submodule_config_cache); } int fetch_populated_submodules(const struct argv_array *options, @@ -639,7 +789,7 @@ int fetch_populated_submodules(const struct argv_array *options, if ((intptr_t)fetch_recurse_submodules_option->util == RECURSE_SUBMODULES_OFF) continue; if ((intptr_t)fetch_recurse_submodules_option->util == RECURSE_SUBMODULES_ON_DEMAND) { - if (!unsorted_string_list_lookup(&changed_submodule_paths, ce->name)) + if (!unsorted_string_list_lookup(&changed_submodule_names, name)) continue; default_argv = "on-demand"; } @@ -648,13 +798,13 @@ int fetch_populated_submodules(const struct argv_array *options, gitmodules_is_unmerged) continue; if (config_fetch_recurse_submodules == RECURSE_SUBMODULES_ON_DEMAND) { - if (!unsorted_string_list_lookup(&changed_submodule_paths, ce->name)) + if (!unsorted_string_list_lookup(&changed_submodule_names, name)) continue; default_argv = "on-demand"; } } } else if (command_line_option == RECURSE_SUBMODULES_ON_DEMAND) { - if (!unsorted_string_list_lookup(&changed_submodule_paths, ce->name)) + if (!unsorted_string_list_lookup(&changed_submodule_names, name)) continue; default_argv = "on-demand"; } @@ -685,7 +835,7 @@ int fetch_populated_submodules(const struct argv_array *options, } argv_array_clear(&argv); out: - string_list_clear(&changed_submodule_paths, 1); + string_list_clear(&changed_submodule_names, 1); return result; } diff --git a/t/t5526-fetch-submodules.sh b/t/t5526-fetch-submodules.sh index ca5b027..7ee4c2b 100755 --- a/t/t5526-fetch-submodules.sh +++ b/t/t5526-fetch-submodules.sh @@ -450,4 +450,35 @@ test_expect_success "don't fetch submodule when newly recorded commits are alrea test_i18ncmp expect.err actual.err ' +test_expect_success "fetch new commits when submodule got renamed" ' + ( + cd submodule && + git checkout -b rename_sub && + echo a >a && + git add a && + git commit -ma && + git rev-parse HEAD >../expect + ) && + git clone . downstream2 && + ( + cd downstream2 && + git submodule update --init --recursive && + git checkout -b rename && + mv submodule submodule_renamed && + git config -f .gitmodules submodule.submodule.path submodule_renamed && + git add submodule_renamed .gitmodules && + git commit -m "rename submodule -> submodule-renamed" && + git push origin rename + ) && + ( + cd downstream && + git fetch --recurse-submodules=on-demand && + ( + cd submodule && + git rev-parse origin/rename_sub >../../actual + ) + ) && + test_cmp expect actual +' + test_done -- 1.8.2.rc0.25.g5062c01 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html