Heiko Voigt <hvoigt@xxxxxxxxxx> writes: > +static struct sha1_array *get_sha1s_from_list(struct string_list *submodules, > + const char *path) > +{ > + struct string_list_item *item; > + struct sha1_array *hashes; > + > + item = string_list_insert(submodules, path); > + if (item->util) > + return (struct sha1_array *) item->util; > + > + hashes = (struct sha1_array *) xmalloc(sizeof(struct sha1_array)); > + /* NEEDSWORK: should we add an initializer function for > + * sha1_array ? */ > + memset(hashes, 0, sizeof(struct sha1_array)); > + item->util = hashes; /* NEEDSWORK: should we have SHA1_ARRAY_INIT etc.? */ item->util = xcalloc(1, sizeof(struct sha1_array)); > static void collect_submodules_from_diff(struct diff_queue_struct *q, > struct diff_options *options, > void *data) > { > int i; > - struct string_list *needs_pushing = data; > + struct string_list *submodules = data; > > for (i = 0; i < q->nr; i++) { > struct diff_filepair *p = q->queue[i]; > + struct sha1_array *hashes; > if (!S_ISGITLINK(p->two->mode)) > continue; > - if (submodule_needs_pushing(p->two->path, p->two->oid.hash)) > - string_list_insert(needs_pushing, p->two->path); > + hashes = get_sha1s_from_list(submodules, p->two->path); > + sha1_array_append(hashes, p->two->oid.hash); > } > } So the idea at this step is still to have each commit in the top-level history inspected for any submodule change, but the result is collected in a mapping (submodule -> [ list of submodule commits ]). As we do not expect too many "oops, the old commit was better, so let's revert and rebind the old one from the submodule" in the history of the top-level, appending and then running for-each-unique is more efficient than first checking if we already have it and then inserting new ones to maintain the uniqueness. Makes sense.
> @@ -582,14 +601,41 @@ static void find_unpushed_submodule_commits(struct commit *commit, > diff_tree_combined_merge(commit, 1, &rev); > } > > +struct collect_submodule_from_sha1s_data { > + char *submodule_path; > + struct string_list *needs_pushing; > +}; > + > +static void collect_submodules_from_sha1s(const unsigned char sha1[20], > + void *data) > +{ > + struct collect_submodule_from_sha1s_data *me = > + (struct collect_submodule_from_sha1s_data *) data; > + > + if (submodule_needs_pushing(me->submodule_path, sha1)) > + string_list_insert(me->needs_pushing, me->submodule_path); > +} This is called from sha1_array_for_each_unique(), which iterates over the submodule commit object names for one submodule and then ends up calling submodule_needs_pushing() a number of times, which smells less efficient than it could be. You can ask rev-list <all the submodule commits to be pushed> --not --remotes just once in the submodule repository. I imagine that is what you'll do in the next patch. An obvious but much less efficient way to optimize this part would be to see if me->needs_pushing already has me->submodule_path and skip the check for submodule_needs_pushing(), but if you drop the call by find_unpushed_submodules() to sha1_array_for_each_unique() to walk new submodule commits one by one, that would become irrelevant.
> +static void free_submodules_sha1s(struct string_list *submodules) > +{ > + int i; > + for (i = 0; i < submodules->nr; i++) { > + struct string_list_item *item = &submodules->items[i]; > + struct sha1_array *hashes = (struct sha1_array *) item->util; > + sha1_array_clear(hashes); > + } > + string_list_clear(submodules, 1); > +} > + > int find_unpushed_submodules(unsigned char new_sha1[20], > const char *remotes_name, struct string_list *needs_pushing) > { > struct rev_info rev; > struct commit *commit; > const char *argv[] = {NULL, NULL, "--not", "NULL", NULL}; > - int argc = ARRAY_SIZE(argv) - 1; > + int argc = ARRAY_SIZE(argv) - 1, i; > char *sha1_copy; > + struct string_list submodules = STRING_LIST_INIT_DUP; > > struct strbuf remotes_arg = STRBUF_INIT; > > @@ -603,12 +649,23 @@ int find_unpushed_submodules(unsigned char new_sha1[20], > die("revision walk setup failed"); > > while ((commit = get_revision(&rev)) != NULL) > - find_unpushed_submodule_commits(commit, needs_pushing); > + find_unpushed_submodule_commits(commit, &submodules); > > reset_revision_walk(); > free(sha1_copy); > strbuf_release(&remotes_arg); > > + for (i = 0; i < submodules.nr; i++) { > + struct string_list_item *item = &submodules.items[i]; > + struct collect_submodule_from_sha1s_data data; > + data.submodule_path = item->string; > + data.needs_pushing = needs_pushing; > + sha1_array_for_each_unique((struct sha1_array *) item->util, > + collect_submodules_from_sha1s, > + &data); > + } > + free_submodules_sha1s(&submodules); > + > return needs_pushing->nr; > }