Thanks everyone for your reviews. I believe I've addressed all of them. The main change is that I've also made a change in submodule-config to avoid registering submodule ODBs as alternates (thanks, Matheus, for noticing this). Jonathan Tan (8): submodule: lazily add submodule ODBs as alternates grep: use submodule-ODB-as-alternate lazy-addition grep: typesafe versions of grep_source_init grep: read submodule entry with explicit repo grep: allocate subrepos on heap grep: add repository to OID grep sources submodule-config: pass repo upon blob config read t7814: show lack of alternate ODB-adding builtin/grep.c | 64 +++++++++++++++++++----------- config.c | 18 ++++++--- config.h | 3 ++ grep.c | 51 +++++++++++++++--------- grep.h | 18 +++++++-- object-file.c | 5 +++ submodule-config.c | 5 ++- submodule.c | 25 +++++++++++- submodule.h | 8 ++++ t/README | 10 +++++ t/t7814-grep-recurse-submodules.sh | 3 ++ 11 files changed, 156 insertions(+), 54 deletions(-) Range-diff against v1: 1: 5994a517e8 = 1: 5994a517e8 submodule: lazily add submodule ODBs as alternates 2: 31e9b914c4 = 2: 31e9b914c4 grep: use submodule-ODB-as-alternate lazy-addition 3: e5e6a0dc1e ! 3: aa3f1f3c89 grep: typesafe versions of grep_source_init @@ builtin/grep.c: static int grep_file(struct grep_opt *opt, const char *filename) if (num_threads > 1) { ## grep.c ## -@@ grep.c: int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size) +@@ grep.c: int grep_source(struct grep_opt *opt, struct grep_source *gs) + return grep_source_1(opt, gs, 0); + } + ++static void grep_source_init_buf(struct grep_source *gs, char *buf, ++ unsigned long size) ++{ ++ gs->type = GREP_SOURCE_BUF; ++ gs->name = NULL; ++ gs->path = NULL; ++ gs->buf = buf; ++ gs->size = size; ++ gs->driver = NULL; ++ gs->identifier = NULL; ++} ++ + int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size) + { struct grep_source gs; int r; - grep_source_init(&gs, GREP_SOURCE_BUF, NULL, NULL, NULL); -+ grep_source_init_buf(&gs); - gs.buf = buf; - gs.size = size; +- gs.buf = buf; +- gs.size = size; ++ grep_source_init_buf(&gs, buf, size); + + r = grep_source(opt, &gs); @@ grep.c: int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size) return r; @@ grep.c: int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size) + gs->size = 0; + gs->driver = NULL; + gs->identifier = oiddup(oid); -+} -+ -+void grep_source_init_buf(struct grep_source *gs) -+{ -+ gs->type = GREP_SOURCE_BUF; -+ gs->name = NULL; -+ gs->path = NULL; -+ gs->buf = NULL; -+ gs->size = 0; -+ gs->driver = NULL; -+ gs->identifier = NULL; } void grep_source_clear(struct grep_source *gs) @@ grep.h: struct grep_source { + const char *path); +void grep_source_init_oid(struct grep_source *gs, const char *name, + const char *path, const struct object_id *oid); -+void grep_source_init_buf(struct grep_source *gs); void grep_source_clear_data(struct grep_source *gs); void grep_source_clear(struct grep_source *gs); void grep_source_load_driver(struct grep_source *gs, 4: 30ead880b3 = 4: 050deacfb7 grep: read submodule entry with explicit repo 5: f62608e88f ! 5: 3f24815224 grep: allocate subrepos on heap @@ builtin/grep.c: static void work_done(struct work_item *w) + free(repos_to_free[i]); + } + free(repos_to_free); ++ repos_to_free_nr = 0; ++ repos_to_free_alloc = 0; +} + static void *run(void *arg) 6: b2df245587 ! 6: 50c69a988b grep: add repository to OID grep sources @@ builtin/grep.c: static int grep_oid(struct grep_opt *opt, const struct object_id strbuf_release(&pathbuf); if (num_threads > 1) { +@@ builtin/grep.c: static int grep_submodule(struct grep_opt *opt, + repo_read_gitmodules(subrepo, 0); + + /* +- * NEEDSWORK: This adds the submodule's object directory to the list of +- * alternates for the single in-memory object store. This has some bad +- * consequences for memory (processed objects will never be freed) and +- * performance (this increases the number of pack files git has to pay +- * attention to, to the sum of the number of pack files in all the +- * repositories processed so far). This can be removed once the object +- * store is no longer global and instead is a member of the repository +- * object. ++ * All code paths tested by test code no longer need submodule ODBs to ++ * be added as alternates, but add it to the list just in case. ++ * Submodule ODBs added through add_submodule_odb_by_path() will be ++ * lazily registered as alternates when needed (and except in an ++ * unexpected code interaction, it won't be needed). + */ + add_submodule_odb_by_path(subrepo->objects->odb->path); + obj_read_unlock(); ## grep.c ## @@ grep.c: void grep_source_init_file(struct grep_source *gs, const char *name, @@ grep.c: void grep_source_init_oid(struct grep_source *gs, const char *name, + gs->repo = repo; } - void grep_source_init_buf(struct grep_source *gs) + void grep_source_clear(struct grep_source *gs) @@ grep.c: static int grep_source_load_oid(struct grep_source *gs) { enum object_type type; @@ grep.c: static int grep_source_load_oid(struct grep_source *gs) gs->name, ## grep.h ## +@@ grep.h: struct grep_opt { + struct grep_pat *header_list; + struct grep_pat **header_tail; + struct grep_expr *pattern_expression; ++ ++ /* ++ * NEEDSWORK: See if we can remove this field, because the repository ++ * should probably be per-source, not per-repo. This is potentially the ++ * cause of at least one bug - "git grep" ignoring the textconv ++ * attributes from submodules. See [1] for more information. ++ * [1] https://lore.kernel.org/git/CAHd-oW5iEQarYVxEXoTG-ua2zdoybTrSjCBKtO0YT292fm0NQQ@xxxxxxxxxxxxxx/ ++ */ + struct repository *repo; ++ + const char *prefix; + int prefix_length; + regex_t regexp; @@ grep.h: struct grep_source { GREP_SOURCE_BUF, } type; @@ grep.h: struct grep_source { - const char *path, const struct object_id *oid); + const char *path, const struct object_id *oid, + struct repository *repo); - void grep_source_init_buf(struct grep_source *gs); void grep_source_clear_data(struct grep_source *gs); void grep_source_clear(struct grep_source *gs); + void grep_source_load_driver(struct grep_source *gs, -: ---------- > 7: 94db10a4e5 submodule-config: pass repo upon blob config read 7: f1fc89894b = 8: 4a51fcfb77 t7814: show lack of alternate ODB-adding -- 2.33.0.rc1.237.g0d66db33f3-goog