On Fri, Aug 13, 2021 at 6:05 PM Jonathan Tan <jonathantanmy@xxxxxxxxxx> wrote: > > Record the repository whenever an OID grep source is created, and teach > the worker threads to explicitly provide the repository when accessing > objects. > > Signed-off-by: Jonathan Tan <jonathantanmy@xxxxxxxxxx> > --- > builtin/grep.c | 15 ++++++--------- > grep.c | 7 +++++-- > grep.h | 13 ++++++++++++- > 3 files changed, 23 insertions(+), 12 deletions(-) > > diff --git a/builtin/grep.c b/builtin/grep.c > index 69d8ea0808..d27e95e092 100644 > --- a/builtin/grep.c > +++ b/builtin/grep.c > @@ -349,7 +349,7 @@ static int grep_oid(struct grep_opt *opt, const struct object_id *oid, > struct grep_source gs; > > grep_source_name(opt, filename, tree_name_len, &pathbuf); > - grep_source_init_oid(&gs, pathbuf.buf, path, oid); > + grep_source_init_oid(&gs, pathbuf.buf, path, oid, opt->repo); > strbuf_release(&pathbuf); > > if (num_threads > 1) { > @@ -462,14 +462,11 @@ static int grep_submodule(struct grep_opt *opt, > repo_read_gitmodules(subrepo, 0); > > /* > - * NEEDSWORK: This adds the submodule's object directory to the list of > - * alternates for the single in-memory object store. This has some bad > - * consequences for memory (processed objects will never be freed) and > - * performance (this increases the number of pack files git has to pay > - * attention to, to the sum of the number of pack files in all the > - * repositories processed so far). This can be removed once the object > - * store is no longer global and instead is a member of the repository > - * object. > + * All code paths tested by test code no longer need submodule ODBs to > + * be added as alternates, but add it to the list just in case. > + * Submodule ODBs added through add_submodule_odb_by_path() will be > + * lazily registered as alternates when needed (and except in an > + * unexpected code interaction, it won't be needed). Nice. > */ > add_submodule_odb_by_path(subrepo->objects->odb->path); > obj_read_unlock(); > diff --git a/grep.c b/grep.c > index 8a8105c2eb..79598f245f 100644 > --- a/grep.c > +++ b/grep.c > @@ -1863,7 +1863,8 @@ void grep_source_init_file(struct grep_source *gs, const char *name, > } > > void grep_source_init_oid(struct grep_source *gs, const char *name, > - const char *path, const struct object_id *oid) > + const char *path, const struct object_id *oid, > + struct repository *repo) > { > gs->type = GREP_SOURCE_OID; > gs->name = xstrdup_or_null(name); > @@ -1872,6 +1873,7 @@ void grep_source_init_oid(struct grep_source *gs, const char *name, > gs->size = 0; > gs->driver = NULL; > gs->identifier = oiddup(oid); > + gs->repo = repo; > } > > void grep_source_clear(struct grep_source *gs) > @@ -1900,7 +1902,8 @@ static int grep_source_load_oid(struct grep_source *gs) > { > enum object_type type; > > - gs->buf = read_object_file(gs->identifier, &type, &gs->size); > + gs->buf = repo_read_object_file(gs->repo, gs->identifier, &type, > + &gs->size); > if (!gs->buf) > return error(_("'%s': unable to read %s"), > gs->name, > diff --git a/grep.h b/grep.h > index 480b3f5bba..24c6b64cea 100644 > --- a/grep.h > +++ b/grep.h > @@ -120,7 +120,16 @@ struct grep_opt { > struct grep_pat *header_list; > struct grep_pat **header_tail; > struct grep_expr *pattern_expression; > + > + /* > + * NEEDSWORK: See if we can remove this field, because the repository > + * should probably be per-source, not per-repo. Hmm, I think the "not per-repo" part is a bit confusing, as it refers to "the repository" ("the repository should not be per-repo"?) Could we remove that part? Maybe we could also be a bit more specific regarding the suggested conversion: "See if we can remove this field, because the repository should probably be per-source. That is, grep.c functions using `grep_opt.repo` should probably start using `grep_source.repo` instead." (But that's nitpicking from my part, feel free to ignore it.) > [...] This is potentially the > + * cause of at least one bug - "git grep" ignoring the textconv > + * attributes from submodules. See [1] for more information. > + * [1] https://lore.kernel.org/git/CAHd-oW5iEQarYVxEXoTG-ua2zdoybTrSjCBKtO0YT292fm0NQQ@xxxxxxxxxxxxxx/ > + */ > struct repository *repo; > + > const char *prefix; > int prefix_length; > regex_t regexp; > @@ -187,6 +196,7 @@ struct grep_source { > GREP_SOURCE_BUF, > } type; > void *identifier; > + struct repository *repo; /* if GREP_SOURCE_OID */ > > char *buf; > unsigned long size; > @@ -198,7 +208,8 @@ struct grep_source { > void grep_source_init_file(struct grep_source *gs, const char *name, > const char *path); > void grep_source_init_oid(struct grep_source *gs, const char *name, > - const char *path, const struct object_id *oid); > + const char *path, const struct object_id *oid, > + struct repository *repo); > void grep_source_clear_data(struct grep_source *gs); > void grep_source_clear(struct grep_source *gs); > void grep_source_load_driver(struct grep_source *gs, > -- > 2.33.0.rc1.237.g0d66db33f3-goog >