On Mon, Apr 06, 2020 at 04:59:52PM +0000, Garima Singh via GitGitGadget wrote:
> +static void prepare_to_use_bloom_filter(struct rev_info *revs)
> +{
> +	struct pathspec_item *pi;
> +	char *path_alloc = NULL;
> +	const char *path;
> +	int last_index;
> +	int len;
> +
> +	if (!revs->commits)
> +		return;
> +
> +	repo_parse_commit(revs->repo, revs->commits->item);
> +
> +	if (!revs->repo->objects->commit_graph)
> +		return;
> +
> +	revs->bloom_filter_settings = revs->repo->objects->commit_graph->bloom_filter_settings;
> +	if (!revs->bloom_filter_settings)
> +		return;
> +
> +	pi = &revs->pruning.pathspec.items[0];
> +	last_index = pi->len - 1;
> +
> +	/* remove single trailing slash from path, if needed */
> +	if (pi->match[last_index] == '/') {
> +		path_alloc = xstrdup(pi->match);
> +		path_alloc[last_index] = '\0';
> +		path = path_alloc;

fill_bloom_key() takes a length parameter, so there is no need to
duplicate the path to be able to shorten it by one character to remove
that trailing '/'.

> +	} else
> +		path = pi->match;
> +
> +	len = strlen(path);

'struct pathspec_item's 'len' field already contains the length of the
path, so there is no need for this strlen().

> +
> +	revs->bloom_key = xmalloc(sizeof(struct bloom_key));
> +	fill_bloom_key(path, len, revs->bloom_key, revs->bloom_filter_settings);
> +
> +	free(path_alloc);
> +}

> @@ -3362,6 +3440,8 @@ int prepare_revision_walk(struct rev_info *revs)
>  				       FOR_EACH_OBJECT_PROMISOR_ONLY);
>  	}
>
> +	if (revs->pruning.pathspec.nr == 1 && !revs->reflog_info)
> +		prepare_to_use_bloom_filter(revs);
>  	if (revs->no_walk != REVISION_WALK_NO_WALK_UNSORTED)
>  		commit_list_sort_by_date(&revs->commits);
>  	if (revs->no_walk)
  		return 0;
  	if (revs->limited) {
  		if (limit_list(revs) < 0)
  			return -1;

I extended the hunk context a bit to show that
prepare_to_use_bloom_filter() is called before limit_list(). This is
important, because specifying exclude revs and pathspecs, e.g.
'git log ^v1.2.3 -- dir/file' does perform a lot of diffs in
limit_list(), and this way we can take advantage of Bloom filters even
in this case.

> @@ -3379,6 +3459,7 @@ int prepare_revision_walk(struct rev_info *revs)
>  		simplify_merges(revs);
>  	if (revs->children.name)
>  		set_children(revs);
> +
>  	return 0;
>  }
>
> diff --git a/revision.h b/revision.h
> index 475f048fb61..7c026fe41fc 100644
> --- a/revision.h
> +++ b/revision.h
> @@ -56,6 +56,8 @@ struct repository;
>  struct rev_info;
>  struct string_list;
>  struct saved_parents;
> +struct bloom_key;
> +struct bloom_filter_settings;
>  define_shared_commit_slab(revision_sources, char *);
>
>  struct rev_cmdline_info {
> @@ -291,6 +293,15 @@ struct rev_info {
>  	struct revision_sources *sources;
>
>  	struct topo_walk_info *topo_walk_info;
> +
> +	/* Commit graph bloom filter fields */
> +	/* The bloom filter key for the pathspec */
> +	struct bloom_key *bloom_key;
> +	/*
> +	 * The bloom filter settings used to generate the key.
> +	 * This is loaded from the commit-graph being used.
> +	 */
> +	struct bloom_filter_settings *bloom_filter_settings;
>  };
>
>  int ref_excluded(struct string_list *, const char *path);
> --
> gitgitgadget
>