Re: [PATCH v4 12/15] revision.c: use Bloom filters to speed up path based revision walks

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Apr 06, 2020 at 04:59:52PM +0000, Garima Singh via GitGitGadget wrote:
> +static void prepare_to_use_bloom_filter(struct rev_info *revs)
> +{
> +	struct pathspec_item *pi;
> +	char *path_alloc = NULL;
> +	const char *path;
> +	int last_index;
> +	int len;
> +
> +	if (!revs->commits)
> +	    return;
> +
> +	repo_parse_commit(revs->repo, revs->commits->item);
> +
> +	if (!revs->repo->objects->commit_graph)
> +		return;
> +
> +	revs->bloom_filter_settings = revs->repo->objects->commit_graph->bloom_filter_settings;
> +	if (!revs->bloom_filter_settings)
> +		return;
> +
> +	pi = &revs->pruning.pathspec.items[0];
> +	last_index = pi->len - 1;
> +
> +	/* remove single trailing slash from path, if needed */
> +	if (pi->match[last_index] == '/') {
> +	    path_alloc = xstrdup(pi->match);
> +	    path_alloc[last_index] = '\0';
> +	    path = path_alloc;

fill_bloom_key() takes a length parameter, so there is no need to
duplicate the path to be able to shorten it by one character to remove
that trailing '/'.

> +	} else
> +	    path = pi->match;
> +
> +	len = strlen(path);

'struct pathspec_item's 'len' field already contains the length of the
path, so there is no need for this strlen().

> +
> +	revs->bloom_key = xmalloc(sizeof(struct bloom_key));
> +	fill_bloom_key(path, len, revs->bloom_key, revs->bloom_filter_settings);
> +
> +	free(path_alloc);
> +}

> @@ -3362,6 +3440,8 @@ int prepare_revision_walk(struct rev_info *revs)
>  				       FOR_EACH_OBJECT_PROMISOR_ONLY);
>  	}
>  
> +	if (revs->pruning.pathspec.nr == 1 && !revs->reflog_info)
> +		prepare_to_use_bloom_filter(revs);
>  	if (revs->no_walk != REVISION_WALK_NO_WALK_UNSORTED)
>  		commit_list_sort_by_date(&revs->commits);
>  	if (revs->no_walk)
                return 0;
        if (revs->limited) {
                if (limit_list(revs) < 0)
                        return -1;

I extended the hunk context a bit to show that
prepare_to_use_bloom_filter() is called before limit_list().  This is
important, because specifying exclude revs and pathspecs, i.e.  'git
log ^v1.2.3 -- dir/file' does perform a lot of diffs in limit_list(),
and this way we can take advantage of Bloom filters even in this case.

> @@ -3379,6 +3459,7 @@ int prepare_revision_walk(struct rev_info *revs)
>  		simplify_merges(revs);
>  	if (revs->children.name)
>  		set_children(revs);
> +
>  	return 0;
>  }
>  
> diff --git a/revision.h b/revision.h
> index 475f048fb61..7c026fe41fc 100644
> --- a/revision.h
> +++ b/revision.h
> @@ -56,6 +56,8 @@ struct repository;
>  struct rev_info;
>  struct string_list;
>  struct saved_parents;
> +struct bloom_key;
> +struct bloom_filter_settings;
>  define_shared_commit_slab(revision_sources, char *);
>  
>  struct rev_cmdline_info {
> @@ -291,6 +293,15 @@ struct rev_info {
>  	struct revision_sources *sources;
>  
>  	struct topo_walk_info *topo_walk_info;
> +
> +	/* Commit graph bloom filter fields */
> +	/* The bloom filter key for the pathspec */
> +	struct bloom_key *bloom_key;
> +	/*
> +	 * The bloom filter settings used to generate the key.
> +	 * This is loaded from the commit-graph being used.
> +	 */
> +	struct bloom_filter_settings *bloom_filter_settings;
>  };
>  
>  int ref_excluded(struct string_list *, const char *path);
> -- 
> gitgitgadget
> 



[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux