[PATCH 7/9] commit-graph: reuse existing bloom filters during write.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Garima Singh <garima.singh@xxxxxxxxxxxxx>

Read previously computed bloom filters from the commit-graph file if possible
to avoid recomputing during commit-graph write.

Reading from the commit-graph is based on the format in which bloom filters are
written in the commit graph file. See method `fill_filter_from_graph` in bloom.c

For reading the bloom filter for commit at lexicographic position i:
1. Read BIDX[i] which essentially gives us the starting index in BDAT for filter
   of commit i+1 (called the next_index in the code)

2. For i>0, read BIDX[i-1] which will give us the starting index in BDAT for
   filter of commit i (called the prev_index in the code)
   For i = 0, prev_index will be 0. The first lexicographic commit's filter will
   start at BDAT.

3. The length of the filter will be next_index - prev_index, because BIDX[i]
   gives the cumulative 8-byte words including the ith commit's filter.

We toggle whether bloom filters should be recomputed based on the compute_if_null
flag.

Helped-by: Derrick Stolee <dstolee@xxxxxxxxxxxxx>
Signed-off-by: Garima Singh <garima.singh@xxxxxxxxxxxxx>
---
 bloom.c        | 40 ++++++++++++++++++++++++++++++++++++++--
 bloom.h        |  3 ++-
 commit-graph.c |  6 +++---
 3 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/bloom.c b/bloom.c
index 08328cc381..86b1005802 100644
--- a/bloom.c
+++ b/bloom.c
@@ -1,5 +1,7 @@
 #include "git-compat-util.h"
 #include "bloom.h"
+#include "commit.h"
+#include "commit-slab.h"
 #include "commit-graph.h"
 #include "object-store.h"
 #include "diff.h"
@@ -119,13 +121,35 @@ static void add_key_to_filter(struct bloom_key *key,
 	}
 }
 
+static void fill_filter_from_graph(struct commit_graph *g,
+				   struct bloom_filter *filter,
+				   struct commit *c)
+{
+	uint32_t lex_pos, prev_index, next_index;
+
+	while (c->graph_pos < g->num_commits_in_base)
+		g = g->base_graph;
+
+	lex_pos = c->graph_pos - g->num_commits_in_base;
+
+	next_index = get_be32(g->chunk_bloom_indexes + 4 * lex_pos);
+	if (lex_pos)
+		prev_index = get_be32(g->chunk_bloom_indexes + 4 * (lex_pos - 1));
+	else
+		prev_index = 0;
+
+	filter->len = next_index - prev_index;
+	filter->data = (uint64_t *)(g->chunk_bloom_data + 8 * prev_index + 12);
+}
+
 void load_bloom_filters(void)
 {
 	init_bloom_filter_slab(&bloom_filters);
 }
 
 struct bloom_filter *get_bloom_filter(struct repository *r,
-				      struct commit *c)
+				      struct commit *c,
+				      int compute_if_null)
 {
 	struct bloom_filter *filter;
 	struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS;
@@ -134,6 +158,18 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
 	const char *revs_argv[] = {NULL, "HEAD", NULL};
 
 	filter = bloom_filter_slab_at(&bloom_filters, c);
+
+	if (!filter->data) {
+		load_commit_graph_info(r, c);
+		if (c->graph_pos != COMMIT_NOT_FROM_GRAPH && r->objects->commit_graph->chunk_bloom_indexes) {
+			fill_filter_from_graph(r->objects->commit_graph, filter, c);
+			return filter;
+		}
+	}
+
+	if (filter->data || !compute_if_null)
+			return filter;
+
 	init_revisions(&revs, NULL);
 	revs.diffopt.flags.recursive = 1;
 
@@ -198,4 +234,4 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
 	DIFF_QUEUE_CLEAR(&diff_queued_diff);
 
 	return filter;
-}
\ No newline at end of file
+}
diff --git a/bloom.h b/bloom.h
index ba8ae70b67..101d689bbd 100644
--- a/bloom.h
+++ b/bloom.h
@@ -36,7 +36,8 @@ struct bloom_key {
 void load_bloom_filters(void);
 
 struct bloom_filter *get_bloom_filter(struct repository *r,
-				      struct commit *c);
+				      struct commit *c,
+				      int compute_if_null);
 
 void fill_bloom_key(const char *data,
 		    int len,
diff --git a/commit-graph.c b/commit-graph.c
index def2ade166..0580ce75d5 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1032,7 +1032,7 @@ static void write_graph_chunk_bloom_indexes(struct hashfile *f,
 	uint32_t cur_pos = 0;
 
 	while (list < last) {
-		struct bloom_filter *filter = get_bloom_filter(ctx->r, *list);
+		struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
 		cur_pos += filter->len;
 		hashwrite_be32(f, cur_pos);
 		list++;
@@ -1051,7 +1051,7 @@ static void write_graph_chunk_bloom_data(struct hashfile *f,
 	hashwrite_be32(f, settings->bits_per_entry);
 
 	while (first < last) {
-		struct bloom_filter *filter = get_bloom_filter(ctx->r, *first);
+		struct bloom_filter *filter = get_bloom_filter(ctx->r, *first, 0);
 		hashwrite(f, filter->data, filter->len * sizeof(uint64_t));
 		first++;
 	}
@@ -1218,7 +1218,7 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
 
 	for (i = 0; i < ctx->commits.nr; i++) {
 		struct commit *c = ctx->commits.list[i];
-		struct bloom_filter *filter = get_bloom_filter(ctx->r, c);
+		struct bloom_filter *filter = get_bloom_filter(ctx->r, c, 1);
 		ctx->total_bloom_filter_size += sizeof(uint64_t) * filter->len;
 		display_progress(progress, i + 1);
 	}
-- 
gitgitgadget




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux