[PATCH v2 08/11] commit-graph: reuse existing Bloom filters during write.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Garima Singh <garima.singh@xxxxxxxxxxxxx>

Read previously computed Bloom filters from the commit-graph file if
possible to avoid recomputing during commit-graph write.

See Documentation/technical/commit-graph-format for the format in which
the Bloom filter information is written to the commit graph file.

To read Bloom filter for a given commit with lexicographic position
'i' we need to:
1. Read BIDX[i] which essentially gives us the starting index in BDAT for
   filter of commit i+1. It is essentially the index past the end
   of the filter of commit i. It is called end_index in the code.

2. For i>0, read BIDX[i-1] which will give us the starting index in BDAT
   for filter of commit i. It is called the start_index in the code.
   For the first commit, where i = 0, Bloom filter data starts at the
   beginning, just past the header in the BDAT chunk. Hence, start_index
   will be 0.

3. The length of the filter will be end_index - start_index, because
   BIDX[i] gives the cumulative 8-byte words including the ith
   commit's filter.

We toggle whether Bloom filters should be recomputed based on the
compute_if_null flag.

Helped-by: Derrick Stolee <dstolee@xxxxxxxxxxxxx>
Signed-off-by: Garima Singh <garima.singh@xxxxxxxxxxxxx>
---
 bloom.c               | 49 ++++++++++++++++++++++++++++++++++++++++++-
 bloom.h               |  4 +++-
 commit-graph.c        |  7 ++++---
 t/helper/test-bloom.c |  2 +-
 4 files changed, 56 insertions(+), 6 deletions(-)

diff --git a/bloom.c b/bloom.c
index 818382c03b..90d84dc713 100644
--- a/bloom.c
+++ b/bloom.c
@@ -1,5 +1,7 @@
 #include "git-compat-util.h"
 #include "bloom.h"
+#include "commit.h"
+#include "commit-slab.h"
 #include "commit-graph.h"
 #include "object-store.h"
 #include "diff.h"
@@ -127,8 +129,39 @@ void add_key_to_filter(struct bloom_key *key,
 	}
 }
 
+static int load_bloom_filter_from_graph(struct commit_graph *g,
+				   struct bloom_filter *filter,
+				   struct commit *c)
+{
+	uint32_t lex_pos, start_index, end_index;
+
+	while (c->graph_pos < g->num_commits_in_base)
+		g = g->base_graph;
+
+	/* The commit graph commit 'c' lives in doesn't carry bloom filters. */
+	if (!g->chunk_bloom_indexes)
+		return 0;
+
+	lex_pos = c->graph_pos - g->num_commits_in_base;
+
+	end_index = get_be32(g->chunk_bloom_indexes + 4 * lex_pos);
+
+	if (lex_pos)
+		start_index = get_be32(g->chunk_bloom_indexes + 4 * (lex_pos - 1));
+	else
+		start_index = 0;
+
+	filter->len = end_index - start_index;
+	filter->data = (uint64_t *)(g->chunk_bloom_data +
+					sizeof(uint64_t) * start_index +
+					BLOOMDATA_CHUNK_HEADER_SIZE);
+
+	return 1;
+}
+
 struct bloom_filter *get_bloom_filter(struct repository *r,
-				      struct commit *c)
+				      struct commit *c,
+				      int compute_if_not_present)
 {
 	struct bloom_filter *filter;
 	struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS;
@@ -141,6 +174,20 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
 
 	filter = bloom_filter_slab_at(&bloom_filters, c);
 
+	if (!filter->data) {
+		load_commit_graph_info(r, c);
+		if (c->graph_pos != COMMIT_NOT_FROM_GRAPH &&
+			r->objects->commit_graph->chunk_bloom_indexes) {
+			if (load_bloom_filter_from_graph(r->objects->commit_graph, filter, c))
+				return filter;
+			else
+				return NULL;
+		}
+	}
+
+	if (filter->data || !compute_if_not_present)
+		return filter;
+
 	repo_diff_setup(r, &diffopt);
 	diffopt.flags.recursive = 1;
 	diffopt.max_changes = max_changes;
diff --git a/bloom.h b/bloom.h
index 7f40c751f7..76f8a9ad0c 100644
--- a/bloom.h
+++ b/bloom.h
@@ -13,6 +13,7 @@ struct bloom_filter_settings {
 
 #define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10 }
 #define BITS_PER_WORD 64
+#define BLOOMDATA_CHUNK_HEADER_SIZE 3*sizeof(uint32_t)
 
 /*
  * A bloom_filter struct represents a data segment to
@@ -47,7 +48,8 @@ void add_key_to_filter(struct bloom_key *key,
 					   struct bloom_filter_settings *settings);
 
 struct bloom_filter *get_bloom_filter(struct repository *r,
-				      struct commit *c);
+				      struct commit *c,
+				      int compute_if_not_present);
 
 int bloom_filter_contains(struct bloom_filter *filter,
 			  struct bloom_key *key,
diff --git a/commit-graph.c b/commit-graph.c
index 4585b3b702..c0e9834bf2 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1094,7 +1094,7 @@ static void write_graph_chunk_bloom_indexes(struct hashfile *f,
 			ctx->commits.nr);
 
 	while (list < last) {
-		struct bloom_filter *filter = get_bloom_filter(ctx->r, *list);
+		struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
 		cur_pos += filter->len;
 		display_progress(progress, ++i);
 		hashwrite_be32(f, cur_pos);
@@ -1123,7 +1123,7 @@ static void write_graph_chunk_bloom_data(struct hashfile *f,
 	hashwrite_be32(f, settings->bits_per_entry);
 
 	while (list < last) {
-		struct bloom_filter *filter = get_bloom_filter(ctx->r, *list);
+		struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
 		display_progress(progress, ++i);
 		hashwrite(f, filter->data, filter->len * sizeof(uint64_t));
 		list++;
@@ -1304,7 +1304,7 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
 
 	for (i = 0; i < ctx->commits.nr; i++) {
 		struct commit *c = sorted_by_pos[i];
-		struct bloom_filter *filter = get_bloom_filter(ctx->r, c);
+		struct bloom_filter *filter = get_bloom_filter(ctx->r, c, 1);
 		ctx->total_bloom_filter_data_size += sizeof(uint64_t) * filter->len;
 		display_progress(progress, i + 1);
 	}
@@ -2314,6 +2314,7 @@ void free_commit_graph(struct commit_graph *g)
 		g->data = NULL;
 		close(g->graph_fd);
 	}
+	free(g->bloom_filter_settings);
 	free(g->filename);
 	free(g);
 }
diff --git a/t/helper/test-bloom.c b/t/helper/test-bloom.c
index 331957011b..9b4be97f75 100644
--- a/t/helper/test-bloom.c
+++ b/t/helper/test-bloom.c
@@ -47,7 +47,7 @@ static void get_bloom_filter_for_commit(const struct object_id *commit_oid)
 	struct bloom_filter *filter;
 	setup_git_directory();
 	c = lookup_commit(the_repository, commit_oid);
-	filter = get_bloom_filter(the_repository, c);
+	filter = get_bloom_filter(the_repository, c, 1);
 	print_bloom_filter(filter);
 }
 
-- 
gitgitgadget





[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux