Signed-off-by: Jonathan Tan <jonathantanmy@xxxxxxxxxx> --- bloom-filter.c | 31 ++++++++++++++++++------------- bloom-filter.h | 12 ++++-------- commit-graph.c | 26 ++++++++++++-------------- revision.c | 9 +++------ 4 files changed, 37 insertions(+), 41 deletions(-) diff --git a/bloom-filter.c b/bloom-filter.c index 7dce0e35fa..39b453908f 100644 --- a/bloom-filter.c +++ b/bloom-filter.c @@ -1,14 +1,17 @@ #include "cache.h" #include "bloom-filter.h" -void bloom_filter_init(struct bloom_filter *bf, uint32_t bit_size) +void bloom_filter_init(struct bloom_filter *bf, uint32_t commit_nr, uint32_t bit_size) { if (bit_size % CHAR_BIT) BUG("invalid size for bloom filter"); + if (bit_size > 1024) + BUG("aborting: the bit size is per commit, not for the whole filter"); bf->nr_entries = 0; + bf->commit_nr = commit_nr; bf->bit_size = bit_size; - bf->bits = xmalloc(bit_size / CHAR_BIT); + bf->bits = xcalloc(1, commit_nr * bit_size / CHAR_BIT); } void bloom_filter_free(struct bloom_filter *bf) @@ -19,24 +22,24 @@ void bloom_filter_free(struct bloom_filter *bf) } -void bloom_filter_set_bits(struct bloom_filter *bf, const uint32_t *offsets, +static void bloom_filter_set_bits(struct bloom_filter *bf, uint32_t graph_pos, const uint32_t *offsets, int nr_offsets, int nr_entries) { int i; for (i = 0; i < nr_offsets; i++) { - uint32_t byte_offset = (offsets[i] % bf->bit_size) / CHAR_BIT; + uint32_t byte_offset = (offsets[i] % bf->bit_size + graph_pos * bf->bit_size) / CHAR_BIT; unsigned char mask = 1 << offsets[i] % CHAR_BIT; bf->bits[byte_offset] |= mask; } bf->nr_entries += nr_entries; } -int bloom_filter_check_bits(struct bloom_filter *bf, const uint32_t *offsets, +static int bloom_filter_check_bits(struct bloom_filter *bf, uint32_t graph_pos, const uint32_t *offsets, int nr) { int i; for (i = 0; i < nr; i++) { - uint32_t byte_offset = (offsets[i] % bf->bit_size) / CHAR_BIT; + uint32_t byte_offset = (offsets[i] % bf->bit_size + graph_pos * bf->bit_size) / CHAR_BIT; unsigned char mask = 1 << offsets[i] % CHAR_BIT; if (!(bf->bits[byte_offset] & mask)) return 0; @@ -45,19 +48,19 @@ int bloom_filter_check_bits(struct bloom_filter *bf, const uint32_t *offsets, } -void bloom_filter_add_hash(struct bloom_filter *bf, const unsigned char *hash) +void bloom_filter_add_hash(struct bloom_filter *bf, uint32_t graph_pos, const unsigned char *hash) { uint32_t offsets[GIT_MAX_RAWSZ / sizeof(uint32_t)]; hashcpy((unsigned char*)offsets, hash); - bloom_filter_set_bits(bf, offsets, + bloom_filter_set_bits(bf, graph_pos, offsets, the_hash_algo->rawsz / sizeof(*offsets), 1); } -int bloom_filter_check_hash(struct bloom_filter *bf, const unsigned char *hash) +int bloom_filter_check_hash(struct bloom_filter *bf, uint32_t graph_pos, const unsigned char *hash) { uint32_t offsets[GIT_MAX_RAWSZ / sizeof(uint32_t)]; hashcpy((unsigned char*)offsets, hash); - return bloom_filter_check_bits(bf, offsets, + return bloom_filter_check_bits(bf, graph_pos, offsets, the_hash_algo->rawsz / sizeof(*offsets)); } @@ -80,11 +83,12 @@ int bloom_filter_load(struct bloom_filter *bf) return -1; read_in_full(fd, &bf->nr_entries, sizeof(bf->nr_entries)); + read_in_full(fd, &bf->commit_nr, sizeof(bf->commit_nr)); read_in_full(fd, &bf->bit_size, sizeof(bf->bit_size)); if (bf->bit_size % CHAR_BIT) BUG("invalid size for bloom filter"); - bf->bits = xmalloc(bf->bit_size / CHAR_BIT); - read_in_full(fd, bf->bits, bf->bit_size / CHAR_BIT); + bf->bits = xmalloc(bf->commit_nr * bf->bit_size / CHAR_BIT); + read_in_full(fd, bf->bits, bf->commit_nr * bf->bit_size / CHAR_BIT); close(fd); @@ -96,8 +100,9 @@ void bloom_filter_write(struct bloom_filter *bf) int fd = xopen(git_path_bloom(), O_WRONLY | O_CREAT | O_TRUNC, 0666); write_in_full(fd, &bf->nr_entries, sizeof(bf->nr_entries)); + write_in_full(fd, &bf->commit_nr, sizeof(bf->commit_nr)); write_in_full(fd, &bf->bit_size, sizeof(bf->bit_size)); - write_in_full(fd, bf->bits, bf->bit_size / CHAR_BIT); + write_in_full(fd, bf->bits, bf->commit_nr * bf->bit_size / CHAR_BIT); close(fd); } diff --git a/bloom-filter.h b/bloom-filter.h index 94d0af1708..607649b8db 100644 --- a/bloom-filter.h +++ b/bloom-filter.h @@ -5,30 +5,26 @@ struct bloom_filter { uint32_t nr_entries; + uint32_t commit_nr; uint32_t bit_size; unsigned char *bits; }; -void bloom_filter_init(struct bloom_filter *bf, uint32_t bit_size); +void bloom_filter_init(struct bloom_filter *bf, uint32_t commit_nr, uint32_t bit_size); void bloom_filter_free(struct bloom_filter *bf); -void bloom_filter_set_bits(struct bloom_filter *bf, const uint32_t *offsets, - int nr_offsets, int nr_enries); -int bloom_filter_check_bits(struct bloom_filter *bf, const uint32_t *offsets, - int nr); - /* * Turns the given (SHA1) hash into 5 unsigned ints, and sets the bits at * those positions (modulo the bitmap's size) in the Bloom filter. */ -void bloom_filter_add_hash(struct bloom_filter *bf, const unsigned char *hash); +void bloom_filter_add_hash(struct bloom_filter *bf, uint32_t graph_pos, const unsigned char *hash); /* * Turns the given (SHA1) hash into 5 unsigned ints, and checks the bits at * those positions (modulo the bitmap's size) in the Bloom filter. * Returns 1 if all those bits are set, 0 otherwise. */ -int bloom_filter_check_hash(struct bloom_filter *bf, const unsigned char *hash); +int bloom_filter_check_hash(struct bloom_filter *bf, uint32_t graph_pos, const unsigned char *hash); void hashxor(const unsigned char *hash1, const unsigned char *hash2, unsigned char *out); diff --git a/commit-graph.c b/commit-graph.c index f415d3b41f..90b0b3df90 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -715,13 +715,11 @@ static int add_ref_to_list(const char *refname, static void add_changes_to_bloom_filter(struct bloom_filter *bf, struct commit *parent, struct commit *commit, + int index, struct diff_options *diffopt) { - unsigned char p_c_hash[GIT_MAX_RAWSZ]; int i; - hashxor(parent->object.oid.hash, commit->object.oid.hash, p_c_hash); - diff_tree_oid(&parent->object.oid, &commit->object.oid, "", diffopt); diffcore_std(diffopt); @@ -756,8 +754,8 @@ static void add_changes_to_bloom_filter(struct bloom_filter *bf, the_hash_algo->update_fn(&ctx, path, p - path); the_hash_algo->final_fn(name_hash, &ctx); - hashxor(name_hash, p_c_hash, hash); - bloom_filter_add_hash(bf, hash); + hashxor(name_hash, parent->object.oid.hash, hash); + bloom_filter_add_hash(bf, index, hash); } while (*p); diff_free_filepair(diff_queued_diff.queue[i]); @@ -768,11 +766,10 @@ static void add_changes_to_bloom_filter(struct bloom_filter *bf, } static void fill_bloom_filter(struct bloom_filter *bf, - struct progress *progress) + struct progress *progress, struct commit **commits, int commit_nr) { struct rev_info revs; const char *revs_argv[] = {NULL, "--all", NULL}; - struct commit *commit; int i = 0; /* We (re-)create the bloom filter from scratch every time for now. */ @@ -783,18 +780,19 @@ static void fill_bloom_filter(struct bloom_filter *bf, if (prepare_revision_walk(&revs)) die("revision walk setup failed while preparing bloom filter"); - while ((commit = get_revision(&revs))) { + for (i = 0; i < commit_nr; i++) { + struct commit *commit = commits[i]; struct commit_list *parent; for (parent = commit->parents; parent; parent = parent->next) - add_changes_to_bloom_filter(bf, parent->item, commit, + add_changes_to_bloom_filter(bf, parent->item, commit, i, &revs.diffopt); - display_progress(progress, ++i); + display_progress(progress, i); } } -static void write_bloom_filter(int report_progress, int commit_nr) +static void write_bloom_filter(int report_progress, struct commit **commits, int commit_nr) { struct bloom_filter bf; struct progress *progress = NULL; @@ -809,13 +807,13 @@ static void write_bloom_filter(int report_progress, int commit_nr) if (*end) die("GIT_USE_POC_BLOOM_FILTER must specify the number of bits in the bloom filter (multiple of 8, n < 2^32)"); - bloom_filter_init(&bf, bitsize); + bloom_filter_init(&bf, commit_nr, bitsize); if (report_progress) progress = start_progress(_("Computing bloom filter"), commit_nr); - fill_bloom_filter(&bf, progress); + fill_bloom_filter(&bf, progress, commits, commit_nr); bloom_filter_write(&bf); bloom_filter_free(&bf); @@ -1030,7 +1028,7 @@ void write_commit_graph(const char *obj_dir, finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC); commit_lock_file(&lk); - write_bloom_filter(report_progress, commits.nr); + write_bloom_filter(report_progress, commits.list, commits.nr); free(graph_name); free(commits.list); diff --git a/revision.c b/revision.c index d5ba2b1598..c84a997928 100644 --- a/revision.c +++ b/revision.c @@ -490,7 +490,6 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs, struct commit *parent, struct commit *commit) { - unsigned char p_c_hash[GIT_MAX_RAWSZ]; int i; if (!bf.bits) @@ -509,17 +508,15 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs, */ if (!the_repository->objects->commit_graph) return -1; - if (commit->generation == GENERATION_NUMBER_INFINITY) + if (commit->graph_pos == COMMIT_NOT_FROM_GRAPH) return -1; - hashxor(parent->object.oid.hash, commit->object.oid.hash, p_c_hash); - for (i = 0; i < revs->pruning.pathspec.nr; i++) { struct pathspec_item *pi = &revs->pruning.pathspec.items[i]; unsigned char hash[GIT_MAX_RAWSZ]; - hashxor(pi->name_hash, p_c_hash, hash); - if (bloom_filter_check_hash(&bf, hash)) { + hashxor(pi->name_hash, parent->object.oid.hash, hash); + if (bloom_filter_check_hash(&bf, commit->graph_pos, hash)) { /* * At least one of the interesting pathspecs differs, * so we can return early and let the diff machinery -- 2.19.0.271.gfe8321ec05.dirty