The table below compares the average runtime and memory usage of git rev-list HEAD -- "$path" with and without modified path Bloom filters for 5000+ randomly selected paths from each repository: Average runtime | Max RSS without with Speedup | without with -------------------------------------------+------------------------- android-base 0.8780s 0.1742s 5.04x | 387MB 227MB -41.4% cmssw 0.3143s 0.0452s 6.95x | 181MB 79MB -56.4% cpython 0.7453s 0.0956s 7.80x | 159MB 71MB -55.4% elasticsearch 0.1492s 0.0191s 7.82x | 134MB 53MB -60.2% gcc 7.1852s 0.3584s 20.05x | 297MB 231MB -22.3% gecko-dev 4.6113s 0.6318s 7.30x | 832MB 600MB -27.9% git 0.6180s 0.0405s 15.26x | 131MB 43MB -67.2% glibc 0.5618s 0.0471s 11.93x | 136MB 50MB -63.3% go 0.4913s 0.0515s 9.53x | 130MB 50MB -61.5% jdk 0.0482s 0.0089s 5.42x | 52MB 39MB -25.0% linux 0.7043s 0.1129s 6.24x | 438MB 270MB -38.4% llvm-project 2.6844s 0.4873s 5.51x | 384MB 282MB -26.6% rails 0.2784s 0.0484s 5.75x | 88MB 51MB -42.1% rust 0.7757s 0.0619s 12.53x | 345MB 89MB -74.3% tensorflow 0.6258s 0.0642s 9.74x | 233MB 85MB -63.5% webkit 1.9137s 0.3332s 5.74x | 941MB 480MB -49.0% --- commit-graph.c | 44 ++++++++++++++++++++++++++++++++++++++++++-- commit-graph.h | 2 ++ 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 3210ec2f93..f9a21ecdfb 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -327,6 +327,15 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, } break; + case GRAPH_CHUNKID_MODIFIED_PATH_BLOOM_FILTERS: + if (graph->chunk_mpbf_filters) + chunk_repeated = 1; + else { + graph->chunk_mpbf_filters = data + chunk_offset; + graph->chunk_mpbf_filters_size = next_chunk_offset - chunk_offset; + } + break; + case GRAPH_CHUNKID_MODIFIED_PATH_BLOOM_FILTER_EXCLUDES: if (graph->chunk_mpbf_excludes) chunk_repeated = 1; @@ -830,6 +839,7 @@ static int load_modified_path_bloom_filter_from_graph( struct commit *parent, struct bloom_filter *bf) { const uint8_t *bloom_index; + uint64_t offset; int first_parent = 0; if (commit->graph_pos == COMMIT_NOT_FROM_GRAPH) @@ -857,9 +867,39 @@ static int load_modified_path_bloom_filter_from_graph( bf->nr_bits = GRAPH_MODIFIED_PATH_BLOOM_FILTER_EMBEDDED_NR_BITS; bf->bits = (uint8_t*) bloom_index; return 1; + } else if (bloom_index[0] & (1 << 6)) { + /* + * Modified path Bloom filters for second..nth parents of + * merge commits are not implemented yet. + */ + return 0; + } else { + if (!first_parent) + return 0; + offset = get_be64(bloom_index); } - /* support for non-embedded Bloom filters is not implemented yet. */ - return 0; + + if (!graph->chunk_mpbf_filters) + BUG("commit %s refers to offset %lu of the Modified Path Bloom Filters chunk, but that chunk is missing", + oid_to_hex(&commit->object.oid), offset); + + if (offset + sizeof(uint32_t) >= graph->chunk_mpbf_filters_size) + BUG("commit %s refers to offset %lu of the Modified Path Bloom Filters chunk, but that's too large for chunk of size %lu bytes", + oid_to_hex(&commit->object.oid), offset, + graph->chunk_mpbf_filters_size); + + bf->nr_bits = get_be32(graph->chunk_mpbf_filters + offset); + if (!bf->nr_bits) + BUG("commit %s has a modified path Bloom filter at offset %lu, which has zero size", + oid_to_hex(&commit->object.oid), offset); + if (offset + sizeof(uint32_t) + bloom_filter_bytes(bf) > graph->chunk_mpbf_filters_size) + BUG("commit %s has a modified path Bloom filter of %u bits at offset %lu, which doesn't fit into a Modified Path Bloom Filters chunk of %lu bytes", + oid_to_hex(&commit->object.oid), bf->nr_bits, offset, + graph->chunk_mpbf_filters_size); + /* Casting away const-ness :( */ + bf->bits = (uint8_t*)(graph->chunk_mpbf_filters + offset + sizeof(uint32_t)); + + return 1; } enum bloom_result check_modified_path_bloom_filter(struct repository *r, diff --git a/commit-graph.h b/commit-graph.h index 09dfc16932..cde0d7fa30 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -69,6 +69,8 @@ struct commit_graph { const unsigned char *chunk_base_graphs; const unsigned char *chunk_mpbf_index; uint64_t chunk_mpbf_index_size; + const unsigned char *chunk_mpbf_filters; + uint64_t chunk_mpbf_filters_size; const unsigned char *chunk_mpbf_excludes; uint8_t num_modified_path_bloom_hashes; -- 2.27.0.rc1.431.g5c813f95dc