[PATCH 7/7] commit-graph: write file format v2

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Derrick Stolee <derrickstolee@xxxxxxxxxx>

The commit-graph file format v2 exists so we can modify the meaning of
the Commit Data chunk to store corrected commit date offsets. Thus, we
trigger the write to use this different file format only if the
configured generation number version is 3.

The implementation needs to be careful of a few things to ensure we
enable computing corrected commit dates and do not compute topological
levels. We also still need the Generation Data Overflow chunk, but we
compute the offsets into that chunk while writing the Commit Data chunk
instead of the generation Data chunk.

Testing 'git merge-base v4.8 v4.9' in the Linux kernel with corrected
commit dates, but the only difference being the file format (between
generation number v2 and v3) we get these results:

Benchmark 1: generation number v2
  Time (mean ± σ):     144.4 ms ±   8.3 ms
  Range (min … max):   127.4 ms … 154.6 ms    20 runs

Benchmark 2: generation number v3
  Time (mean ± σ):     139.3 ms ±   7.3 ms
  Range (min … max):   125.1 ms … 148.1 ms    20 runs

This provides a 3.6% improvement, and the only reason is the reduced
I/O. This test was run with hot caches, so I re-ran it in the cold-cache
case, trying to demonstrate that this I/O cost is higher when reading
directly from disk every time:

Benchmark 1: generation number v2
  Time (mean ± σ):     469.9 ms ±  14.8 ms
  Range (min … max):   434.5 ms … 494.4 ms    10 runs

Benchmark 2: generation number v3
  Time (mean ± σ):     413.4 ms ±  18.9 ms
  Range (min … max):   372.8 ms … 428.3 ms    10 runs

With cold caches, the improvement increases to 13.4%.

Signed-off-by: Derrick Stolee <derrickstolee@xxxxxxxxxx>
---
 Documentation/config/commitgraph.txt |  4 ++-
 commit-graph.c                       | 46 +++++++++++++++++++++++++---
 commit.h                             |  1 +
 t/t5318-commit-graph.sh              | 25 ++++++++++++++-
 4 files changed, 69 insertions(+), 7 deletions(-)

diff --git a/Documentation/config/commitgraph.txt b/Documentation/config/commitgraph.txt
index 30604e4a4c2..79d57d06a67 100644
--- a/Documentation/config/commitgraph.txt
+++ b/Documentation/config/commitgraph.txt
@@ -1,7 +1,9 @@
 commitGraph.generationVersion::
 	Specifies the type of generation number version to use when writing
 	or reading the commit-graph file. If version 1 is specified, then
-	the corrected commit dates will not be written or read. Defaults to
+	the corrected commit dates will not be written or read. If version
+	3 is specified, then the commit-graph file will be slightly smaller,
+	but will be incompatible with some old versions of Git. Defaults to
 	2.
 
 commitGraph.maxNewFilters::
diff --git a/commit-graph.c b/commit-graph.c
index 366fc4d6e41..82f7401b283 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1035,6 +1035,7 @@ struct write_commit_graph_context {
 	struct progress *progress;
 	int progress_done;
 	uint64_t progress_cnt;
+	int version;
 
 	char *base_graph_name;
 	int num_commit_graphs_before;
@@ -1118,12 +1119,14 @@ static int write_graph_chunk_data(struct hashfile *f,
 	struct commit **list = ctx->commits.list;
 	struct commit **last = ctx->commits.list + ctx->commits.nr;
 	uint32_t num_extra_edges = 0;
+	int num_generation_data_overflows = 0;
 
 	while (list < last) {
 		struct commit_list *parent;
 		struct object_id *tree;
 		int edge_value;
 		uint32_t packedDate[2];
+		uint32_t generation_data;
 		display_progress(ctx->progress, ++ctx->progress_cnt);
 
 		if (repo_parse_commit_no_graph(ctx->r, *list))
@@ -1203,7 +1206,18 @@ static int write_graph_chunk_data(struct hashfile *f,
 		else
 			packedDate[0] = 0;
 
-		packedDate[0] |= htonl(*topo_level_slab_at(ctx->topo_levels, *list) << 2);
+		if (ctx->version == GRAPH_VERSION_1)
+			generation_data = *topo_level_slab_at(ctx->topo_levels, *list);
+		else {
+			generation_data = commit_graph_data_at(*list)->generation - (*list)->date;
+			if (generation_data > GENERATION_NUMBER_V3_OFFSET_MAX) {
+				generation_data = CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW_V3 |
+						  num_generation_data_overflows;
+				num_generation_data_overflows++;
+			}
+		}
+
+		packedDate[0] |= htonl(generation_data << 2);
 
 		packedDate[1] = htonl((*list)->date);
 		hashwrite(f, packedDate, 8);
@@ -1243,12 +1257,16 @@ static int write_graph_chunk_generation_data_overflow(struct hashfile *f,
 {
 	struct write_commit_graph_context *ctx = data;
 	int i;
+	timestamp_t offset_max = ctx->version >= 2 ?
+					GENERATION_NUMBER_V3_OFFSET_MAX :
+					GENERATION_NUMBER_V2_OFFSET_MAX;
+
 	for (i = 0; i < ctx->commits.nr; i++) {
 		struct commit *c = ctx->commits.list[i];
 		timestamp_t offset = commit_graph_data_at(c)->generation - c->date;
 		display_progress(ctx->progress, ++ctx->progress_cnt);
 
-		if (offset > GENERATION_NUMBER_V2_OFFSET_MAX) {
+		if (offset > offset_max) {
 			hashwrite_be32(f, offset >> 32);
 			hashwrite_be32(f, (uint32_t) offset);
 		}
@@ -1474,6 +1492,13 @@ static void compute_topological_levels(struct write_commit_graph_context *ctx)
 	int i;
 	struct commit_list *list = NULL;
 
+	/*
+	 * Skip topological levels if file format version is two or more,
+	 * since the Commit Data chunk uses corrected commit date offsets.
+	 */
+	if (ctx->version >= 2)
+		return;
+
 	if (ctx->report_progress)
 		ctx->progress = start_delayed_progress(
 					_("Computing commit graph topological levels"),
@@ -1526,6 +1551,9 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx)
 {
 	int i;
 	struct commit_list *list = NULL;
+	timestamp_t offset_max = ctx->version >= 2 ?
+					GENERATION_NUMBER_V3_OFFSET_MAX :
+					GENERATION_NUMBER_V2_OFFSET_MAX;
 
 	if (ctx->report_progress)
 		ctx->progress = start_delayed_progress(
@@ -1585,7 +1613,7 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx)
 	for (i = 0; i < ctx->commits.nr; i++) {
 		struct commit *c = ctx->commits.list[i];
 		timestamp_t offset = commit_graph_data_at(c)->generation - c->date;
-		if (offset > GENERATION_NUMBER_V2_OFFSET_MAX)
+		if (offset > offset_max)
 			ctx->num_generation_data_overflows++;
 	}
 	stop_progress(&ctx->progress);
@@ -1908,7 +1936,7 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
 	add_chunk(cf, GRAPH_CHUNKID_DATA, (hashsz + 16) * ctx->commits.nr,
 		  write_graph_chunk_data);
 
-	if (ctx->write_generation_data)
+	if (ctx->write_generation_data && ctx->version == GRAPH_VERSION_1)
 		add_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA,
 			  sizeof(uint32_t) * ctx->commits.nr,
 			  write_graph_chunk_generation_data);
@@ -1936,7 +1964,7 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
 
 	hashwrite_be32(f, GRAPH_SIGNATURE);
 
-	hashwrite_u8(f, GRAPH_VERSION_1);
+	hashwrite_u8(f, ctx->version);
 	hashwrite_u8(f, oid_version());
 	hashwrite_u8(f, get_num_chunks(cf));
 	hashwrite_u8(f, ctx->num_commit_graphs_after - 1);
@@ -2317,6 +2345,14 @@ int write_commit_graph(struct object_directory *odb,
 	ctx->write_generation_data = (get_configured_generation_version(r) == 2);
 	ctx->num_generation_data_overflows = 0;
 
+	if (get_configured_generation_version(r) == 3)
+		ctx->version = GRAPH_VERSION_2;
+	else
+		ctx->version = GRAPH_VERSION_1;
+
+	if (ctx->version >= GRAPH_VERSION_2)
+		ctx->write_generation_data = 1;
+
 	bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
 						      bloom_settings.bits_per_entry);
 	bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES",
diff --git a/commit.h b/commit.h
index 38cc5426615..a668b5cdec0 100644
--- a/commit.h
+++ b/commit.h
@@ -15,6 +15,7 @@
 #define GENERATION_NUMBER_V1_MAX 0x3FFFFFFF
 #define GENERATION_NUMBER_ZERO 0
 #define GENERATION_NUMBER_V2_OFFSET_MAX ((1ULL << 31) - 1)
+#define GENERATION_NUMBER_V3_OFFSET_MAX ((1ULL << 29) - 1)
 
 struct commit_list {
 	struct commit *item;
diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh
index a14a13e5f7b..77e130ef63e 100755
--- a/t/t5318-commit-graph.sh
+++ b/t/t5318-commit-graph.sh
@@ -110,8 +110,13 @@ graph_read_expect() {
 	then
 		OPTIONS=" read_generation_data"
 	fi
+	VERSION=1
+	if test $GENERATION_VERSION -gt 2
+	then
+		VERSION=2
+	fi
 	cat >expect <<- EOF
-	header: 43475048 1 $(test_oid oid_version) $NUM_CHUNKS 0
+	header: 43475048 $VERSION $(test_oid oid_version) $NUM_CHUNKS 0
 	num_commits: $1
 	chunks: oid_fanout oid_lookup commit_metadata$OPTIONAL
 	options:$OPTIONS
@@ -343,6 +348,15 @@ test_expect_success 'build graph using --reachable' '
 graph_git_behavior 'append graph, commit 8 vs merge 1' full commits/8 merge/1
 graph_git_behavior 'append graph, commit 8 vs merge 2' full commits/8 merge/2
 
+test_expect_success 'write file format v2 with generation number v3' '
+	cd "$TRASH_DIRECTORY/full" &&
+	git -c commitGraph.generationVersion=3 commit-graph write --reachable &&
+	graph_read_expect "11" "extra_edges" 3
+'
+
+graph_git_behavior 'graph v2, commit 8 vs merge 1' full commits/8 merge/1
+graph_git_behavior 'graph v2, commit 8 vs merge 2' full commits/8 merge/2
+
 test_expect_success 'setup bare repo' '
 	cd "$TRASH_DIRECTORY" &&
 	git clone --bare --no-local full bare &&
@@ -880,6 +894,15 @@ test_expect_success TIME_IS_64BIT,TIME_T_IS_64BIT 'set up and verify repo with g
 
 graph_git_behavior 'generation data overflow chunk repo' repo left right
 
+test_expect_success TIME_IS_64BIT,TIME_T_IS_64BIT 'set up and verify repo with generation data overflow chunk (v3)' '
+	cd "$TRASH_DIRECTORY/repo" &&
+	git -c commitGraph.generationVersion=3 commit-graph write --reachable &&
+	graph_read_expect 10 "generation_data_overflow" 3 &&
+	git commit-graph verify
+'
+
+graph_git_behavior 'generation data overflow chunk repo' repo left right
+
 # Do not add tests at the end of this file, unless they require 64-bit
 # timestamps, since this portion of the script is only executed when
 # time data types have 64 bits.
-- 
gitgitgadget



[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux