Teach git-commit-graph to read commit graph files and summarize their contents. Use the read subcommand to verify the contents of a commit graph file in the tests. Signed-off-by: Derrick Stolee <dstolee@xxxxxxxxxxxxx> --- Documentation/git-commit-graph.txt | 16 ++++ builtin/commit-graph.c | 71 ++++++++++++++++++ commit-graph.c | 147 +++++++++++++++++++++++++++++++++++++ commit-graph.h | 23 ++++++ t/t5318-commit-graph.sh | 34 +++++++-- 5 files changed, 286 insertions(+), 5 deletions(-) diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index 55dfe5c3d8..67e107f06a 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -9,6 +9,7 @@ git-commit-graph - Write and verify Git commit graphs (.graph files) SYNOPSIS -------- [verse] +'git commit-graph read' <options> [--pack-dir <pack_dir>] 'git commit-graph write' <options> [--pack-dir <pack_dir>] @@ -34,6 +35,15 @@ Includes all commits from the existing commit graph file. Outputs the checksum hash of the written file. +'read':: + +Read the graph file selected by `--graph-hash` and output basic +details about the graph file. ++ +With the `--graph-hash=<hash>` option, read the graph file +graph-<hash>.graph in the pack directory. This option is required. + + EXAMPLES -------- @@ -43,6 +53,12 @@ EXAMPLES $ git commit-graph write ------------------------------------------------ +* Read basic information from a graph file. 
++
+------------------------------------------------
+$ git commit-graph read --graph-hash=<hash>
+------------------------------------------------
+
 GIT
 ---

diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c
index 5dac033bfe..3ffa7ec433 100644
--- a/builtin/commit-graph.c
+++ b/builtin/commit-graph.c
@@ -5,10 +5,16 @@

 static char const * const builtin_commit_graph_usage[] = {
 	N_("git commit-graph [--pack-dir <packdir>]"),
+	N_("git commit-graph read [--pack-dir <packdir>] [--graph-hash=<hash>]"),
 	N_("git commit-graph write [--pack-dir <packdir>]"),
 	NULL
 };

+static const char * const builtin_commit_graph_read_usage[] = {
+	N_("git commit-graph read [--pack-dir <packdir>] [--graph-hash=<hash>]"),
+	NULL
+};
+
 static const char * const builtin_commit_graph_write_usage[] = {
 	N_("git commit-graph write [--pack-dir <packdir>]"),
 	NULL
@@ -16,8 +22,80 @@ static const char * const builtin_commit_graph_write_usage[] = {

 static struct opts_commit_graph {
 	const char *pack_dir;
+	const char *graph_hash;
 } opts;

+/*
+ * Implements "git commit-graph read": load the graph file named by
+ * --graph-hash from the pack directory and print its header bytes,
+ * commit count, the chunks it contains and its pack directory.
+ *
+ * Dies if no well-formed hash is given or the graph file cannot be
+ * loaded; returns 0 otherwise.
+ */
+static int graph_read(int argc, const char **argv)
+{
+	struct object_id graph_hash;
+	struct commit_graph *graph = NULL;
+	const char *graph_file;
+
+	static struct option builtin_commit_graph_read_options[] = {
+		{ OPTION_STRING, 'p', "pack-dir", &opts.pack_dir,
+			N_("dir"),
+			N_("The pack directory containing the graph") },
+		{ OPTION_STRING, 'H', "graph-hash", &opts.graph_hash,
+			N_("hash"),
+			N_("A hash for a specific graph file in the pack-dir."),
+			PARSE_OPT_OPTARG, NULL, (intptr_t) "" },
+		OPT_END(),
+	};
+
+	argc = parse_options(argc, argv, NULL,
+			     builtin_commit_graph_read_options,
+			     builtin_commit_graph_read_usage, 0);
+
+	if (!opts.pack_dir) {
+		struct strbuf path = STRBUF_INIT;
+		strbuf_addstr(&path, get_object_directory());
+		strbuf_addstr(&path, "/pack");
+		opts.pack_dir = strbuf_detach(&path, NULL);
+	}
+
+	if (!opts.graph_hash || strlen(opts.graph_hash) != GIT_MAX_HEXSZ)
+		die("no graph hash specified");
+	/* The length check does not reject non-hex characters. */
+	if (get_oid_hex(opts.graph_hash, &graph_hash))
+		die("invalid graph hash: %s", opts.graph_hash);
+
+	graph_file = get_commit_graph_filename_hash(opts.pack_dir, &graph_hash);
+	graph = load_commit_graph_one(graph_file, opts.pack_dir);
+
+	if (!graph)
+		die("graph file %s does not exist", graph_file);
+
+	printf("header: %08x %02x %02x %02x %02x\n",
+		get_be32(graph->data),
+		graph->data[4],
+		graph->data[5],
+		graph->hash_len,
+		graph->num_chunks);
+	printf("num_commits: %u\n", graph->num_commits);
+	printf("chunks:");
+
+	if (graph->chunk_oid_fanout)
+		printf(" oid_fanout");
+	if (graph->chunk_oid_lookup)
+		printf(" oid_lookup");
+	if (graph->chunk_commit_data)
+		printf(" commit_metadata");
+	if (graph->chunk_large_edges)
+		printf(" large_edges");
+	printf("\n");
+
+	printf("pack_dir: %s\n", graph->pack_dir);
+	return 0;
+}
+
 static int graph_write(int argc, const char **argv)
 {
 	struct object_id *graph_hash;
@@ -70,6 +148,8 @@ int cmd_commit_graph(int argc, const char **argv, const char *prefix)
 			     PARSE_OPT_STOP_AT_NON_OPTION);

 	if (argc > 0) {
+		if (!strcmp(argv[0], "read"))
+			return graph_read(argc, argv);
 		if (!strcmp(argv[0], "write"))
 			return graph_write(argc, argv);
 	}
diff --git a/commit-graph.c b/commit-graph.c
index cb47b68871..9a337cea4d 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -51,6 +51,192 @@ char* get_commit_graph_filename_hash(const char *pack_dir,
 	return strbuf_detach(&path, &len);
 }

+/*
+ * Allocate a zero-filled commit_graph with "extra" trailing bytes for the
+ * pack_dir flex array. graph_fd starts at -1, i.e. no file attached yet.
+ */
+static struct commit_graph *alloc_commit_graph(size_t extra)
+{
+	struct commit_graph *g = xcalloc(1, st_add(sizeof(*g), extra));
+
+	g->graph_fd = -1;
+	return g;
+}
+
+/*
+ * Unmap the graph data and close the file descriptor.
+ * Returns 0 if the graph had no open descriptor, 1 after closing it.
+ */
+static int close_commit_graph(struct commit_graph *g)
+{
+	if (g->graph_fd < 0)
+		return 0;
+
+	munmap((void *)g->data, g->data_len);
+	g->data = NULL;
+
+	close(g->graph_fd);
+	g->graph_fd = -1;
+
+	return 1;
+}
+
+/* Close and free *g, leaving the caller's pointer set to NULL. */
+static void free_commit_graph(struct commit_graph **g)
+{
+	if (!g || !*g)
+		return;
+
+	close_commit_graph(*g);
+	FREE_AND_NULL(*g);
+}
+
+/*
+ * Open and mmap graph_file, validate its header and record where each
+ * known chunk starts.
+ *
+ * Returns NULL if the file cannot be opened or stat'ed. Dies if the file
+ * is too small, if its signature, version or hash version does not match,
+ * if its hash length is zero, or if the chunk table or a chunk offset does
+ * not fit in the file or names an unrecognized chunk id.
+ *
+ * The returned graph keeps the mapping and descriptor open and stores its
+ * own copy of pack_dir.
+ */
+struct commit_graph *load_commit_graph_one(const char *graph_file, const char *pack_dir)
+{
+	void *graph_map;
+	const unsigned char *data, *chunk_lookup;
+	size_t graph_size;
+	struct stat st;
+	uint32_t i;
+	struct commit_graph *graph;
+	int fd = git_open(graph_file);
+	uint64_t last_chunk_offset;
+	uint32_t last_chunk_id;
+	uint32_t graph_signature;
+	unsigned char graph_version, hash_version;
+
+	if (fd < 0)
+		return NULL;
+	if (fstat(fd, &st)) {
+		close(fd);
+		return NULL;
+	}
+	graph_size = xsize_t(st.st_size);
+
+	if (graph_size < GRAPH_MIN_SIZE) {
+		close(fd);
+		die("graph file %s is too small", graph_file);
+	}
+	graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	data = (const unsigned char *)graph_map;
+
+	graph_signature = get_be32(data);
+	if (graph_signature != GRAPH_SIGNATURE) {
+		munmap(graph_map, graph_size);
+		close(fd);
+		die("graph signature %X does not match signature %X",
+		    graph_signature, GRAPH_SIGNATURE);
+	}
+
+	graph_version = data[4];
+	if (graph_version != GRAPH_VERSION) {
+		munmap(graph_map, graph_size);
+		close(fd);
+		die("graph version %X does not match version %X",
+		    graph_version, GRAPH_VERSION);
+	}
+
+	hash_version = data[5];
+	if (hash_version != GRAPH_OID_VERSION) {
+		munmap(graph_map, graph_size);
+		close(fd);
+		die("hash version %X does not match version %X",
+		    hash_version, GRAPH_OID_VERSION);
+	}
+
+	graph = alloc_commit_graph(strlen(pack_dir) + 1);
+
+	graph->hash_len = data[6];
+	graph->num_chunks = data[7];
+	graph->graph_fd = fd;
+	graph->data = graph_map;
+	graph->data_len = graph_size;
+
+	/* num_commits is computed below by dividing by hash_len. */
+	if (!graph->hash_len) {
+		free_commit_graph(&graph);
+		die("graph file %s has a zero hash length", graph_file);
+	}
+
+	/* The loop below reads num_chunks lookup entries starting at byte 8. */
+	if (graph_size < 8 + (size_t)graph->num_chunks * GRAPH_CHUNKLOOKUP_WIDTH) {
+		free_commit_graph(&graph);
+		die("graph file %s is too small for its chunk table", graph_file);
+	}
+
+	last_chunk_id = 0;
+	last_chunk_offset = 8;
+	chunk_lookup = data + 8;
+	for (i = 0; i < graph->num_chunks; i++) {
+		uint32_t chunk_id = get_be32(chunk_lookup + 0);
+		uint64_t chunk_offset1 = get_be32(chunk_lookup + 4);
+		uint32_t chunk_offset2 = get_be32(chunk_lookup + 8);
+		uint64_t chunk_offset = (chunk_offset1 << 32) | chunk_offset2;
+
+		chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH;
+
+		if (chunk_offset > graph_size - GIT_MAX_RAWSZ) {
+			free_commit_graph(&graph);
+			die("improper chunk offset %08x%08x",
+			    (uint32_t)(chunk_offset >> 32),
+			    (uint32_t)chunk_offset);
+		}
+
+		switch (chunk_id) {
+		case GRAPH_CHUNKID_OIDFANOUT:
+			graph->chunk_oid_fanout = data + chunk_offset;
+			break;
+
+		case GRAPH_CHUNKID_OIDLOOKUP:
+			graph->chunk_oid_lookup = data + chunk_offset;
+			break;
+
+		case GRAPH_CHUNKID_DATA:
+			graph->chunk_commit_data = data + chunk_offset;
+			break;
+
+		case GRAPH_CHUNKID_LARGEEDGES:
+			graph->chunk_large_edges = data + chunk_offset;
+			break;
+
+		case 0:
+			break;
+
+		default:
+			free_commit_graph(&graph);
+			die("unrecognized graph chunk id: %08x", chunk_id);
+		}
+
+		/*
+		 * The size of the OID lookup chunk is taken to be the distance
+		 * to the next chunk's offset (assumes chunks are stored back to
+		 * back in table order); it holds hash_len bytes per commit.
+		 */
+		if (last_chunk_id == GRAPH_CHUNKID_OIDLOOKUP) {
+			graph->num_commits = (chunk_offset - last_chunk_offset)
+					     / graph->hash_len;
+		}
+
+		last_chunk_id = chunk_id;
+		last_chunk_offset = chunk_offset;
+	}
+
+	strcpy(graph->pack_dir, pack_dir);
+	return graph;
+}
+
 static void write_graph_chunk_fanout(struct sha1file *f,
 				     struct commit **commits,
 				     int nr_commits)
diff --git a/commit-graph.h b/commit-graph.h
index 4756f6ba5b..c1608976b3 100644
--- a/commit-graph.h
+++ b/commit-graph.h
@@ -7,6 +7,39 @@
 extern char* get_commit_graph_filename_hash(const char *pack_dir,
 					    struct object_id *hash);

+struct commit_graph {
+	/* descriptor of the mapped graph file; -1 when no file is attached */
+	int graph_fd;
+
+	/* the mapped file contents and their length */
+	const unsigned char *data;
+	size_t data_len;
+
+	/* read from bytes 6 and 7 of the file header */
+	unsigned char hash_len;
+	unsigned char num_chunks;
+	/* derived from the size of the OID lookup chunk */
+	uint32_t num_commits;
+	struct object_id oid;
+
+	/* start of each chunk inside data; NULL if the file lacks it */
+	const unsigned char *chunk_oid_fanout;
+	const unsigned char *chunk_oid_lookup;
+	const unsigned char *chunk_commit_data;
+	const unsigned char *chunk_large_edges;
+
+	/* something like ".git/objects/pack" */
+	char pack_dir[FLEX_ARRAY]; /* more */
+};
+
+/*
+ * Map and parse the graph file at graph_file. Returns NULL if the file
+ * cannot be opened and dies if its contents are malformed. pack_dir is
+ * copied into the returned graph.
+ */
+extern struct commit_graph *load_commit_graph_one(const char *graph_file,
+						 const char *pack_dir);
+
 extern struct object_id *write_commit_graph(const char *pack_dir);

 #endif
diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh
index b762587595..ad1d0e621d 100755
--- a/t/t5318-commit-graph.sh
+++
b/t/t5318-commit-graph.sh
@@ -21,9 +21,21 @@ test_expect_success 'create commits and repack' '
 	done &&
 	git repack'

+graph_read_expect() {
+	cat >expect <<- EOF
+	header: 43475048 01 01 14 04
+	num_commits: $1
+	chunks: oid_fanout oid_lookup commit_metadata large_edges
+	pack_dir: $2
+	EOF
+}
+
 test_expect_success 'write graph' '
 	graph1=$(git commit-graph write) &&
-	test_path_is_file $packdir/graph-$graph1.graph'
+	test_path_is_file $packdir/graph-$graph1.graph &&
+	git commit-graph read --graph-hash=$graph1 >output &&
+	graph_read_expect "3" "$packdir" &&
+	test_cmp expect output'

 test_expect_success 'Add more commits' '
 	git reset --hard commits/1 &&
@@ -62,7 +74,10 @@ test_expect_success 'Add more commits' '

 test_expect_success 'write graph with merges' '
 	graph2=$(git commit-graph write)&&
-	test_path_is_file $packdir/graph-$graph2.graph'
+	test_path_is_file $packdir/graph-$graph2.graph &&
+	git commit-graph read --graph-hash=$graph2 >output &&
+	graph_read_expect "10" "$packdir" &&
+	test_cmp expect output'

 test_expect_success 'Add one more commit' '
 	test_commit 8 &&
@@ -85,14 +100,19 @@ test_expect_success 'Add one more commit' '

 test_expect_success 'write graph with new commit' '
 	graph3=$(git commit-graph write) &&
-	test_path_is_file $packdir/graph-$graph3.graph'
-
+	test_path_is_file $packdir/graph-$graph3.graph &&
+	git commit-graph read --graph-hash=$graph3 >output &&
+	graph_read_expect "11" "$packdir" &&
+	test_cmp expect output'

 test_expect_success 'write graph with nothing new' '
 	graph4=$(git commit-graph write) &&
 	test_path_is_file $packdir/graph-$graph4.graph &&
 	printf $graph3 >expect &&
 	printf $graph4 >output &&
+	test_cmp expect output &&
+	git commit-graph read --graph-hash=$graph4 >output &&
+	graph_read_expect "11" "$packdir" &&
 	test_cmp expect output'

 test_expect_success 'setup bare repo' '
@@ -103,7 +123,10 @@ test_expect_success 'setup bare repo' '

 test_expect_success 'write graph in bare repo' '
 	graphbare=$(git commit-graph write) &&
-	test_path_is_file $baredir/graph-$graphbare.graph'
+	test_path_is_file $baredir/graph-$graphbare.graph &&
+	git commit-graph read --graph-hash=$graphbare >output &&
+	graph_read_expect "11" "$baredir" &&
+	test_cmp expect output'

 test_done

-- 
2.15.1.45.g9b7079f