Josh Steadmon <steadmon@xxxxxxxxxx> writes:

> Breaks load_commit_graph_one() into a new function,

s/Breaks/Break/

> parse_commit_graph(). The latter function operates on arbitrary buffers,
> which makes it suitable as a fuzzing target. Since parse_commit_graph()
> is only called by load_commit_graph_one() (and the fuzzer described
> below), we omit error messages that would be duplicated by the caller.
>
> Adds fuzz-commit-graph.c, which provides a fuzzing entry point
> compatible with libFuzzer (and possibly other fuzzing engines).
>
> Signed-off-by: Josh Steadmon <steadmon@xxxxxxxxxx>
> ---
>  .gitignore          |  1 +
>  Makefile            |  1 +
>  commit-graph.c      | 53 ++++++++++++++++++++++++++++++---------------
>  commit-graph.h      |  3 +++
>  fuzz-commit-graph.c | 16 ++++++++++++++
>  5 files changed, 57 insertions(+), 17 deletions(-)
>  create mode 100644 fuzz-commit-graph.c
>
> diff --git a/.gitignore b/.gitignore
> index 0d77ea5894..8bcf153ed9 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -1,3 +1,4 @@
> +/fuzz-commit-graph
>  /fuzz_corpora
>  /fuzz-pack-headers
>  /fuzz-pack-idx
> diff --git a/Makefile b/Makefile
> index 1a44c811aa..6b72f37c29 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -684,6 +684,7 @@ SCRIPTS = $(SCRIPT_SH_INS) \
>
>  ETAGS_TARGET = TAGS
>
> +FUZZ_OBJS += fuzz-commit-graph.o
>  FUZZ_OBJS += fuzz-pack-headers.o
>  FUZZ_OBJS += fuzz-pack-idx.o
>
> diff --git a/commit-graph.c b/commit-graph.c
> index 40c855f185..07dd410f3c 100644
> --- a/commit-graph.c
> +++ b/commit-graph.c
> @@ -84,16 +84,10 @@ static int commit_graph_compatible(struct repository *r)
>  struct commit_graph *load_commit_graph_one(const char *graph_file)
>  {
>  	void *graph_map;
> -	const unsigned char *data, *chunk_lookup;
>  	size_t graph_size;
>  	struct stat st;
> -	uint32_t i;
> -	struct commit_graph *graph;
> +	struct commit_graph *ret;
>  	int fd = git_open(graph_file);
> -	uint64_t last_chunk_offset;
> -	uint32_t last_chunk_id;
> -	uint32_t graph_signature;
> -	unsigned char graph_version, hash_version;
>
>  	if (fd < 0)
>  		return NULL;
> @@ -108,27 +102,55 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
>  		die(_("graph file %s is too small"), graph_file);
>  	}
>  	graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
> +	ret = parse_commit_graph(graph_map, fd, graph_size);
> +
> +	if (!ret) {
> +		munmap(graph_map, graph_size);
> +		close(fd);
> +		exit(1);
> +	}
> +
> +	return ret;
> +}

Looks like a reasonable splitting of a helper function with a clean
interface. I like it. Thanks.

> +struct commit_graph *parse_commit_graph(void *graph_map, int fd,
> +					size_t graph_size)
> +{
> +	const unsigned char *data, *chunk_lookup;
> +	uint32_t i;
> +	struct commit_graph *graph;
> +	uint64_t last_chunk_offset;
> +	uint32_t last_chunk_id;
> +	uint32_t graph_signature;
> +	unsigned char graph_version, hash_version;
> +
> +	if (!graph_map)
> +		return NULL;
> +
> +	if (graph_size < GRAPH_MIN_SIZE)
> +		return NULL;
> +
>  	data = (const unsigned char *)graph_map;
>
>  	graph_signature = get_be32(data);
>  	if (graph_signature != GRAPH_SIGNATURE) {
>  		error(_("graph signature %X does not match signature %X"),
>  		      graph_signature, GRAPH_SIGNATURE);
> -		goto cleanup_fail;
> +		return NULL;
>  	}
>
>  	graph_version = *(unsigned char*)(data + 4);
>  	if (graph_version != GRAPH_VERSION) {
>  		error(_("graph version %X does not match version %X"),
>  		      graph_version, GRAPH_VERSION);
> -		goto cleanup_fail;
> +		return NULL;
>  	}
>
>  	hash_version = *(unsigned char*)(data + 5);
>  	if (hash_version != GRAPH_OID_VERSION) {
>  		error(_("hash version %X does not match version %X"),
>  		      hash_version, GRAPH_OID_VERSION);
> -		goto cleanup_fail;
> +		return NULL;
>  	}
>
>  	graph = alloc_commit_graph();
> @@ -152,7 +174,8 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
>  		if (chunk_offset > graph_size - GIT_MAX_RAWSZ) {
>  			error(_("improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32),
>  			      (uint32_t)chunk_offset);
> -			goto cleanup_fail;
> +			free(graph);
> +			return NULL;
>  		}
>
>  		switch (chunk_id) {
> @@ -187,7 +210,8 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
>
>  		if (chunk_repeated) {
>  			error(_("chunk id %08x appears multiple times"), chunk_id);
> -			goto cleanup_fail;
> +			free(graph);
> +			return NULL;
>  		}
>
>  		if (last_chunk_id == GRAPH_CHUNKID_OIDLOOKUP)
> @@ -201,11 +225,6 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
>  	}
>
>  	return graph;
> -
> -cleanup_fail:
> -	munmap(graph_map, graph_size);
> -	close(fd);
> -	exit(1);
>  }
>
>  static void prepare_commit_graph_one(struct repository *r, const char *obj_dir)
> diff --git a/commit-graph.h b/commit-graph.h
> index 9db40b4d3a..813e7c19f1 100644
> --- a/commit-graph.h
> +++ b/commit-graph.h
> @@ -54,6 +54,9 @@ struct commit_graph {
>
>  struct commit_graph *load_commit_graph_one(const char *graph_file);
>
> +struct commit_graph *parse_commit_graph(void *graph_map, int fd,
> +					size_t graph_size);
> +
>  /*
>   * Return 1 if and only if the repository has a commit-graph
>   * file and generation numbers are computed in that file.
> diff --git a/fuzz-commit-graph.c b/fuzz-commit-graph.c
> new file mode 100644
> index 0000000000..cf790c9d04
> --- /dev/null
> +++ b/fuzz-commit-graph.c
> @@ -0,0 +1,16 @@
> +#include "commit-graph.h"
> +
> +struct commit_graph *parse_commit_graph(void *graph_map, int fd,
> +					size_t graph_size);
> +
> +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
> +
> +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
> +{
> +	struct commit_graph *g;
> +
> +	g = parse_commit_graph((void *)data, -1, size);
> +	free(g);
> +
> +	return 0;
> +}