From: Arthur Chan <arthur.chan@xxxxxxxxxxxxx> fuzz-cmd-base.c / fuzz-cmd-base.h provides base functions for fuzzing on git command which are compatible with libFuzzer (and possibly other fuzzing engines). fuzz-cmd-status.c provides first git command fuzzing target as a demonstration of the approach. CC: Josh Steadmon <steadmon@xxxxxxxxxx> Signed-off-by: Arthur Chan <arthur.chan@xxxxxxxxxxxxx> --- fuzz: add basic fuzz testing for git command An initial attempt to create LibFuzzer compatible fuzzer for git command. fuzz-cmd-base.c / fuzz-cmd-base.h provides base functions for fuzzing on git command which are compatible with libFuzzer (and possibly other fuzzing engines). fuzz-cmd-status.c provides first git command fuzzing target as a demonstration of the approach. Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1351%2Farthurscchan%2Ffuzz-git-cmd-status-v1 Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1351/arthurscchan/fuzz-git-cmd-status-v1 Pull-Request: https://github.com/gitgitgadget/git/pull/1351 .gitignore | 2 + Makefile | 2 + fuzz-cmd-base.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++ fuzz-cmd-base.h | 13 ++++++ fuzz-cmd-status.c | 68 +++++++++++++++++++++++++++ 5 files changed, 202 insertions(+) create mode 100644 fuzz-cmd-base.c create mode 100644 fuzz-cmd-base.h create mode 100644 fuzz-cmd-status.c diff --git a/.gitignore b/.gitignore index 80b530bbed2..5d0ce214164 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ /fuzz_corpora /fuzz-pack-headers /fuzz-pack-idx +/fuzz-cmd-base +/fuzz-cmd-status /GIT-BUILD-OPTIONS /GIT-CFLAGS /GIT-LDFLAGS diff --git a/Makefile b/Makefile index c6e126e54c2..20742935073 100644 --- a/Makefile +++ b/Makefile @@ -689,6 +689,7 @@ ETAGS_TARGET = TAGS FUZZ_OBJS += fuzz-commit-graph.o FUZZ_OBJS += fuzz-pack-headers.o FUZZ_OBJS += fuzz-pack-idx.o +FUZZ_OBJS += fuzz-cmd-status.o .PHONY: fuzz-objs fuzz-objs: $(FUZZ_OBJS) @@ -961,6 +962,7 @@ LIB_OBJS += fsck.o LIB_OBJS += fsmonitor.o LIB_OBJS += fsmonitor-ipc.o LIB_OBJS += fsmonitor-settings.o +LIB_OBJS += fuzz-cmd-base.o LIB_OBJS += gettext.o LIB_OBJS += gpg-interface.o LIB_OBJS += graph.o diff --git a/fuzz-cmd-base.c b/fuzz-cmd-base.c new file mode 100644 index 00000000000..98f05c78372 --- /dev/null +++ b/fuzz-cmd-base.c @@ -0,0 +1,117 @@ +#include "cache.h" +#include "fuzz-cmd-base.h" + + +/* + * This function is used to randomize the content of a file with the + * random data. The random data normally come from the fuzzing engine + * LibFuzzer in order to create randomization of the git file worktree + * and possibly messing up of certain git config file to fuzz different + * git command execution logic. + */ +void randomize_git_file(char *dir, char *name, char *data_chunk, int data_size) { + char fname[256]; + FILE *fp; + + snprintf(fname, 255, "%s/%s", dir, name); + + fp = fopen(fname, "wb"); + if (fp) { + fwrite(data_chunk, 1, data_size, fp); + fclose(fp); + } +} + +/* + * This function is the variants of the above functions which takes + * in a set of target files to be processed. These target file are + * passing to the above function one by one for content rewrite. + */ +void randomize_git_files(char *dir, char *name_set[], int files_count, char *data, int size) { + int data_size = size / files_count; + + for(int i=0; i<files_count; i++) { + char *data_chunk = malloc(data_size); + memcpy(data_chunk, data + (i * data_size), data_size); + + randomize_git_file(dir, name_set[i], data_chunk, data_size); + + free(data_chunk); + } +} + +/* + * Instead of randomizing the content of existing files. This helper + * function helps generate a temp file with random file name before + * passing to the above functions to get randomized content for later + * fuzzing of git command + */ +void generate_random_file(char *data, int size) { + unsigned char *hash = malloc(size); + char *fname = malloc((size*2)+12); + char *data_chunk = malloc(size); + + memcpy(hash, data, size); + memcpy(data_chunk, data + size, size); + + snprintf(fname, size*2+11, "TEMP-%s-TEMP", hash_to_hex(hash)); + randomize_git_file(".", fname, data_chunk, size); + + free(hash); + free(fname); + free(data_chunk); +} + +/* + * This function helps to generate random commit and build up a + * worktree with randomization to provide a target for the fuzzing + * of git commands. + */ +void generate_commit(char *data, int size) { + int ret = 0; + char *data_chunk = malloc(size * 2); + memcpy(data_chunk, data, size * 2); + + generate_random_file(data_chunk, size); + ret += system("git add TEMP-*-TEMP"); + ret += system("git commit -m\"New Commit\""); + + free(data_chunk); +} + +/* + * In some cases, there maybe some fuzzing logic that will mess + * up with the git repository and its configuration and settings. + * This function aims to reset the git repository into the default + * base settings before each round of fuzzing. + */ +int reset_git_folder(void) { + int ret = 0; + + ret += system("rm -rf ./.git"); + ret += system("rm -f ./TEMP-*-TEMP"); + ret += system("git init"); + ret += system("git config --global user.name \"FUZZ\""); + ret += system("git config --global user.email \"FUZZ@LOCALHOST\""); + ret += system("git config --global --add safe.directory '*'"); + ret += system("git add ./TEMP_1 ./TEMP_2"); + ret += system("git commit -m\"First Commit\""); + + return ret; +} + +/* + * This helper function returns the maximum number of commit can + * be generated by the provided random data without reusing the + * data to increase randomization of the fuzzing target and allow + * more path of fuzzing to be covered. + */ +int get_max_commit_count(int data_size, int git_files_count, int hash_size) { + int count = (data_size - 4 - git_files_count * 2) / (hash_size * 2); + + if(count > 20) { + count = 20; + } + + return count; +} diff --git a/fuzz-cmd-base.h b/fuzz-cmd-base.h new file mode 100644 index 00000000000..d63e46eac75 --- /dev/null +++ b/fuzz-cmd-base.h @@ -0,0 +1,13 @@ +#ifndef FUZZ_CMD_BASE_H +#define FUZZ_CMD_BASE_H + +#define HASH_SIZE 20 + +void randomize_git_files(char *dir, char *name_set[], int files_count, char *data, int size); +void randomize_git_file(char *dir, char *name, char *data_chunk, int data_size); +void generate_random_file(char *data, int size); +void generate_commit(char *data, int size); +int reset_git_folder(void); +int get_max_commit_count(int data_size, int git_files_count, int hash_size); + +#endif diff --git a/fuzz-cmd-status.c b/fuzz-cmd-status.c new file mode 100644 index 00000000000..b02410a1259 --- /dev/null +++ b/fuzz-cmd-status.c @@ -0,0 +1,68 @@ +#include "builtin.h" +#include "repository.h" +#include "fuzz-cmd-base.h" + +int cmd_status(int argc, const char **argv, const char *prefix); + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size); + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + int no_of_commit; + int max_commit_count; + char *argv[2]; + char *data_chunk; + char *basedir = "./.git"; + + /* + * Initialize the repository + */ + initialize_the_repository(); + + max_commit_count = get_max_commit_count(size, 0, HASH_SIZE); + + /* + * End this round of fuzzing if the data is not large enough + */ + if (size <= (HASH_SIZE * 2 + 4)) { + repo_clear(the_repository); + return 0; + } + + if (reset_git_folder()) { + repo_clear(the_repository); + return 0; + } + + /* + * Generate random commit + */ + no_of_commit = (*((int *)data)) % max_commit_count + 1; + data += 4; + size -= 4; + + for (int i=0; i<no_of_commit; i++) { + data_chunk = malloc(HASH_SIZE * 2); + memcpy(data_chunk, data, HASH_SIZE * 2); + generate_commit(data_chunk, HASH_SIZE); + data += (HASH_SIZE * 2); + size -= (HASH_SIZE * 2); + free(data_chunk); + } + + /* + * Final preparing of the repository settings + */ + repo_clear(the_repository); + repo_init(the_repository, basedir, "."); + + /* + * Calling target git command + */ + argv[0] = "status"; + argv[1] = "-v"; + cmd_status(2, (const char **)argv, (const char *)""); + + repo_clear(the_repository); + + return 0; +} base-commit: dd3f6c4cae7e3b15ce984dce8593ff7569650e24 -- gitgitgadget