[PATCH] fuzz: add basic fuzz testing for git command

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Arthur Chan <arthur.chan@xxxxxxxxxxxxx>

fuzz-cmd-base.c / fuzz-cmd-base.h provide base functions for
fuzzing git commands; they are compatible with libFuzzer
(and possibly other fuzzing engines).
fuzz-cmd-status.c provides the first git command fuzzing target
as a demonstration of the approach.

CC: Josh Steadmon <steadmon@xxxxxxxxxx>
Signed-off-by: Arthur Chan <arthur.chan@xxxxxxxxxxxxx>
---
    fuzz: add basic fuzz testing for git command
    
    An initial attempt to create a libFuzzer-compatible fuzzer for git
    commands. fuzz-cmd-base.c / fuzz-cmd-base.h provide base functions for
    fuzzing git commands; they are compatible with libFuzzer (and possibly
    other fuzzing engines). fuzz-cmd-status.c provides the first git command
    fuzzing target as a demonstration of the approach.

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1351%2Farthurscchan%2Ffuzz-git-cmd-status-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1351/arthurscchan/fuzz-git-cmd-status-v1
Pull-Request: https://github.com/gitgitgadget/git/pull/1351

 .gitignore        |   2 +
 Makefile          |   2 +
 fuzz-cmd-base.c   | 117 ++++++++++++++++++++++++++++++++++++++++++++++
 fuzz-cmd-base.h   |  13 ++++++
 fuzz-cmd-status.c |  68 +++++++++++++++++++++++++++
 5 files changed, 202 insertions(+)
 create mode 100644 fuzz-cmd-base.c
 create mode 100644 fuzz-cmd-base.h
 create mode 100644 fuzz-cmd-status.c

diff --git a/.gitignore b/.gitignore
index 80b530bbed2..5d0ce214164 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,8 @@
 /fuzz_corpora
 /fuzz-pack-headers
 /fuzz-pack-idx
+/fuzz-cmd-base
+/fuzz-cmd-status
 /GIT-BUILD-OPTIONS
 /GIT-CFLAGS
 /GIT-LDFLAGS
diff --git a/Makefile b/Makefile
index c6e126e54c2..20742935073 100644
--- a/Makefile
+++ b/Makefile
@@ -689,6 +689,7 @@ ETAGS_TARGET = TAGS
 FUZZ_OBJS += fuzz-commit-graph.o
 FUZZ_OBJS += fuzz-pack-headers.o
 FUZZ_OBJS += fuzz-pack-idx.o
+FUZZ_OBJS += fuzz-cmd-status.o
 .PHONY: fuzz-objs
 fuzz-objs: $(FUZZ_OBJS)
 
@@ -961,6 +962,7 @@ LIB_OBJS += fsck.o
 LIB_OBJS += fsmonitor.o
 LIB_OBJS += fsmonitor-ipc.o
 LIB_OBJS += fsmonitor-settings.o
+LIB_OBJS += fuzz-cmd-base.o
 LIB_OBJS += gettext.o
 LIB_OBJS += gpg-interface.o
 LIB_OBJS += graph.o
diff --git a/fuzz-cmd-base.c b/fuzz-cmd-base.c
new file mode 100644
index 00000000000..98f05c78372
--- /dev/null
+++ b/fuzz-cmd-base.c
@@ -0,0 +1,117 @@
+#include "cache.h"
+#include "fuzz-cmd-base.h"
+
+
+/*
+ * Overwrite (or create) "<dir>/<name>" with data_size bytes taken from
+ * data_chunk, normally supplied by the fuzzing engine. Best effort:
+ * nothing is written if the path would be truncated (and so name another
+ * file), if data_size is negative, or if the file cannot be opened.
+ */
+void randomize_git_file(char *dir, char *name, char *data_chunk, int data_size) {
+   char fname[256];
+   FILE *fp;
+   int len = snprintf(fname, sizeof(fname), "%s/%s", dir, name);
+
+   if (len < 0 || (size_t)len >= sizeof(fname) || data_size < 0)
+      return;
+   fp = fopen(fname, "wb");
+   if (fp) {
+      fwrite(data_chunk, 1, data_size, fp);
+      fclose(fp);
+   }
+}
+
+/*
+ * Split data into files_count equal chunks of size / files_count bytes
+ * and write chunk i to the file name_set[i] under dir; leftover bytes
+ * are ignored. Does nothing when files_count is not positive.
+ */
+void randomize_git_files(char *dir, char *name_set[], int files_count, char *data, int size) {
+   int data_size;
+
+   /* Guard the division below against a zero (or negative) count. */
+   if (files_count <= 0)
+      return;
+   data_size = size / files_count;
+
+   /* Each chunk is only read, so hand out slices of data directly. */
+   for (int i = 0; i < files_count; i++)
+      randomize_git_file(dir, name_set[i], data + (i * data_size), data_size);
+}
+
+/*
+ * Create a file named "TEMP-<hex>-TEMP" in the current directory.
+ * <hex> is hash_to_hex() of the bytes at the start of data, and the
+ * size bytes starting at data + size become the file content, so data
+ * must provide at least 2 * size readable bytes.
+ * NOTE(review): presumably size must cover a full raw hash for
+ * hash_to_hex(); confirm against its definition.
+ */
+void generate_random_file(char *data, int size) {
+   char *fname = malloc((size*2)+12);
+
+   /* Out of memory: skip this file instead of writing through NULL. */
+   if (!fname)
+      return;
+
+   /* Both halves of data are only read, so no private copies are made. */
+   snprintf(fname, size*2+11, "TEMP-%s-TEMP", hash_to_hex((unsigned char *)data));
+   randomize_git_file(".", fname, data + size, size);
+
+   free(fname);
+}
+
+/*
+ * Create one commit on top of the repository in the current directory:
+ * write a "TEMP-<hex>-TEMP" file derived from the first 2 * size bytes
+ * of data, stage every TEMP-*-TEMP file and commit. The exit statuses
+ * of the git commands are accumulated but not reported to the caller.
+ */
+void generate_commit(char *data, int size) {
+   int ret = 0;
+
+   /* generate_random_file() only reads data, so it is passed through
+    * directly rather than copied into an unchecked malloc() buffer. */
+   generate_random_file(data, size);
+
+   ret += system("git add TEMP-*-TEMP");
+   ret += system("git commit -m\"New Commit\"");
+}
+
+/*
+ * Recreate the repository in the current directory before a fuzzing
+ * round: wipe .git and TEMP-*-TEMP, "git init", set the global identity
+ * and safe.directory, then commit ./TEMP_1 and ./TEMP_2 (not created
+ * here). Returns the sum of the system() results.
+ */
+int reset_git_folder(void) {
+   int ret = 0;
+
+   ret += system("rm -rf ./.git");
+   ret += system("rm -f ./TEMP-*-TEMP");
+   ret += system("git init");
+   ret += system("git config --global user.name \"FUZZ\"");
+   ret += system("git config --global user.email \"FUZZ@LOCALHOST\"");
+   /* --replace-all keeps one entry; --add grew the file on every call. */
+   ret += system("git config --global --replace-all safe.directory '*'");
+   ret += system("git add ./TEMP_1 ./TEMP_2");
+   ret += system("git commit -m\"First Commit\"");
+   return ret;
+}
+
+/*
+ * Return how many commits can be generated from data_size bytes without
+ * reusing input: after subtracting 4 bytes and 2 bytes per git file,
+ * each commit consumes 2 * hash_size bytes. The result is clamped to
+ * the range [0, 20]; a non-positive hash_size yields 0.
+ */
+int get_max_commit_count(int data_size, int git_files_count, int hash_size) {
+   int count;
+
+   if (hash_size <= 0)
+      return 0; /* avoid dividing by zero */
+   count = (data_size - 4 - git_files_count * 2) / (hash_size * 2);
+   /* Clamp to [0, 20]. */
+   return count < 0 ? 0 : (count > 20 ? 20 : count);
+}
diff --git a/fuzz-cmd-base.h b/fuzz-cmd-base.h
new file mode 100644
index 00000000000..d63e46eac75
--- /dev/null
+++ b/fuzz-cmd-base.h
@@ -0,0 +1,13 @@
+#ifndef FUZZ_CMD_BASE_H
+#define FUZZ_CMD_BASE_H
+/* Helpers for preparing a scratch git repository for command fuzzers. */
+#define HASH_SIZE 20 /* presumably the raw SHA-1 size in bytes; TODO confirm */
+/* The data arguments below are raw byte buffers, normally fuzzer input. */
+void randomize_git_files(char *dir, char *name_set[], int files_count, char *data, int size);
+void randomize_git_file(char *dir, char *name, char *data_chunk, int data_size);
+void generate_random_file(char *data, int size);
+void generate_commit(char *data, int size);
+int reset_git_folder(void);
+int get_max_commit_count(int data_size, int git_files_count, int hash_size);
+
+#endif
diff --git a/fuzz-cmd-status.c b/fuzz-cmd-status.c
new file mode 100644
index 00000000000..b02410a1259
--- /dev/null
+++ b/fuzz-cmd-status.c
@@ -0,0 +1,68 @@
+#include "builtin.h"
+#include "repository.h"
+#include "fuzz-cmd-base.h"
+
+int cmd_status(int argc, const char **argv, const char *prefix);
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
+
+/*
+ * libFuzzer entry point: build a repository with 1..N commits from
+ * the input bytes, then call cmd_status() with "status -v" on it.
+ * Inputs of HASH_SIZE * 2 + 4 bytes or fewer are ignored.
+ */
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+   uint32_t seed;
+   int no_of_commit;
+   int max_commit_count;
+   char *argv[2];
+   char data_chunk[HASH_SIZE * 2];
+   char *basedir = "./.git";
+
+   /* Initialize the repository */
+   initialize_the_repository();
+
+   max_commit_count = get_max_commit_count(size, 0, HASH_SIZE);
+
+   /* End this round of fuzzing if the data is not large enough */
+   if (size <= (HASH_SIZE * 2 + 4)) {
+      repo_clear(the_repository);
+      return 0;
+   }
+
+   if (reset_git_folder()) {
+      repo_clear(the_repository);
+      return 0;
+   }
+
+   /*
+    * Pick 1..max_commit_count commits from the first 4 input bytes.
+    * memcpy() into an unsigned value avoids a misaligned load and a
+    * negative remainder, which would otherwise yield no commit.
+    */
+   memcpy(&seed, data, sizeof(seed));
+   no_of_commit = (int)(seed % (uint32_t)max_commit_count) + 1;
+   data += 4;
+   size -= 4;
+
+   /* Each commit consumes HASH_SIZE * 2 bytes; the stack copy drops const. */
+   for (int i=0; i<no_of_commit; i++) {
+      memcpy(data_chunk, data, HASH_SIZE * 2);
+      generate_commit(data_chunk, HASH_SIZE);
+      data += (HASH_SIZE * 2);
+      size -= (HASH_SIZE * 2);
+   }
+
+   /* Final preparation: clear the repository and re-init it from basedir */
+   repo_clear(the_repository);
+   repo_init(the_repository, basedir, ".");
+
+   /* Calling target git command */
+   argv[0] = "status";
+   argv[1] = "-v";
+   cmd_status(2, (const char **)argv, (const char *)"");
+
+   repo_clear(the_repository);
+
+   return 0;
+}

base-commit: dd3f6c4cae7e3b15ce984dce8593ff7569650e24
-- 
gitgitgadget



[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux