I was curious to see how git-import script performed in C version. The
result is quite good (only tried once so might be incorrect). The patch is
not ready for submission though. Sorry for patch numbering, forgot to add
"-n".

Tests were performed with "echo 3 > /proc/sys/vm/drop_caches". Test repo is
gentoo-x86, which consists of ~113k small text files.

~/t/gentoo-x86 $ time ~/git/git/git init -M
Initialized empty Git repository in /home/pclouds/t/gentoo-x86/.git/

real	7m30.663s
user	0m33.171s
sys	3m31.599s
~/t/gentoo-x86 $ du -s .git
73700	.git
~/t/gentoo-x86 $ find .git/|wc -l
31
~/t/gentoo-x86 $ git ls-files|wc -l
113012
~/t/gentoo-x86 $ rm .git/ -rf
~/t/gentoo-x86 $ time ~/git/git/git init -m
Initialized empty Git repository in /home/pclouds/t/gentoo-x86/.git/

real	10m5.114s
user	0m13.718s
sys	3m50.317s
~/t/gentoo-x86 $ du -s .git
521960	.git
~/t/gentoo-x86 $ find .git/|wc -l
123275

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 builtin-init-db.c |  152 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 149 insertions(+), 3 deletions(-)

diff --git a/builtin-init-db.c b/builtin-init-db.c
index 3ace4ca..42c126c 100644
--- a/builtin-init-db.c
+++ b/builtin-init-db.c
@@ -386,6 +386,149 @@ static int import_files(const char *import_message)
 	return run_command_v_opt(args, RUN_GIT_CMD);
 }
 
+/*
+ * Feed every file found below "path" ("." when path is NULL) to the
+ * fast-import stream on fdo, recursing into subdirectories and skipping
+ * ".git".  Regular files become 100644 blobs and symlinks become 120000
+ * blobs holding the link target.  Dies on any error.
+ *
+ * NOTE(review): the executable bit is not preserved and paths are written
+ * unquoted, so names that need C-style quoting are not supported yet.
+ */
+static void fast_import_recursive(int fdo, const char *path)
+{
+	DIR *fdir;
+	char fullname[PATH_MAX + 1];
+	int len, dtype, fdi;
+	struct dirent *de;
+	struct stat st;
+	const char *fdir_path;
+	char buf[4096];
+	int buflen;
+	size_t namelen;
+	uintmax_t total;
+	struct strbuf s;
+
+	fdir_path = path ? path : ".";
+	fdir = opendir(fdir_path);
+	if (!fdir)
+		die("Could not open directory %s", fdir_path);
+
+	if (path) {
+		len = strlen(path);
+		/* leave room for the '/' and at least a terminating NUL */
+		if ((size_t)len + 1 >= sizeof(fullname))
+			die("Path too long: %s", path);
+		memcpy(fullname, path, len);
+		fullname[len++] = '/';
+	}
+	else
+		len = 0;
+	strbuf_init(&s, 32);
+	while ((de = readdir(fdir)) != NULL) {
+		if (!strcmp(de->d_name, ".") ||
+		    !strcmp(de->d_name, "..") ||
+		    !strcmp(de->d_name, ".git"))
+			continue;
+		dtype = DTYPE(de);
+		/* d_reclen is the size of the dirent record, not of the name */
+		namelen = strlen(de->d_name);
+		if (len + namelen >= sizeof(fullname))
+			die("Path too long: %s/%s", fdir_path, de->d_name);
+		memcpy(fullname+len, de->d_name, namelen+1);
+		if (lstat(fullname, &st))
+			die("Could not stat %s", fullname);
+		if (dtype == DT_UNKNOWN) {
+			if (S_ISREG(st.st_mode))
+				dtype = DT_REG;
+			if (S_ISDIR(st.st_mode))
+				dtype = DT_DIR;
+			if (S_ISLNK(st.st_mode))
+				dtype = DT_LNK;
+		}
+		switch (dtype) {
+		case DT_DIR:
+			fast_import_recursive(fdo, fullname);
+			break;
+		case DT_LNK:
+			/* store the link itself; open() would follow it */
+			buflen = readlink(fullname, buf, sizeof(buf));
+			if (buflen < 0 || (size_t)buflen >= sizeof(buf))
+				die("Could not read link %s", fullname);
+			strbuf_setlen(&s, 0);
+			strbuf_addf(&s, "M 120000 inline %s\n", fullname);
+			strbuf_addf(&s, "data %d\n", buflen);
+			write_or_die(fdo, s.buf, s.len);
+			write_or_die(fdo, buf, buflen);
+			write_or_die(fdo, "\n", 1);
+			break;
+		case DT_REG:
+			fdi = open(fullname, O_RDONLY);
+			if (fdi == -1)
+				die("Could not open %s", fullname);
+			strbuf_setlen(&s, 0);
+			strbuf_addf(&s,"M 100644 inline %s\n", fullname);
+			strbuf_addf(&s, "data %"PRIuMAX"\n", (uintmax_t)st.st_size);
+			write_or_die(fdo, s.buf, s.len);
+			total = 0;
+			while ((buflen = xread(fdi, buf, sizeof(buf))) > 0) {
+				write_or_die(fdo, buf, buflen);
+				total += buflen;
+			}
+			if (buflen < 0)
+				die("Could not read %s", fullname);
+			/* the byte count was already announced in "data" */
+			if (total != (uintmax_t)st.st_size)
+				die("%s changed while being imported", fullname);
+			close(fdi);
+			write_or_die(fdo, "\n", 1);
+			break;
+		}
+	}
+	strbuf_release(&s);
+	closedir(fdir);
+}
+
+#define FAST_IMPORT_DEBUG 0
+
+/*
+ * Commit the whole working tree to refs/heads/master through a
+ * "git fast-import" child, using import_message as the commit message.
+ * Dies on failure and returns 0 otherwise.  With FAST_IMPORT_DEBUG set,
+ * the stream is written to fd 1 and no child process is started.
+ */
+static int fast_import_files(const char *import_message)
+{
+	struct child_process p;
+	const char *argv[4] = {"fast-import", "--quiet", "--date-format=raw", NULL};
+	struct strbuf s;
+
+	memset(&p, 0, sizeof(p));
+	p.argv = argv;
+	p.in = -1;
+	p.git_cmd = 1;
+#if FAST_IMPORT_DEBUG
+	p.in = 1;
+#else
+	if (start_command(&p))
+		die("Could not spawn fast-import");
+#endif
+	strbuf_init(&s, 64);
+	strbuf_addstr(&s, "commit refs/heads/master\n");
+	strbuf_addf(&s, "committer %s\n", git_committer_info(0));
+	/* exact byte count: a delimiter could also occur in the message */
+	strbuf_addf(&s, "data %lu\n%s\n", (unsigned long)strlen(import_message) + 1, import_message);
+	write_or_die(p.in, s.buf, s.len);
+	strbuf_release(&s);
+	fast_import_recursive(p.in, NULL);
+#if !FAST_IMPORT_DEBUG
+	close(p.in);
+	if (finish_command(&p))
+		die("fast-import died");
+#endif
+	return 0;
+}
+
 static const char init_db_usage[] =
 "git init [-q | --quiet] [--bare] [--template=<template-directory>] [--shared[=<permissions>]] [-m|--import [<message>]]";
 
@@ -401,7 +544,7 @@ int cmd_init_db(int argc, const char **argv, const char *prefix)
 	const char *template_dir = NULL;
 	unsigned int flags = 0;
 	const char *import_message = NULL;
-	int ret, i;
+	int ret, i, fast_import = 0;
 
 	for (i = 1; i < argc; i++, argv++) {
 		const char *arg = argv[1];
@@ -416,7 +559,10 @@ int cmd_init_db(int argc, const char **argv, const char *prefix)
 			init_shared_repository = PERM_GROUP;
 		else if (!prefixcmp(arg, "--shared="))
 			init_shared_repository = git_config_perm("arg", arg+9);
-		else if (!strcmp(arg, "--import") || !strcmp(arg, "-m")) {
+		else if (!strcmp(arg, "--import") || !strcmp(arg, "-m") ||
+			 !strcmp(arg, "--fast-import") || !strcmp(arg, "-M")) {
+			if (!strcmp(arg, "--fast-import") || !strcmp(arg, "-M"))
+				fast_import = 1;
 			if (i+1 >= argc)
 				import_message = "Initial commit";
 			else {
@@ -481,5 +627,5 @@ int cmd_init_db(int argc, const char **argv, const char *prefix)
 		return ret;
 	if (reinit)
 		die("--import does not work with already initialized repository");
-	return import_files(import_message);
+	return fast_import ? fast_import_files(import_message) : import_files(import_message);
 }
-- 
1.6.1.446.gc7851

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html