[PATCH 2/2] init: support --fast-import using "git fast-import" as backend

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



I was curious to see how the git-import script would perform if rewritten in C.
The result is quite good (I only ran it once, so the numbers may be off).
The patch is not ready for submission, though. Sorry for the patch numbering;
I forgot to add "-n".

Tests were performed after dropping the caches with
"echo 3 > /proc/sys/vm/drop_caches". The test repo is gentoo-x86, which
consists of ~113k small text files.

~/t/gentoo-x86 $ time ~/git/git/git init -M
Initialized empty Git repository in /home/pclouds/t/gentoo-x86/.git/

real    7m30.663s
user    0m33.171s
sys     3m31.599s
~/t/gentoo-x86 $ du -s .git
73700   .git
~/t/gentoo-x86 $ find .git/|wc -l
31
~/t/gentoo-x86 $ git ls-files|wc -l
113012

~/t/gentoo-x86 $ rm .git/ -rf
~/t/gentoo-x86 $ time ~/git/git/git init -m
Initialized empty Git repository in /home/pclouds/t/gentoo-x86/.git/

real    10m5.114s
user    0m13.718s
sys     3m50.317s
~/t/gentoo-x86 $ du -s .git
521960  .git
~/t/gentoo-x86 $ find .git/|wc -l
123275

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 builtin-init-db.c |  108 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 105 insertions(+), 3 deletions(-)

diff --git a/builtin-init-db.c b/builtin-init-db.c
index 3ace4ca..42c126c 100644
--- a/builtin-init-db.c
+++ b/builtin-init-db.c
@@ -386,6 +386,105 @@ static int import_files(const char *import_message)
 	return run_command_v_opt(args, RUN_GIT_CMD);
 }
 
+/*
+ * Send everything below "path" (NULL: current directory) to fast-import on
+ * fdo as inline "M" commands: files as 100644 (NOTE(review): exec bit is
+ * dropped), symlinks as their target; recurses, skips ".git", dies on error.
+ */
+static void fast_import_recursive(int fdo, const char *path)
+{
+	const char *fdir_path = path ? path : ".";
+	DIR *fdir = opendir(fdir_path);
+	char fullname[PATH_MAX + 1], buf[4096];
+	size_t len = 0, nlen;
+	ssize_t n;
+	int fdi;
+	struct dirent *de;
+	struct stat st;
+	struct strbuf s;
+
+	if (!fdir)
+		die("Could not open directory %s", fdir_path);
+	if (path) {
+		len = strlen(path);
+		memcpy(fullname, path, len); /* path is an earlier fullname, so it fits */
+		fullname[len++] = '/';
+	}
+	strbuf_init(&s, 32);
+	while ((de = readdir(fdir)) != NULL) {
+		if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, "..") ||
+		    !strcmp(de->d_name, ".git"))
+			continue;
+		nlen = strlen(de->d_name); /* d_reclen is the record size, not this */
+		if (len + nlen >= sizeof(fullname))
+			die("Path too long: %s/%s", fdir_path, de->d_name);
+		memcpy(fullname + len, de->d_name, nlen + 1);
+		if (lstat(fullname, &st))
+			die("Could not stat %s", fullname);
+		if (S_ISDIR(st.st_mode))
+			fast_import_recursive(fdo, fullname);
+		if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode))
+			continue;
+		strbuf_setlen(&s, 0);
+		if (S_ISLNK(st.st_mode)) {
+			n = readlink(fullname, buf, sizeof(buf)); /* open() would follow the link */
+			if (n < 0 || (size_t)n >= sizeof(buf))
+				die("Could not read link %s", fullname);
+			strbuf_addf(&s, "M 120000 inline %s\ndata %u\n", fullname, (unsigned int)n);
+			write_or_die(fdo, s.buf, s.len);
+			write_or_die(fdo, buf, n);
+		} else {
+			fdi = open(fullname, O_RDONLY);
+			if (fdi == -1)
+				die("Could not open %s", fullname);
+			strbuf_addf(&s, "M 100644 inline %s\ndata %"PRIuMAX"\n", fullname, (uintmax_t)st.st_size);
+			write_or_die(fdo, s.buf, s.len);
+			for (; (n = xread(fdi, buf, sizeof(buf))) > 0; st.st_size -= n)
+				write_or_die(fdo, buf, n);
+			/* body must match the announced size or the stream desyncs */
+			if (n < 0 || st.st_size)
+				die("Could not read %s", fullname);
+			close(fdi);
+		}
+		write_or_die(fdo, "\n", 1);
+	}
+	strbuf_release(&s);
+	closedir(fdir);
+}
+
+#define FAST_IMPORT_DEBUG 0
+
+/*
+ * Import the working tree as one commit on refs/heads/master by streaming it
+ * to "git fast-import"; returns 0, dies if the child fails to start or finish.
+ */
+static int fast_import_files(const char *import_message)
+{
+	struct child_process p;
+	const char *argv[4] = {"fast-import", "--quiet", "--date-format=raw", NULL};
+	struct strbuf s;
+
+	memset(&p, 0, sizeof(p));
+	p.argv = argv;
+	p.git_cmd = 1;
+	p.in = FAST_IMPORT_DEBUG ? 1 : -1; /* debug: dump the stream to stdout */
+	if (!FAST_IMPORT_DEBUG && start_command(&p))
+		die("Could not spawn fast-import");
+	strbuf_init(&s, 64);
+	strbuf_addf(&s, "commit refs/heads/master\ncommitter %s\n", git_committer_info(0));
+	/* counted "data": a "MSGEND" line inside the message cannot end it early */
+	strbuf_addf(&s, "data %lu\n%s\n", (unsigned long)strlen(import_message) + 1, import_message);
+	write_or_die(p.in, s.buf, s.len);
+	strbuf_release(&s);
+	fast_import_recursive(p.in, NULL);
+	if (!FAST_IMPORT_DEBUG) {
+		close(p.in);
+		if (finish_command(&p))
+			die("fast-import died");
+	}
+	return 0;
+}
+
 static const char init_db_usage[] =
 "git init [-q | --quiet] [--bare] [--template=<template-directory>] [--shared[=<permissions>]] [-m|--import [<message>]]";
 
@@ -401,7 +500,7 @@ int cmd_init_db(int argc, const char **argv, const char *prefix)
 	const char *template_dir = NULL;
 	unsigned int flags = 0;
 	const char *import_message = NULL;
-	int ret, i;
+	int ret, i, fast_import = 0;
 
 	for (i = 1; i < argc; i++, argv++) {
 		const char *arg = argv[1];
@@ -416,7 +515,10 @@ int cmd_init_db(int argc, const char **argv, const char *prefix)
 			init_shared_repository = PERM_GROUP;
 		else if (!prefixcmp(arg, "--shared="))
 			init_shared_repository = git_config_perm("arg", arg+9);
-		else if (!strcmp(arg, "--import") || !strcmp(arg, "-m")) {
+		else if (!strcmp(arg, "--import") || !strcmp(arg, "-m") ||
+			 !strcmp(arg, "--fast-import") || !strcmp(arg, "-M")) {
+			if (!strcmp(arg, "--fast-import") || !strcmp(arg, "-M"))
+				fast_import = 1;
 			if (i+1 >= argc)
 				import_message = "Initial commit";
 			else {
@@ -481,5 +583,5 @@ int cmd_init_db(int argc, const char **argv, const char *prefix)
 		return ret;
 	if (reinit)
 		die("--import does not work with already initialized repository");
-	return import_files(import_message);
+	return fast_import ? fast_import_files(import_message) : import_files(import_message);
 }
-- 
1.6.1.446.gc7851

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux