[PATCH 1/2] Add support for multi-threaded checkout

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This speeds up operations like 'git clone' on NFS drives tremendously, but
slows down the same operations on local disks.

Partitioning the work and launching threads is done in unpack-trees.c.  The code
is mostly copied from preload-index.c.  The maximum number of threads is set to
8, which seemed to give a reasonable tradeoff between performance improvement on
NFS and degradation on local disks.

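A mutex was added to entry.c for serialization.  Most of the work done in
checkout_entry and write_entry (reading the blob, converting it to working tree
format, creating leading directories) is serialized under that mutex; only
writing the checked-out files to disk runs in parallel.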
---
 entry.c        |   42 +++++++++++++++++---
 unpack-trees.c |  115 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 151 insertions(+), 6 deletions(-)

diff --git a/entry.c b/entry.c
index aa2ee46..764d2db 100644
--- a/entry.c
+++ b/entry.c
@@ -1,6 +1,21 @@
 #include "cache.h"
 #include "blob.h"
 
+#ifdef NO_PTHREADS
+
+#define checkout_lock()		(void)0
+#define checkout_unlock()	(void)0
+
+#else
+
+#include <pthread.h>
+
+static pthread_mutex_t checkout_mutex = PTHREAD_MUTEX_INITIALIZER;
+#define checkout_lock()		pthread_mutex_lock(&checkout_mutex)
+#define checkout_unlock()	pthread_mutex_unlock(&checkout_mutex)
+
+#endif
+
 static void create_directories(const char *path, const struct checkout *state)
 {
 	int len = strlen(path);
@@ -100,7 +115,7 @@ static void *read_blob_entry(struct cache_entry *ce, const char *path, unsigned
 
 static int write_entry(struct cache_entry *ce, char *path, const struct checkout *state, int to_tempfile)
 {
-	int fd;
+	int fd, retval;
 	long wrote;
 
 	switch (ce->ce_mode & S_IFMT) {
@@ -109,10 +124,15 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
 		unsigned long size;
 
 	case S_IFREG:
+		checkout_lock();
 		new = read_blob_entry(ce, path, &size);
-		if (!new)
-			return error("git checkout-index: unable to read sha1 file of %s (%s)",
+
+		if (!new) {
+			retval = error("git checkout-index: unable to read sha1 file of %s (%s)",
 				path, sha1_to_hex(ce->sha1));
+			checkout_unlock();
+			return retval;
+		}
 
 		/*
 		 * Convert from git internal format to working tree format
@@ -124,6 +144,7 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
 			new = strbuf_detach(&buf, &newsize);
 			size = newsize;
 		}
+		checkout_unlock();
 
 		if (to_tempfile) {
 			strcpy(path, ".merge_file_XXXXXX");
@@ -143,10 +164,17 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
 			return error("git checkout-index: unable to write file %s", path);
 		break;
 	case S_IFLNK:
+		checkout_lock();
 		new = read_blob_entry(ce, path, &size);
-		if (!new)
-			return error("git checkout-index: unable to read sha1 file of %s (%s)",
+
+		if (!new) {
+			retval = error("git checkout-index: unable to read sha1 file of %s (%s)",
 				path, sha1_to_hex(ce->sha1));
+			checkout_unlock();
+			return retval;
+		}
+		checkout_unlock();
+
 		if (to_tempfile || !has_symlinks) {
 			if (to_tempfile) {
 				strcpy(path, ".merge_link_XXXXXX");
@@ -192,7 +220,7 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
 
 int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath)
 {
-	static char path[PATH_MAX + 1];
+	char path[PATH_MAX + 1];
 	struct stat st;
 	int len = state->base_dir_len;
 
@@ -229,6 +257,8 @@ int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *t
 			return error("unable to unlink old '%s' (%s)", path, strerror(errno));
 	} else if (state->not_new)
 		return 0;
+	checkout_lock();
 	create_directories(path, state);
+	checkout_unlock();
 	return write_entry(ce, path, state, 0);
 }
diff --git a/unpack-trees.c b/unpack-trees.c
index 54f301d..30b9862 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -8,6 +8,10 @@
 #include "progress.h"
 #include "refs.h"
 
+#ifndef NO_PTHREADS
+#include <pthread.h>
+#endif
+
 /*
  * Error messages expected by scripts out of plumbing commands such as
  * read-tree.  Non-scripted Porcelain is not required to use these messages
@@ -85,6 +89,115 @@ static void unlink_entry(struct cache_entry *ce)
 }
 
 static struct checkout state;
+
+#ifdef NO_PTHREADS
+#define progress_lock()		(void)0
+#define progress_unlock()	(void)0
+
+static int threaded_checkout(struct index_state *index, int update, struct progress *prog, unsigned *prog_cnt)
+{
+	return 0; /* do nothing */
+}
+
+#else
+
+#include <pthread.h>
+
+static pthread_mutex_t progress_mutex = PTHREAD_MUTEX_INITIALIZER;
+#define progress_lock()		pthread_mutex_lock(&progress_mutex)
+#define progress_unlock()	pthread_mutex_unlock(&progress_mutex)
+
+/*
+ * Mostly randomly chosen maximum thread counts: we
+ * cap the parallelism to 8 threads, and we want
+ * to have at least 500 files per thread for it to
+ * be worth starting a thread.
+ */
+#define MAX_PARALLEL (8)
+#define THREAD_COST (500)
+
+struct thread_data {
+	pthread_t pthread;
+	struct index_state *index;
+	struct checkout *state;
+	int update, offset, nr, errs;
+	struct progress *progress;
+	unsigned *progress_cnt;
+};
+
+static void *checkout_thread(void *_data)
+{
+	int nr;
+	struct thread_data *p = _data;
+	struct index_state *index = p->index;
+	struct cache_entry **cep = index->cache + p->offset;
+
+	p->errs = 0;
+
+	nr = p->nr;
+	if (0 == nr) {
+		return NULL;
+	}
+
+	if (nr + p->offset > index->cache_nr)
+		nr = index->cache_nr - p->offset;
+
+	do {
+		struct cache_entry *ce = *cep++;
+
+		if (ce->ce_flags & CE_UPDATE) {
+			progress_lock();
+			display_progress(p->progress, ++(*p->progress_cnt));
+			progress_unlock();
+			ce->ce_flags &= ~CE_UPDATE;
+			if (p->update) {
+				p->errs |= checkout_entry(ce, p->state, NULL);
+				fflush(stdout);
+			}
+		}
+	} while (--nr > 0);
+	return NULL;
+}
+
+static int threaded_checkout(struct index_state *index, int update, struct progress *prog, unsigned *prog_cnt)
+{
+	int threads, work, offset, i;
+	struct thread_data data[MAX_PARALLEL];
+	int errs = 0;
+
+	threads = index->cache_nr / THREAD_COST;
+	if (threads > MAX_PARALLEL)
+		threads = MAX_PARALLEL;
+	else if (threads == 0)
+		return 0;
+
+	offset = 0;
+	work = (index->cache_nr + threads - 1) / threads;
+	for (i = 0; i < threads; i++) {
+		struct thread_data *p = data+i;
+		p->index = index;
+		p->offset = offset;
+		p->nr = work;
+		p->state = &state;
+		p->update = update;
+		p->progress = prog;
+		p->progress_cnt = prog_cnt;
+		offset += work;
+		if (pthread_create(&p->pthread, NULL, checkout_thread, p))
+			die("unable to create threaded checkout");
+	}
+	for (i = 0; i < threads; i++) {
+		struct thread_data *p = data+i;
+		if (pthread_join(p->pthread, NULL))
+			die("unable to join threaded checkout");
+		errs |= p->errs;
+	}
+
+	return errs;
+}
+
+#endif
+
 static int check_updates(struct unpack_trees_options *o)
 {
 	unsigned cnt = 0, total = 0;
@@ -118,6 +231,8 @@ static int check_updates(struct unpack_trees_options *o)
 		}
 	}
 
+	errs |= threaded_checkout(index, o->update, progress, &cnt);
+
 	for (i = 0; i < index->cache_nr; i++) {
 		struct cache_entry *ce = index->cache[i];
 
-- 
1.6.0.4.1116.gc5d7

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux