[PATCH v2 0/3] read-cache: speed up index load through parallelization

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The big changes in this iteration are:

- Switched to index.threads to provide control over the use of threading

- Added another worker thread to load the index extensions in parallel

- Applied the expand_name_field() optimization suggested by Duy

The net result of these optimizations is a savings of 25.8% (1,000,000 files)
to 38.1% (100,000 files) as measured by p0002-read-cache.sh.

This patch conflicts with Duy's patch to remove the double memory copy and
pass in the previous ce instead.  The two will need to be merged/reconciled
once they settle down a bit.


Base Ref: master
Web-Diff: https://github.com/benpeart/git/commit/39f2b0f5fe
Checkout: git fetch https://github.com/benpeart/git read-index-multithread-v2 && git checkout 39f2b0f5fe


### Interdiff (v1..v2):

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 3344685cc4..79f8296d9c 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -899,14 +899,6 @@ relatively high IO latencies.  When enabled, Git will do the
 index comparison to the filesystem data in parallel, allowing
 overlapping IO's.  Defaults to true.
 
-core.fastIndex::
-       Enable parallel index loading
-+
-This can speed up operations like 'git diff' and 'git status' especially
-when the index is very large.  When enabled, Git will do the index
-loading from the on disk format to the in-memory format in parallel.
-Defaults to true.
-
 core.createObject::
 	You can set this to 'link', in which case a hardlink followed by
 	a delete of the source are used to make sure that object creation
@@ -2399,6 +2391,12 @@ imap::
 	The configuration variables in the 'imap' section are described
 	in linkgit:git-imap-send[1].
 
+index.threads::
+	Specifies the number of threads to spawn when loading the index.
+	This is meant to reduce index load time on multiprocessor machines.
+	Specifying 0 or 'true' will cause Git to auto-detect the number of
+	CPU's and set the number of threads accordingly. Defaults to 'true'.
+
 index.version::
 	Specify the version with which new index files should be
 	initialized.  This does not affect existing repositories.
diff --git a/config.c b/config.c
index 883092fdd3..3bda124550 100644
--- a/config.c
+++ b/config.c
@@ -2289,17 +2289,18 @@ int git_config_get_fsmonitor(void)
 	return 0;
 }
 
-int git_config_get_fast_index(void)
+int git_config_get_index_threads(void)
 {
-	int val;
+	int is_bool, val;
 
-	if (!git_config_get_maybe_bool("core.fastindex", &val))
+	if (!git_config_get_bool_or_int("index.threads", &is_bool, &val)) {
+		if (is_bool)
+			return val ? 0 : 1;
+		else
 			return val;
+	}
 
-	if (getenv("GIT_FASTINDEX_TEST"))
-		return 1;
-
-	return -1; /* default value */
+	return 0; /* auto-detect */
 }
 
 NORETURN
diff --git a/config.h b/config.h
index 74ca4e7db5..a06027e69b 100644
--- a/config.h
+++ b/config.h
@@ -250,7 +250,7 @@ extern int git_config_get_untracked_cache(void);
 extern int git_config_get_split_index(void);
 extern int git_config_get_max_percent_split_change(void);
 extern int git_config_get_fsmonitor(void);
-extern int git_config_get_fast_index(void);
+extern int git_config_get_index_threads(void);
 
 /* This dies if the configured or default date is in the future */
 extern int git_config_get_expiry(const char *key, const char **output);
diff --git a/read-cache.c b/read-cache.c
index 0fa7e1a04c..f5e7c86c42 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -24,10 +24,6 @@
 #include "utf8.h"
 #include "fsmonitor.h"
 
-#ifndef min
-#define min(a,b) (((a) < (b)) ? (a) : (b))
-#endif
-
 /* Mask for the name length in ce_flags in the on-disk index */
 
 #define CE_NAMEMASK  (0x0fff)
@@ -1758,9 +1754,8 @@ static unsigned long expand_name_field(struct strbuf *name, const char *cp_)
 
 	if (name->len < len)
 		die("malformed name field in the index");
-	strbuf_remove(name, name->len - len, len);
-	for (ep = cp; *ep; ep++)
-		; /* find the end */
+	strbuf_setlen(name, name->len - len);
+	ep = cp + strlen((const char *)cp);
 	strbuf_add(name, cp, ep - cp);
 	return (const char *)ep + 1 - cp_;
 }
@@ -1893,7 +1888,13 @@ static size_t estimate_cache_size(size_t ondisk_size, unsigned int entries)
 	return ondisk_size + entries * per_entry;
 }
 
-static unsigned long load_cache_entry_block(struct index_state *istate, struct mem_pool *ce_mem_pool, int offset, int nr, void *mmap, unsigned long start_offset, struct strbuf *previous_name)
+/*
+ * A helper function that will load the specified range of cache entries
+ * from the memory mapped file and add them to the given index.
+ */
+static unsigned long load_cache_entry_block(struct index_state *istate,
+			struct mem_pool *ce_mem_pool, int offset, int nr, void *mmap,
+			unsigned long start_offset, struct strbuf *previous_name)
 {
 	int i;
 	unsigned long src_offset = start_offset;
@@ -1912,7 +1913,8 @@ static unsigned long load_cache_entry_block(struct index_state *istate, struct m
 	return src_offset - start_offset;
 }
 
-static unsigned long load_all_cache_entries(struct index_state *istate, void *mmap, size_t mmap_size, unsigned long src_offset)
+static unsigned long load_all_cache_entries(struct index_state *istate,
+			void *mmap, size_t mmap_size, unsigned long src_offset)
 {
 	struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
 	unsigned long consumed;
@@ -1927,7 +1929,8 @@ static unsigned long load_all_cache_entries(struct index_state *istate, void *mm
 			      estimate_cache_size(mmap_size, istate->cache_nr));
 	}
 
-	consumed = load_cache_entry_block(istate, istate->ce_mem_pool, 0, istate->cache_nr, mmap, src_offset, previous_name);
+	consumed = load_cache_entry_block(istate, istate->ce_mem_pool,
+					0, istate->cache_nr, mmap, src_offset, previous_name);
 	strbuf_release(&previous_name_buf);
 	return consumed;
 }
@@ -1955,67 +1958,110 @@ struct load_cache_entries_thread_data
 	struct mem_pool *ce_mem_pool;
 	int offset, nr;
 	void *mmap;
+	size_t mmap_size;
 	unsigned long start_offset;
 	struct strbuf previous_name_buf;
 	struct strbuf *previous_name;
 	unsigned long consumed;	/* return # of bytes in index file processed */
 };
 
-/*
-* A thread proc to run the load_cache_entries() computation
-* across multiple background threads.
-*/
 static void *load_cache_entries_thread(void *_data)
 {
 	struct load_cache_entries_thread_data *p = _data;
 
-	p->consumed += load_cache_entry_block(p->istate, p->ce_mem_pool, p->offset, p->nr, p->mmap, p->start_offset, p->previous_name);
+	p->consumed += load_cache_entry_block(p->istate, p->ce_mem_pool,
+		p->offset, p->nr, p->mmap, p->start_offset, p->previous_name);
+	return NULL;
+}
+
+static void *load_index_extensions_thread(void *_data)
+{
+	struct load_cache_entries_thread_data *p = _data;
+	unsigned long src_offset = p->start_offset;
+
+	while (src_offset <= p->mmap_size - the_hash_algo->rawsz - 8) {
+		/* After an array of active_nr index entries,
+		 * there can be arbitrary number of extended
+		 * sections, each of which is prefixed with
+		 * extension name (4-byte) and section length
+		 * in 4-byte network byte order.
+		 */
+		uint32_t extsize;
+		memcpy(&extsize, (char *)p->mmap + src_offset + 4, 4);
+		extsize = ntohl(extsize);
+		if (read_index_extension(p->istate,
+								(const char *)p->mmap + src_offset,
+								(char *)p->mmap + src_offset + 8,
+								extsize) < 0) {
+			munmap(p->mmap, p->mmap_size);
+			die("index file corrupt");
+		}
+		src_offset += 8;
+		src_offset += extsize;
+	}
+	p->consumed += src_offset - p->start_offset;
+
 	return NULL;
 }
 
-static unsigned long load_cache_entries(struct index_state *istate, void *mmap, size_t mmap_size, unsigned long src_offset)
+static unsigned long load_cache_entries(struct index_state *istate,
+			void *mmap, size_t mmap_size, unsigned long src_offset)
 {
 	struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
 	struct load_cache_entries_thread_data *data;
-	int threads, cpus, thread_nr;
+	int nr_threads, cpus, ce_per_thread;
 	unsigned long consumed;
 	int i, thread;
 
+	nr_threads = git_config_get_index_threads();
+	if (!nr_threads) {
 		cpus = online_cpus();
-	threads = istate->cache_nr / THREAD_COST;
-	if (threads > cpus)
-		threads = cpus;
+		nr_threads = istate->cache_nr / THREAD_COST;
+		if (nr_threads > cpus)
+			nr_threads = cpus;
+	}
 
 	/* enable testing with fewer than default minimum of entries */
-	if ((istate->cache_nr > 1) && (threads < 2) && getenv("GIT_FASTINDEX_TEST"))
-		threads = 2;
+	if ((istate->cache_nr > 1) && (nr_threads < 2) && git_env_bool("GIT_INDEX_THREADS_TEST", 0))
+		nr_threads = 2;
 
-	if (threads < 2 || !git_config_get_fast_index())
+	if (nr_threads < 2)
 		return load_all_cache_entries(istate, mmap, mmap_size, src_offset);
 
+	/* a little sanity checking */
+	if (istate->name_hash_initialized)
+		die("the name hash isn't thread safe");
+
 	mem_pool_init(&istate->ce_mem_pool, 0);
 	if (istate->version == 4)
 		previous_name = &previous_name_buf;
 	else
 		previous_name = NULL;
 
-	thread_nr = (istate->cache_nr + threads - 1) / threads;
-	data = xcalloc(threads, sizeof(struct load_cache_entries_thread_data));
+	/* allocate an extra thread for loading the index extensions */
+	ce_per_thread = DIV_ROUND_UP(istate->cache_nr, nr_threads);
+	data = xcalloc(nr_threads + 1, sizeof(struct load_cache_entries_thread_data));
 
-	/* loop through index entries starting a thread for every thread_nr entries */
+	/*
+	 * Loop through index entries starting a thread for every ce_per_thread
+	 * entries.
+	 */
 	consumed = thread = 0;
-	for (i = 0; ; i++) {
+	for (i = 0; i < istate->cache_nr; i++) {
 		struct ondisk_cache_entry *ondisk;
 		const char *name;
 		unsigned int flags;
 
-		/* we've reached the begining of a block of cache entries, kick off a thread to process them */
-		if (0 == i % thread_nr) {
+		/*
+		 * we've reached the beginning of a block of cache entries,
+		 * kick off a thread to process them
+		 */
+		if (0 == i % ce_per_thread) {
 			struct load_cache_entries_thread_data *p = &data[thread];
 
 			p->istate = istate;
 			p->offset = i;
-			p->nr = min(thread_nr, istate->cache_nr - i);
+			p->nr = ce_per_thread < istate->cache_nr - i ? ce_per_thread : istate->cache_nr - i;
 
 			/* create a mem_pool for each thread */
 			if (istate->version == 4)
@@ -2034,8 +2080,8 @@ static unsigned long load_cache_entries(struct index_state *istate, void *mmap,
 
 			if (pthread_create(&p->pthread, NULL, load_cache_entries_thread, p))
 				die("unable to create load_cache_entries_thread");
-			if (++thread == threads || p->nr != thread_nr)
-				break;
+
+			++thread;
 		}
 
 		ondisk = (struct ondisk_cache_entry *)((char *)mmap + src_offset);
@@ -2064,7 +2110,18 @@ static unsigned long load_cache_entries(struct index_state *istate, void *mmap,
 			src_offset += (name - ((char *)ondisk)) + expand_name_field(previous_name, name);
 	}
 
-	for (i = 0; i < threads; i++) {
+	/* create a thread to load the index extensions */
+	struct load_cache_entries_thread_data *p = &data[thread];
+	p->istate = istate;
+	mem_pool_init(&p->ce_mem_pool, 0);
+	p->mmap = mmap;
+	p->mmap_size = mmap_size;
+	p->start_offset = src_offset;
+
+	if (pthread_create(&p->pthread, NULL, load_index_extensions_thread, p))
+		die("unable to create load_index_extensions_thread");
+
+	for (i = 0; i < nr_threads + 1; i++) {
 		struct load_cache_entries_thread_data *p = data + i;
 		if (pthread_join(p->pthread, NULL))
 			die("unable to join load_cache_entries_thread");


### Patches

Ben Peart (3):
  read-cache: speed up index load through parallelization
  read-cache: load cache extensions on worker thread
  read-cache: micro-optimize expand_name_field() to speed up V4 index
    parsing.

 Documentation/config.txt |   6 +
 config.c                 |  14 ++
 config.h                 |   1 +
 read-cache.c             | 281 +++++++++++++++++++++++++++++++++++----
 4 files changed, 275 insertions(+), 27 deletions(-)


base-commit: 29d9e3e2c47dd4b5053b0a98c891878d398463e3
-- 
2.18.0.windows.1






[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux