The big changes in this itteration are: - Switched to index.threads to provide control over the use of threading - Added another worker thread to load the index extensions in parallel - Applied optimization expand_name_field() suggested by Duy The net result of these optimizations is a savings of 25.8% (1,000,000 files) to 38.1% (100,000 files) as measured by p0002-read-cache.sh. This patch conflicts with Duy's patch to remove the double memory copy and pass in the previous ce instead. The two will need to be merged/reconciled once they settle down a bit. Base Ref: master Web-Diff: https://github.com/benpeart/git/commit/39f2b0f5fe Checkout: git fetch https://github.com/benpeart/git read-index-multithread-v2 && git checkout 39f2b0f5fe ### Interdiff (v1..v2): diff --git a/Documentation/config.txt b/Documentation/config.txt index 3344685cc4..79f8296d9c 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -899,14 +899,6 @@ relatively high IO latencies. When enabled, Git will do the index comparison to the filesystem data in parallel, allowing overlapping IO's. Defaults to true. -core.fastIndex:: - Enable parallel index loading -+ -This can speed up operations like 'git diff' and 'git status' especially -when the index is very large. When enabled, Git will do the index -loading from the on disk format to the in-memory format in parallel. -Defaults to true. - core.createObject:: You can set this to 'link', in which case a hardlink followed by a delete of the source are used to make sure that object creation @@ -2399,6 +2391,12 @@ imap:: The configuration variables in the 'imap' section are described in linkgit:git-imap-send[1]. +index.threads:: + Specifies the number of threads to spawn when loading the index. + This is meant to reduce index load time on multiprocessor machines. + Specifying 0 or 'true' will cause Git to auto-detect the number of + CPU's and set the number of threads accordingly. Defaults to 'true'. + index.version:: Specify the version with which new index files should be initialized. This does not affect existing repositories. diff --git a/config.c b/config.c index 883092fdd3..3bda124550 100644 --- a/config.c +++ b/config.c @@ -2289,17 +2289,18 @@ int git_config_get_fsmonitor(void) return 0; } -int git_config_get_fast_index(void) +int git_config_get_index_threads(void) { - int val; + int is_bool, val; - if (!git_config_get_maybe_bool("core.fastindex", &val)) + if (!git_config_get_bool_or_int("index.threads", &is_bool, &val)) { + if (is_bool) + return val ? 0 : 1; + else return val; + } - if (getenv("GIT_FASTINDEX_TEST")) - return 1; - - return -1; /* default value */ + return 0; /* auto-detect */ } NORETURN diff --git a/config.h b/config.h index 74ca4e7db5..a06027e69b 100644 --- a/config.h +++ b/config.h @@ -250,7 +250,7 @@ extern int git_config_get_untracked_cache(void); extern int git_config_get_split_index(void); extern int git_config_get_max_percent_split_change(void); extern int git_config_get_fsmonitor(void); -extern int git_config_get_fast_index(void); +extern int git_config_get_index_threads(void); /* This dies if the configured or default date is in the future */ extern int git_config_get_expiry(const char *key, const char **output); diff --git a/read-cache.c b/read-cache.c index 0fa7e1a04c..f5e7c86c42 100644 --- a/read-cache.c +++ b/read-cache.c @@ -24,10 +24,6 @@ #include "utf8.h" #include "fsmonitor.h" -#ifndef min -#define min(a,b) (((a) < (b)) ? (a) : (b)) -#endif - /* Mask for the name length in ce_flags in the on-disk index */ #define CE_NAMEMASK (0x0fff) @@ -1758,9 +1754,8 @@ static unsigned long expand_name_field(struct strbuf *name, const char *cp_) if (name->len < len) die("malformed name field in the index"); - strbuf_remove(name, name->len - len, len); - for (ep = cp; *ep; ep++) - ; /* find the end */ + strbuf_setlen(name, name->len - len); + ep = cp + strlen((const char *)cp); strbuf_add(name, cp, ep - cp); return (const char *)ep + 1 - cp_; } @@ -1893,7 +1888,13 @@ static size_t estimate_cache_size(size_t ondisk_size, unsigned int entries) return ondisk_size + entries * per_entry; } -static unsigned long load_cache_entry_block(struct index_state *istate, struct mem_pool *ce_mem_pool, int offset, int nr, void *mmap, unsigned long start_offset, struct strbuf *previous_name) +/* + * A helper function that will load the specified range of cache entries + * from the memory mapped file and add them to the given index. + */ +static unsigned long load_cache_entry_block(struct index_state *istate, + struct mem_pool *ce_mem_pool, int offset, int nr, void *mmap, + unsigned long start_offset, struct strbuf *previous_name) { int i; unsigned long src_offset = start_offset; @@ -1912,7 +1913,8 @@ static unsigned long load_cache_entry_block(struct index_state *istate, struct m return src_offset - start_offset; } -static unsigned long load_all_cache_entries(struct index_state *istate, void *mmap, size_t mmap_size, unsigned long src_offset) +static unsigned long load_all_cache_entries(struct index_state *istate, + void *mmap, size_t mmap_size, unsigned long src_offset) { struct strbuf previous_name_buf = STRBUF_INIT, *previous_name; unsigned long consumed; @@ -1927,7 +1929,8 @@ static unsigned long load_all_cache_entries(struct index_state *istate, void *mm estimate_cache_size(mmap_size, istate->cache_nr)); } - consumed = load_cache_entry_block(istate, istate->ce_mem_pool, 0, istate->cache_nr, mmap, src_offset, previous_name); + consumed = load_cache_entry_block(istate, istate->ce_mem_pool, + 0, istate->cache_nr, mmap, src_offset, previous_name); strbuf_release(&previous_name_buf); return consumed; } @@ -1955,67 +1958,110 @@ struct load_cache_entries_thread_data struct mem_pool *ce_mem_pool; int offset, nr; void *mmap; + size_t mmap_size; unsigned long start_offset; struct strbuf previous_name_buf; struct strbuf *previous_name; unsigned long consumed; /* return # of bytes in index file processed */ }; -/* -* A thread proc to run the load_cache_entries() computation -* across multiple background threads. -*/ static void *load_cache_entries_thread(void *_data) { struct load_cache_entries_thread_data *p = _data; - p->consumed += load_cache_entry_block(p->istate, p->ce_mem_pool, p->offset, p->nr, p->mmap, p->start_offset, p->previous_name); + p->consumed += load_cache_entry_block(p->istate, p->ce_mem_pool, + p->offset, p->nr, p->mmap, p->start_offset, p->previous_name); + return NULL; +} + +static void *load_index_extensions_thread(void *_data) +{ + struct load_cache_entries_thread_data *p = _data; + unsigned long src_offset = p->start_offset; + + while (src_offset <= p->mmap_size - the_hash_algo->rawsz - 8) { + /* After an array of active_nr index entries, + * there can be arbitrary number of extended + * sections, each of which is prefixed with + * extension name (4-byte) and section length + * in 4-byte network byte order. + */ + uint32_t extsize; + memcpy(&extsize, (char *)p->mmap + src_offset + 4, 4); + extsize = ntohl(extsize); + if (read_index_extension(p->istate, + (const char *)p->mmap + src_offset, + (char *)p->mmap + src_offset + 8, + extsize) < 0) { + munmap(p->mmap, p->mmap_size); + die("index file corrupt"); + } + src_offset += 8; + src_offset += extsize; + } + p->consumed += src_offset - p->start_offset; + return NULL; } -static unsigned long load_cache_entries(struct index_state *istate, void *mmap, size_t mmap_size, unsigned long src_offset) +static unsigned long load_cache_entries(struct index_state *istate, + void *mmap, size_t mmap_size, unsigned long src_offset) { struct strbuf previous_name_buf = STRBUF_INIT, *previous_name; struct load_cache_entries_thread_data *data; - int threads, cpus, thread_nr; + int nr_threads, cpus, ce_per_thread; unsigned long consumed; int i, thread; + nr_threads = git_config_get_index_threads(); + if (!nr_threads) { cpus = online_cpus(); - threads = istate->cache_nr / THREAD_COST; - if (threads > cpus) - threads = cpus; + nr_threads = istate->cache_nr / THREAD_COST; + if (nr_threads > cpus) + nr_threads = cpus; + } /* enable testing with fewer than default minimum of entries */ - if ((istate->cache_nr > 1) && (threads < 2) && getenv("GIT_FASTINDEX_TEST")) - threads = 2; + if ((istate->cache_nr > 1) && (nr_threads < 2) && git_env_bool("GIT_INDEX_THREADS_TEST", 0)) + nr_threads = 2; - if (threads < 2 || !git_config_get_fast_index()) + if (nr_threads < 2) return load_all_cache_entries(istate, mmap, mmap_size, src_offset); + /* a little sanity checking */ + if (istate->name_hash_initialized) + die("the name hash isn't thread safe"); + mem_pool_init(&istate->ce_mem_pool, 0); if (istate->version == 4) previous_name = &previous_name_buf; else previous_name = NULL; - thread_nr = (istate->cache_nr + threads - 1) / threads; - data = xcalloc(threads, sizeof(struct load_cache_entries_thread_data)); + /* allocate an extra thread for loading the index extensions */ + ce_per_thread = DIV_ROUND_UP(istate->cache_nr, nr_threads); + data = xcalloc(nr_threads + 1, sizeof(struct load_cache_entries_thread_data)); - /* loop through index entries starting a thread for every thread_nr entries */ + /* + * Loop through index entries starting a thread for every ce_per_thread + * entries. + */ consumed = thread = 0; - for (i = 0; ; i++) { + for (i = 0; i < istate->cache_nr; i++) { struct ondisk_cache_entry *ondisk; const char *name; unsigned int flags; - /* we've reached the begining of a block of cache entries, kick off a thread to process them */ - if (0 == i % thread_nr) { + /* + * we've reached the beginning of a block of cache entries, + * kick off a thread to process them + */ + if (0 == i % ce_per_thread) { struct load_cache_entries_thread_data *p = &data[thread]; p->istate = istate; p->offset = i; - p->nr = min(thread_nr, istate->cache_nr - i); + p->nr = ce_per_thread < istate->cache_nr - i ? ce_per_thread : istate->cache_nr - i; /* create a mem_pool for each thread */ if (istate->version == 4) @@ -2034,8 +2080,8 @@ static unsigned long load_cache_entries(struct index_state *istate, void *mmap, if (pthread_create(&p->pthread, NULL, load_cache_entries_thread, p)) die("unable to create load_cache_entries_thread"); - if (++thread == threads || p->nr != thread_nr) - break; + + ++thread; } ondisk = (struct ondisk_cache_entry *)((char *)mmap + src_offset); @@ -2064,7 +2110,18 @@ static unsigned long load_cache_entries(struct index_state *istate, void *mmap, src_offset += (name - ((char *)ondisk)) + expand_name_field(previous_name, name); } - for (i = 0; i < threads; i++) { + /* create a thread to load the index extensions */ + struct load_cache_entries_thread_data *p = &data[thread]; + p->istate = istate; + mem_pool_init(&p->ce_mem_pool, 0); + p->mmap = mmap; + p->mmap_size = mmap_size; + p->start_offset = src_offset; + + if (pthread_create(&p->pthread, NULL, load_index_extensions_thread, p)) + die("unable to create load_index_extensions_thread"); + + for (i = 0; i < nr_threads + 1; i++) { struct load_cache_entries_thread_data *p = data + i; if (pthread_join(p->pthread, NULL)) die("unable to join load_cache_entries_thread"); ### Patches Ben Peart (3): read-cache: speed up index load through parallelization read-cache: load cache extensions on worker thread read-cache: micro-optimize expand_name_field() to speed up V4 index parsing. Documentation/config.txt | 6 + config.c | 14 ++ config.h | 1 + read-cache.c | 281 +++++++++++++++++++++++++++++++++++---- 4 files changed, 275 insertions(+), 27 deletions(-) base-commit: 29d9e3e2c47dd4b5053b0a98c891878d398463e3 -- 2.18.0.windows.1