This patch helps address the CPU cost of loading the index by loading the cache extensions on a worker thread in parallel with loading the cache entries. This is possible because the current extensions don't access the cache entries in the index_state structure, so it is OK that the entries don't all exist yet. The CACHE_EXT_TREE, CACHE_EXT_RESOLVE_UNDO, and CACHE_EXT_UNTRACKED extensions don't even get a pointer to the index, so they don't have access to the cache entries. CACHE_EXT_LINK only uses the index_state to initialize the split index. CACHE_EXT_FSMONITOR only uses the index_state to save the fsmonitor last update and dirty flags. I used p0002-read-cache.sh to generate some performance data on the cumulative impact: 100,000 entries Test HEAD~3 HEAD~2 --------------------------------------------------------------------------- read_cache/discard_cache 1000 times 14.08(0.01+0.10) 9.72(0.03+0.06) -31.0% 1,000,000 entries Test HEAD~3 HEAD~2 ------------------------------------------------------------------------------ read_cache/discard_cache 1000 times 202.95(0.01+0.07) 154.14(0.03+0.06) -24.1% Signed-off-by: Ben Peart <Ben.Peart@xxxxxxxxxxxxx> --- read-cache.c | 60 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 12 deletions(-) diff --git a/read-cache.c b/read-cache.c index c30346388a..f768004617 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1959,16 +1959,13 @@ struct load_cache_entries_thread_data struct mem_pool *ce_mem_pool; int offset, nr; void *mmap; + size_t mmap_size; unsigned long start_offset; struct strbuf previous_name_buf; struct strbuf *previous_name; unsigned long consumed; /* return # of bytes in index file processed */ }; -/* -* A thread proc to run the load_cache_entries() computation -* across multiple background threads. 
-*/ static void *load_cache_entries_thread(void *_data) { struct load_cache_entries_thread_data *p = _data; @@ -1978,6 +1975,36 @@ static void *load_cache_entries_thread(void *_data) return NULL; } +static void *load_index_extensions_thread(void *_data) +{ + struct load_cache_entries_thread_data *p = _data; + unsigned long src_offset = p->start_offset; + + while (src_offset <= p->mmap_size - the_hash_algo->rawsz - 8) { + /* After an array of active_nr index entries, + * there can be arbitrary number of extended + * sections, each of which is prefixed with + * extension name (4-byte) and section length + * in 4-byte network byte order. + */ + uint32_t extsize; + memcpy(&extsize, (char *)p->mmap + src_offset + 4, 4); + extsize = ntohl(extsize); + if (read_index_extension(p->istate, + (const char *)p->mmap + src_offset, + (char *)p->mmap + src_offset + 8, + extsize) < 0) { + munmap(p->mmap, p->mmap_size); + die("index file corrupt"); + } + src_offset += 8; + src_offset += extsize; + } + p->consumed += src_offset - p->start_offset; + + return NULL; +} + static unsigned long load_cache_entries(struct index_state *istate, void *mmap, size_t mmap_size, unsigned long src_offset) { @@ -2012,16 +2039,16 @@ static unsigned long load_cache_entries(struct index_state *istate, else previous_name = NULL; + /* allocate an extra thread for loading the index extensions */ ce_per_thread = DIV_ROUND_UP(istate->cache_nr, nr_threads); - data = xcalloc(nr_threads, sizeof(struct load_cache_entries_thread_data)); + data = xcalloc(nr_threads + 1, sizeof(struct load_cache_entries_thread_data)); /* * Loop through index entries starting a thread for every ce_per_thread - * entries. Exit the loop when we've created the final thread (no need - * to parse the remaining entries. + * entries. 
*/ consumed = thread = 0; - for (i = 0; ; i++) { + for (i = 0; i < istate->cache_nr; i++) { struct ondisk_cache_entry *ondisk; const char *name; unsigned int flags; @@ -2055,9 +2082,7 @@ static unsigned long load_cache_entries(struct index_state *istate, if (pthread_create(&p->pthread, NULL, load_cache_entries_thread, p)) die("unable to create load_cache_entries_thread"); - /* exit the loop when we've created the last thread */ - if (++thread == nr_threads) - break; + ++thread; } ondisk = (struct ondisk_cache_entry *)((char *)mmap + src_offset); @@ -2086,7 +2111,18 @@ static unsigned long load_cache_entries(struct index_state *istate, src_offset += (name - ((char *)ondisk)) + expand_name_field(previous_name, name); } - for (i = 0; i < nr_threads; i++) { + /* create a thread to load the index extensions */ + struct load_cache_entries_thread_data *p = &data[thread]; + p->istate = istate; + mem_pool_init(&p->ce_mem_pool, 0); + p->mmap = mmap; + p->mmap_size = mmap_size; + p->start_offset = src_offset; + + if (pthread_create(&p->pthread, NULL, load_index_extensions_thread, p)) + die("unable to create load_index_extensions_thread"); + + for (i = 0; i < nr_threads + 1; i++) { struct load_cache_entries_thread_data *p = data + i; if (pthread_join(p->pthread, NULL)) die("unable to join load_cache_entries_thread"); -- 2.18.0.windows.1