[PATCH v2 2/3] read-cache: load cache extensions on worker thread

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch helps address the CPU cost of loading the index by loading
the cache extensions on a worker thread in parallel with loading the cache
entries.

This is possible because the current extensions don't access the cache
entries in the index_state structure, so it is OK that not all of the
cache entries exist yet while the extensions are being parsed.

The CACHE_EXT_TREE, CACHE_EXT_RESOLVE_UNDO, and CACHE_EXT_UNTRACKED
extensions aren't even given a pointer to the index, so they have no
access to the cache entries at all.

CACHE_EXT_LINK only uses the index_state to initialize the split index.
CACHE_EXT_FSMONITOR only uses the index_state to save the fsmonitor last
update and dirty flags.

I used p0002-read-cache.sh to generate some performance data on the
cumulative impact:

100,000 entries

Test                                HEAD~3           HEAD~2
---------------------------------------------------------------------------
read_cache/discard_cache 1000 times 14.08(0.01+0.10) 9.72(0.03+0.06) -31.0%

1,000,000 entries

Test                                HEAD~3            HEAD~2
------------------------------------------------------------------------------
read_cache/discard_cache 1000 times 202.95(0.01+0.07) 154.14(0.03+0.06) -24.1%

Signed-off-by: Ben Peart <Ben.Peart@xxxxxxxxxxxxx>
---
 read-cache.c | 60 +++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 48 insertions(+), 12 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index c30346388a..f768004617 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1959,16 +1959,13 @@ struct load_cache_entries_thread_data
 	struct mem_pool *ce_mem_pool;
 	int offset, nr;
 	void *mmap;
+	size_t mmap_size;
 	unsigned long start_offset;
 	struct strbuf previous_name_buf;
 	struct strbuf *previous_name;
 	unsigned long consumed;	/* return # of bytes in index file processed */
 };
 
-/*
-* A thread proc to run the load_cache_entries() computation
-* across multiple background threads.
-*/
 static void *load_cache_entries_thread(void *_data)
 {
 	struct load_cache_entries_thread_data *p = _data;
@@ -1978,6 +1975,36 @@ static void *load_cache_entries_thread(void *_data)
 	return NULL;
 }
 
+static void *load_index_extensions_thread(void *_data)
+{
+	struct load_cache_entries_thread_data *p = _data;
+	unsigned long src_offset = p->start_offset;
+
+	while (src_offset <= p->mmap_size - the_hash_algo->rawsz - 8) {
+		/* After an array of active_nr index entries,
+		 * there can be arbitrary number of extended
+		 * sections, each of which is prefixed with
+		 * extension name (4-byte) and section length
+		 * in 4-byte network byte order.
+		 */
+		uint32_t extsize;
+		memcpy(&extsize, (char *)p->mmap + src_offset + 4, 4);
+		extsize = ntohl(extsize);
+		if (read_index_extension(p->istate,
+								(const char *)p->mmap + src_offset,
+								(char *)p->mmap + src_offset + 8,
+								extsize) < 0) {
+			munmap(p->mmap, p->mmap_size);
+			die("index file corrupt");
+		}
+		src_offset += 8;
+		src_offset += extsize;
+	}
+	p->consumed += src_offset - p->start_offset;
+
+	return NULL;
+}
+
 static unsigned long load_cache_entries(struct index_state *istate,
 			void *mmap, size_t mmap_size, unsigned long src_offset)
 {
@@ -2012,16 +2039,16 @@ static unsigned long load_cache_entries(struct index_state *istate,
 	else
 		previous_name = NULL;
 
+	/* allocate an extra thread for loading the index extensions */
 	ce_per_thread = DIV_ROUND_UP(istate->cache_nr, nr_threads);
-	data = xcalloc(nr_threads, sizeof(struct load_cache_entries_thread_data));
+	data = xcalloc(nr_threads + 1, sizeof(struct load_cache_entries_thread_data));
 
 	/*
 	 * Loop through index entries starting a thread for every ce_per_thread
-	 * entries. Exit the loop when we've created the final thread (no need
-	 * to parse the remaining entries.
+	 * entries.
 	 */
 	consumed = thread = 0;
-	for (i = 0; ; i++) {
+	for (i = 0; i < istate->cache_nr; i++) {
 		struct ondisk_cache_entry *ondisk;
 		const char *name;
 		unsigned int flags;
@@ -2055,9 +2082,7 @@ static unsigned long load_cache_entries(struct index_state *istate,
 			if (pthread_create(&p->pthread, NULL, load_cache_entries_thread, p))
 				die("unable to create load_cache_entries_thread");
 
-			/* exit the loop when we've created the last thread */
-			if (++thread == nr_threads)
-				break;
+			++thread;
 		}
 
 		ondisk = (struct ondisk_cache_entry *)((char *)mmap + src_offset);
@@ -2086,7 +2111,18 @@ static unsigned long load_cache_entries(struct index_state *istate,
 			src_offset += (name - ((char *)ondisk)) + expand_name_field(previous_name, name);
 	}
 
-	for (i = 0; i < nr_threads; i++) {
+	/* create a thread to load the index extensions */
+	struct load_cache_entries_thread_data *p = &data[thread];
+	p->istate = istate;
+	mem_pool_init(&p->ce_mem_pool, 0);
+	p->mmap = mmap;
+	p->mmap_size = mmap_size;
+	p->start_offset = src_offset;
+
+	if (pthread_create(&p->pthread, NULL, load_index_extensions_thread, p))
+		die("unable to create load_index_extensions_thread");
+
+	for (i = 0; i < nr_threads + 1; i++) {
 		struct load_cache_entries_thread_data *p = data + i;
 		if (pthread_join(p->pthread, NULL))
 			die("unable to join load_cache_entries_thread");
-- 
2.18.0.windows.1





[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux