[PATCH] preload-index: optimize for sequential IO

Karsten Blees <karsten.blees@xxxxxxxxx> · Wed, 25 Jun 2014 00:53:31 +0200

Enabling core.preloadIndex on a rotating hard disk reduces cold cache
performance by ~5%. This is because the threads read from up to 20
different locations on disk at the same time, which defeats sequential IO.

Additionally, some threads finish early (each thread is assigned a fixed
number of cache entries to process in advance), i.e. preloading is not as
parallel as we would like. With hot cache, threads finish so quickly that
most run in sequence rather than in parallel.

Change background threads so that they run until all work is done. Use a
central mutex-protected counter from which each thread claims the next
batch of THREAD_COST cache entries. As cache entries are sorted by path and
batches are handed out in index order, this implicitly increases IO
locality.

This improves cold cache performance of preload_index() by ~20% and
hot cache performance by ~15%. Total improvement of e.g. 'git status -uno'
on WebKit is ~15% (cold cache) and ~5% (hot cache).

Signed-off-by: Karsten Blees <blees@xxxxxxx>
---
 preload-index.c | 76 ++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 45 insertions(+), 31 deletions(-)

diff --git a/preload-index.c b/preload-index.c
index 968ee25..6ac368d 100644
--- a/preload-index.c
+++ b/preload-index.c
@@ -28,50 +28,65 @@ struct thread_data {
 	pthread_t pthread;
 	struct index_state *index;
 	struct pathspec pathspec;
-	int offset, nr;
+	pthread_mutex_t *pmutex;
+	int *pnr;
 };
 
 static void *preload_thread(void *_data)
 {
-	int nr;
+	int nr, max_nr;
 	struct thread_data *p = _data;
 	struct index_state *index = p->index;
-	struct cache_entry **cep = index->cache + p->offset;
 	struct cache_def cache;
 
 	memset(&cache, 0, sizeof(cache));
-	nr = p->nr;
-	if (nr + p->offset > index->cache_nr)
-		nr = index->cache_nr - p->offset;
+	for (;;) {
+		/* get next batch of entries to check */
+		pthread_mutex_lock(p->pmutex);
+		nr = *p->pnr;
+		*p->pnr += THREAD_COST;
+		pthread_mutex_unlock(p->pmutex);
 
-	do {
-		struct cache_entry *ce = *cep++;
-		struct stat st;
+		max_nr = nr + THREAD_COST;
+		if (max_nr > index->cache_nr)
+			max_nr = index->cache_nr;
 
-		if (ce_stage(ce))
-			continue;
-		if (S_ISGITLINK(ce->ce_mode))
-			continue;
-		if (ce_uptodate(ce))
-			continue;
-		if (!ce_path_match(ce, &p->pathspec, NULL))
-			continue;
-		if (threaded_has_symlink_leading_path(&cache, ce->name, ce_namelen(ce)))
-			continue;
-		if (lstat(ce->name, &st))
-			continue;
-		if (ie_match_stat(index, ce, &st, CE_MATCH_RACY_IS_DIRTY))
-			continue;
-		ce_mark_uptodate(ce);
-	} while (--nr > 0);
+		/* break loop if no more work to do */
+		if (nr >= max_nr)
+			break;
+
+		for (; nr < max_nr; nr++) {
+			struct cache_entry *ce = index->cache[nr];
+			struct stat st;
+
+			if (ce_stage(ce))
+				continue;
+			if (S_ISGITLINK(ce->ce_mode))
+				continue;
+			if (ce_uptodate(ce))
+				continue;
+			if (!ce_path_match(ce, &p->pathspec, NULL))
+				continue;
+			if (threaded_has_symlink_leading_path(&cache, ce->name,
+							      ce_namelen(ce)))
+				continue;
+			if (lstat(ce->name, &st))
+				continue;
+			if (ie_match_stat(index, ce, &st,
+					  CE_MATCH_RACY_IS_DIRTY))
+				continue;
+			ce_mark_uptodate(ce);
+		}
+	}
 	return NULL;
 }
 
 static void preload_index(struct index_state *index,
 			  const struct pathspec *pathspec)
 {
-	int threads, i, work, offset;
+	int threads, i, nr = 0;
 	struct thread_data data[MAX_PARALLEL];
+	pthread_mutex_t mutex;
 
 	if (!core_preload_index)
 		return;
@@ -81,17 +96,15 @@ static void preload_index(struct index_state *index,
 		return;
 	if (threads > MAX_PARALLEL)
 		threads = MAX_PARALLEL;
-	offset = 0;
-	work = DIV_ROUND_UP(index->cache_nr, threads);
 	memset(&data, 0, sizeof(data));
+	pthread_mutex_init(&mutex, NULL);
 	for (i = 0; i < threads; i++) {
 		struct thread_data *p = data+i;
 		p->index = index;
 		if (pathspec)
 			copy_pathspec(&p->pathspec, pathspec);
-		p->offset = offset;
-		p->nr = work;
-		offset += work;
+		p->pnr = &nr;
+		p->pmutex = &mutex;
 		if (pthread_create(&p->pthread, NULL, preload_thread, p))
 			die("unable to create threaded lstat");
 	}
@@ -100,6 +113,7 @@ static void preload_index(struct index_state *index,
 		if (pthread_join(p->pthread, NULL))
 			die("unable to join threaded lstat");
 	}
+	pthread_mutex_destroy(&mutex);
 }
 #endif
 
-- 
1.9.4.msysgit.0.1.gc8a51b4

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html