[PATCH v5 0/5] read-cache: speed up index load through parallelization

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This version of the patch merges in Duy's work to speed up index v4 decoding.
I had to massage it a bit to get it to work with the multi-threading but it is
still largely his code. I also responded to Junio's feedback on initializing
copy_len to avoid compiler warnings.

I also added a minor cleanup patch to minimize the casting required when
working with the memory-mapped index, and made other minor changes based
on the feedback received.

Base Ref: master
Web-Diff: https://github.com/benpeart/git/commit/dcf62005f8
Checkout: git fetch https://github.com/benpeart/git read-index-multithread-v5 && git checkout dcf62005f8


### Interdiff (v3..v5):

diff --git a/read-cache.c b/read-cache.c
index 8537a55750..c05e887fc9 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1655,7 +1655,7 @@ int verify_index_checksum;
 /* Allow fsck to force verification of the cache entry order. */
 int verify_ce_order;
 
-static int verify_hdr(struct cache_header *hdr, unsigned long size)
+static int verify_hdr(const struct cache_header *hdr, unsigned long size)
 {
 	git_hash_ctx c;
 	unsigned char hash[GIT_MAX_RAWSZ];
@@ -1679,7 +1679,7 @@ static int verify_hdr(struct cache_header *hdr, unsigned long size)
 }
 
 static int read_index_extension(struct index_state *istate,
-				const char *ext, void *data, unsigned long sz)
+				const char *ext, const char *data, unsigned long sz)
 {
 	switch (CACHE_EXT(ext)) {
 	case CACHE_EXT_TREE:
@@ -1721,33 +1721,6 @@ int read_index(struct index_state *istate)
 	return read_index_from(istate, get_index_file(), get_git_dir());
 }
 
-static struct cache_entry *cache_entry_from_ondisk(struct mem_pool *mem_pool,
-						   struct ondisk_cache_entry *ondisk,
-						   unsigned int flags,
-						   const char *name,
-						   size_t len)
-{
-	struct cache_entry *ce = mem_pool__ce_alloc(mem_pool, len);
-
-	ce->ce_stat_data.sd_ctime.sec = get_be32(&ondisk->ctime.sec);
-	ce->ce_stat_data.sd_mtime.sec = get_be32(&ondisk->mtime.sec);
-	ce->ce_stat_data.sd_ctime.nsec = get_be32(&ondisk->ctime.nsec);
-	ce->ce_stat_data.sd_mtime.nsec = get_be32(&ondisk->mtime.nsec);
-	ce->ce_stat_data.sd_dev   = get_be32(&ondisk->dev);
-	ce->ce_stat_data.sd_ino   = get_be32(&ondisk->ino);
-	ce->ce_mode  = get_be32(&ondisk->mode);
-	ce->ce_stat_data.sd_uid   = get_be32(&ondisk->uid);
-	ce->ce_stat_data.sd_gid   = get_be32(&ondisk->gid);
-	ce->ce_stat_data.sd_size  = get_be32(&ondisk->size);
-	ce->ce_flags = flags & ~CE_NAMEMASK;
-	ce->ce_namelen = len;
-	ce->index = 0;
-	hashcpy(ce->oid.hash, ondisk->sha1);
-	memcpy(ce->name, name, len);
-	ce->name[len] = '\0';
-	return ce;
-}
-
 /*
  * Adjacent cache entries tend to share the leading paths, so it makes
  * sense to only store the differences in later entries.  In the v4
@@ -1768,15 +1741,18 @@ static unsigned long expand_name_field(struct strbuf *name, const char *cp_)
 	return (const char *)ep + 1 - cp_;
 }
 
-static struct cache_entry *create_from_disk(struct mem_pool *mem_pool,
+static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool,
+					    unsigned int version,
 					    struct ondisk_cache_entry *ondisk,
 					    unsigned long *ent_size,
-					    struct strbuf *previous_name)
+					    const struct cache_entry *previous_ce)
 {
 	struct cache_entry *ce;
 	size_t len;
 	const char *name;
 	unsigned int flags;
+	size_t copy_len = 0;
+	int expand_name_field = version == 4;
 
 	/* On-disk flags are just 16 bits */
 	flags = get_be16(&ondisk->flags);
@@ -1796,21 +1772,50 @@ static struct cache_entry *create_from_disk(struct mem_pool *mem_pool,
 	else
 		name = ondisk->name;
 
-	if (!previous_name) {
-		/* v3 and earlier */
+	if (expand_name_field) {
+		const unsigned char *cp = (const unsigned char *)name;
+		size_t strip_len, previous_len;
+
+		previous_len = previous_ce ? previous_ce->ce_namelen : 0;
+		strip_len = decode_varint(&cp);
+		if (previous_len < strip_len) {
+			if (previous_ce)
+				die(_("malformed name field in the index, near path '%s'"),
+				    previous_ce->name);
+			else
+				die(_("malformed name field in the index in the first path"));
+		}
+		copy_len = previous_len - strip_len;
+		name = (const char *)cp;
+	}
+
 	if (len == CE_NAMEMASK)
-			len = strlen(name);
-		ce = cache_entry_from_ondisk(mem_pool, ondisk, flags, name, len);
+		len = strlen(name) + copy_len;
 
-		*ent_size = ondisk_ce_size(ce);
-	} else {
-		unsigned long consumed;
-		consumed = expand_name_field(previous_name, name);
-		ce = cache_entry_from_ondisk(mem_pool, ondisk, flags,
-					     previous_name->buf,
-					     previous_name->len);
+	ce = mem_pool__ce_alloc(ce_mem_pool, len);
+
+	ce->ce_stat_data.sd_ctime.sec = get_be32(&ondisk->ctime.sec);
+	ce->ce_stat_data.sd_mtime.sec = get_be32(&ondisk->mtime.sec);
+	ce->ce_stat_data.sd_ctime.nsec = get_be32(&ondisk->ctime.nsec);
+	ce->ce_stat_data.sd_mtime.nsec = get_be32(&ondisk->mtime.nsec);
+	ce->ce_stat_data.sd_dev   = get_be32(&ondisk->dev);
+	ce->ce_stat_data.sd_ino   = get_be32(&ondisk->ino);
+	ce->ce_mode  = get_be32(&ondisk->mode);
+	ce->ce_stat_data.sd_uid   = get_be32(&ondisk->uid);
+	ce->ce_stat_data.sd_gid   = get_be32(&ondisk->gid);
+	ce->ce_stat_data.sd_size  = get_be32(&ondisk->size);
+	ce->ce_flags = flags & ~CE_NAMEMASK;
+	ce->ce_namelen = len;
+	ce->index = 0;
+	hashcpy(ce->oid.hash, ondisk->sha1);
 
-		*ent_size = (name - ((char *)ondisk)) + consumed;
+	if (expand_name_field) {
+		memcpy(ce->name, previous_ce->name, copy_len);
+		memcpy(ce->name + copy_len, name, len + 1 - copy_len);
+		*ent_size = (name - ((char *)ondisk)) + len + 1 - copy_len;
+	} else {
+		memcpy(ce->name, name, len + 1);
+		*ent_size = ondisk_ce_size(ce);
 	}
 	return ce;
 }
@@ -1897,7 +1902,7 @@ static size_t estimate_cache_size(size_t ondisk_size, unsigned int entries)
 }
 
 #ifndef NO_PTHREADS
-static unsigned long read_eoie_extension(void *mmap, size_t mmap_size);
+static unsigned long read_eoie_extension(const char *mmap, size_t mmap_size);
 #endif
 static void write_eoie_extension(struct strbuf *sb, git_hash_ctx *eoie_context, unsigned long offset);
 
@@ -1907,14 +1912,14 @@ struct load_index_extensions
 	pthread_t pthread;
 #endif
 	struct index_state *istate;
-	void *mmap;
+	const char *mmap;
 	size_t mmap_size;
 	unsigned long src_offset;
 };
 
-static void *load_index_extensions(void *_data)
+static void *load_index_extensions(void *data)
 {
-	struct load_index_extensions *p = _data;
+	struct load_index_extensions *p = data;
 	unsigned long src_offset = p->src_offset;
 
 	while (src_offset <= p->mmap_size - the_hash_algo->rawsz - 8) {
@@ -1925,13 +1930,12 @@ static void *load_index_extensions(void *_data)
 		 * in 4-byte network byte order.
 		 */
 		uint32_t extsize;
-		memcpy(&extsize, (char *)p->mmap + src_offset + 4, 4);
-		extsize = ntohl(extsize);
+		extsize = get_be32(p->mmap + src_offset + 4);
 		if (read_index_extension(p->istate,
-			(const char *)p->mmap + src_offset,
-			(char *)p->mmap + src_offset + 8,
+			p->mmap + src_offset,
+			p->mmap + src_offset + 8,
 			extsize) < 0) {
-			munmap(p->mmap, p->mmap_size);
+			munmap((void *)p->mmap, p->mmap_size);
 			die("index file corrupt");
 		}
 		src_offset += 8;
@@ -1946,8 +1950,8 @@ static void *load_index_extensions(void *_data)
  * from the memory mapped file and add them to the given index.
  */
 static unsigned long load_cache_entry_block(struct index_state *istate,
-			struct mem_pool *ce_mem_pool, int offset, int nr, void *mmap,
-			unsigned long start_offset, struct strbuf *previous_name)
+			struct mem_pool *ce_mem_pool, int offset, int nr, const char *mmap,
+			unsigned long start_offset, const struct cache_entry *previous_ce)
 {
 	int i;
 	unsigned long src_offset = start_offset;
@@ -1957,34 +1961,31 @@ static unsigned long load_cache_entry_block(struct index_state *istate,
 		struct cache_entry *ce;
 		unsigned long consumed;
 
-		disk_ce = (struct ondisk_cache_entry *)((char *)mmap + src_offset);
-		ce = create_from_disk(ce_mem_pool, disk_ce, &consumed, previous_name);
+		disk_ce = (struct ondisk_cache_entry *)(mmap + src_offset);
+		ce = create_from_disk(ce_mem_pool, istate->version, disk_ce, &consumed, previous_ce);
 		set_index_entry(istate, i, ce);
 
 		src_offset += consumed;
+		previous_ce = ce;
 	}
 	return src_offset - start_offset;
 }
 
 static unsigned long load_all_cache_entries(struct index_state *istate,
-			void *mmap, size_t mmap_size, unsigned long src_offset)
+			const char *mmap, size_t mmap_size, unsigned long src_offset)
 {
-	struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
 	unsigned long consumed;
 
 	if (istate->version == 4) {
-		previous_name = &previous_name_buf;
 		mem_pool_init(&istate->ce_mem_pool,
 				estimate_cache_size_from_compressed(istate->cache_nr));
 	} else {
-		previous_name = NULL;
 		mem_pool_init(&istate->ce_mem_pool,
 				estimate_cache_size(mmap_size, istate->cache_nr));
 	}
 
 	consumed = load_cache_entry_block(istate, istate->ce_mem_pool,
-					0, istate->cache_nr, mmap, src_offset, previous_name);
-	strbuf_release(&previous_name_buf);
+					0, istate->cache_nr, mmap, src_offset, NULL);
 	return consumed;
 }
 
@@ -1993,7 +1994,7 @@ static unsigned long load_all_cache_entries(struct index_state *istate,
 /*
  * Mostly randomly chosen maximum thread counts: we
  * cap the parallelism to online_cpus() threads, and we want
- * to have at least 100000 cache entries per thread for it to
+ * to have at least 10000 cache entries per thread for it to
  * be worth starting a thread.
  */
 #define THREAD_COST		(10000)
@@ -2004,10 +2005,9 @@ struct load_cache_entries_thread_data
 	struct index_state *istate;
 	struct mem_pool *ce_mem_pool;
 	int offset, nr;
-	void *mmap;
+	const char *mmap;
 	unsigned long start_offset;
-	struct strbuf previous_name_buf;
-	struct strbuf *previous_name;
+	struct cache_entry *previous_ce;
 	unsigned long consumed;	/* return # of bytes in index file processed */
 };
 
@@ -2020,12 +2020,12 @@ static void *load_cache_entries_thread(void *_data)
 	struct load_cache_entries_thread_data *p = _data;
 
 	p->consumed += load_cache_entry_block(p->istate, p->ce_mem_pool,
-		p->offset, p->nr, p->mmap, p->start_offset, p->previous_name);
+		p->offset, p->nr, p->mmap, p->start_offset, p->previous_ce);
 	return NULL;
 }
 
 static unsigned long load_cache_entries_threaded(int nr_threads, struct index_state *istate,
-			void *mmap, size_t mmap_size, unsigned long src_offset)
+			const char *mmap, size_t mmap_size, unsigned long src_offset)
 {
 	struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
 	struct load_cache_entries_thread_data *data;
@@ -2067,20 +2067,23 @@ static unsigned long load_cache_entries_threaded(int nr_threads, struct index_st
 			p->istate = istate;
 			p->offset = i;
 			p->nr = ce_per_thread < istate->cache_nr - i ? ce_per_thread : istate->cache_nr - i;
+			p->mmap = mmap;
+			p->start_offset = src_offset;
 
 			/* create a mem_pool for each thread */
-			if (istate->version == 4)
+			if (istate->version == 4) {
 				mem_pool_init(&p->ce_mem_pool,
 					estimate_cache_size_from_compressed(p->nr));
-			else
+
+				/* create a previous ce entry for this block of cache entries */
+				if (previous_name->len) {
+					p->previous_ce = mem_pool__ce_alloc(p->ce_mem_pool, previous_name->len);
+					p->previous_ce->ce_namelen = previous_name->len;
+					memcpy(p->previous_ce->name, previous_name->buf, previous_name->len);
+				}
+			} else {
 				mem_pool_init(&p->ce_mem_pool,
 					estimate_cache_size(mmap_size, p->nr));
-
-			p->mmap = mmap;
-			p->start_offset = src_offset;
-			if (previous_name) {
-				strbuf_addbuf(&p->previous_name_buf, previous_name);
-				p->previous_name = &p->previous_name_buf;
 			}
 
 			if (pthread_create(&p->pthread, NULL, load_cache_entries_thread, p))
@@ -2091,7 +2094,7 @@ static unsigned long load_cache_entries_threaded(int nr_threads, struct index_st
 				break;
 		}
 
-		ondisk = (struct ondisk_cache_entry *)((char *)mmap + src_offset);
+		ondisk = (struct ondisk_cache_entry *)(mmap + src_offset);
 
 		/* On-disk flags are just 16 bits */
 		flags = get_be16(&ondisk->flags);
@@ -2103,7 +2106,7 @@ static unsigned long load_cache_entries_threaded(int nr_threads, struct index_st
 		} else
 			name = ondisk->name;
 
-		if (!previous_name) {
+		if (istate->version != 4) {
 			size_t len;
 
 			/* v3 and earlier */
@@ -2122,7 +2125,6 @@ static unsigned long load_cache_entries_threaded(int nr_threads, struct index_st
 		if (pthread_join(p->pthread, NULL))
 			die("unable to join load_cache_entries_thread");
 		mem_pool_combine(istate->ce_mem_pool, p->ce_mem_pool);
-		strbuf_release(&p->previous_name_buf);
 		consumed += p->consumed;
 	}
 
@@ -2140,8 +2142,8 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist)
 	int fd;
 	struct stat st;
 	unsigned long src_offset;
-	struct cache_header *hdr;
-	void *mmap;
+	const struct cache_header *hdr;
+	const char *mmap;
 	size_t mmap_size;
 	struct load_index_extensions p = { 0 };
 	unsigned long extension_offset = 0;
@@ -2173,7 +2175,7 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist)
 		die_errno("unable to map index file");
 	close(fd);
 
-	hdr = mmap;
+	hdr = (const struct cache_header *)mmap;
 	if (verify_hdr(hdr, mmap_size) < 0)
 		goto unmap;
 
@@ -2233,11 +2235,11 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist)
 		p.src_offset = src_offset;
 		load_index_extensions(&p);
 	}
-	munmap(mmap, mmap_size);
+	munmap((void *)mmap, mmap_size);
 	return istate->cache_nr;
 
 unmap:
-	munmap(mmap, mmap_size);
+	munmap((void *)mmap, mmap_size);
 	die("index file corrupt");
 }
 
@@ -3256,11 +3258,11 @@ int should_validate_cache_entries(void)
 	return validate_index_cache_entries;
 }
 
-#define EOIE_SIZE 24 /* <4-byte offset> + <20-byte hash> */
+#define EOIE_SIZE (4 + GIT_SHA1_RAWSZ) /* <4-byte offset> + <20-byte hash> */
 #define EOIE_SIZE_WITH_HEADER (4 + 4 + EOIE_SIZE) /* <4-byte signature> + <4-byte length> + EOIE_SIZE */
 
 #ifndef NO_PTHREADS
-static unsigned long read_eoie_extension(void *mmap, size_t mmap_size)
+static unsigned long read_eoie_extension(const char *mmap, size_t mmap_size)
 {
 	/*
 	 * The end of index entries (EOIE) extension is guaranteed to be last
@@ -3271,14 +3273,18 @@ static unsigned long read_eoie_extension(void *mmap, size_t mmap_size)
 	 * <4-byte offset>
 	 * <20-byte hash>
 	 */
-	const char *index, *eoie = (const char *)mmap + mmap_size - GIT_SHA1_RAWSZ - EOIE_SIZE_WITH_HEADER;
+	const char *index, *eoie;
 	uint32_t extsize;
 	unsigned long offset, src_offset;
 	unsigned char hash[GIT_MAX_RAWSZ];
 	git_hash_ctx c;
 
+	/* ensure we have an index big enough to contain an EOIE extension */
+	if (mmap_size < sizeof(struct cache_header) + EOIE_SIZE_WITH_HEADER + the_hash_algo->rawsz)
+		return 0;
+
 	/* validate the extension signature */
-	index = eoie;
+	index = eoie = mmap + mmap_size - EOIE_SIZE_WITH_HEADER - the_hash_algo->rawsz;
 	if (CACHE_EXT(index) != CACHE_EXT_ENDOFINDEXENTRIES)
 		return 0;
 	index += sizeof(uint32_t);
@@ -3294,9 +3300,9 @@ static unsigned long read_eoie_extension(void *mmap, size_t mmap_size)
 	 * signature is after the index header and before the eoie extension.
 	 */
 	offset = get_be32(index);
-	if ((const char *)mmap + offset < (const char *)mmap + sizeof(struct cache_header))
+	if (mmap + offset < mmap + sizeof(struct cache_header))
 		return 0;
-	if ((const char *)mmap + offset >= eoie)
+	if (mmap + offset >= eoie)
 		return 0;
 	index += sizeof(uint32_t);
 
@@ -3319,20 +3325,19 @@ static unsigned long read_eoie_extension(void *mmap, size_t mmap_size)
 		 * in 4-byte network byte order.
 		 */
 		uint32_t extsize;
-		memcpy(&extsize, (char *)mmap + src_offset + 4, 4);
-		extsize = ntohl(extsize);
+		extsize = get_be32(mmap + src_offset + 4);
 
 		/* verify the extension size isn't so large it will wrap around */
 		if (src_offset + 8 + extsize < src_offset)
 			return 0;
 
-		the_hash_algo->update_fn(&c, (const char *)mmap + src_offset, 8);
+		the_hash_algo->update_fn(&c, mmap + src_offset, 8);
 
 		src_offset += 8;
 		src_offset += extsize;
 	}
 	the_hash_algo->final_fn(hash, &c);
-	if (hashcmp(hash, (unsigned char *)index))
+	if (hashcmp(hash, (const unsigned char *)index))
 		return 0;
 
 	/* Validate that the extension offsets returned us back to the eoie extension. */
diff --git a/t/README b/t/README
index 59015f7150..69c695ad8e 100644
--- a/t/README
+++ b/t/README
@@ -326,9 +326,6 @@ valid due to the addition of the EOIE extension.
 
 GIT_TEST_INDEX_THREADS=<boolean> forces multi-threaded loading of
 the index cache entries and extensions for the whole test suite.
-Currently tests 1, 4-9 in t1700-split-index.sh fail as they hard
-code SHA values for the index which are no longer valid due to the
-addition of the EOIE extension.
 
 Naming Tests
 ------------


### Patches

Ben Peart (4):
  eoie: add End of Index Entry (EOIE) extension
  read-cache: load cache extensions on a worker thread
  read-cache: load cache entries on worker threads
  read-cache: clean up casting and byte decoding

Nguyễn Thái Ngọc Duy (1):
  read-cache.c: optimize reading index format v4

 Documentation/config.txt                 |   6 +
 Documentation/technical/index-format.txt |  23 +
 config.c                                 |  18 +
 config.h                                 |   1 +
 read-cache.c                             | 579 +++++++++++++++++++----
 t/README                                 |   8 +
 t/t1700-split-index.sh                   |   1 +
 7 files changed, 538 insertions(+), 98 deletions(-)


base-commit: 29d9e3e2c47dd4b5053b0a98c891878d398463e3
-- 
2.18.0.windows.1






[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux