[PATCH 9/8] even faster loading time with index version 254

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This dirty (and likely buggy) patch shows a direction for lowering load
time even further. Basically, the shared memory now contains a clean
memory dump that a git process can use with little preparation
(which also means it's tied to C Git; other implementations can't use
this).

Memory is actually shared: git won't malloc and copy the data over, so
even though the v254 index is 235 MB (larger than v2's 199 MB), we use
less memory.

With this patch, load time can get as low as 256.442 ms (compared to
663 ms in 0/8) at 800 MHz, or 91 ms at 2.5 GHz. Index load time should
be a solved problem.

But I'm not going to polish this patch or try to get it merged. I'd
rather see a real-world repository of this size first to justify
messing up read-cache.c even more.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 cache.h              |   2 +
 read-cache--daemon.c |  31 +++++------
 read-cache.c         | 154 ++++++++++++++++++++++++++++++++++++++++++---------
 split-index.c        |   3 +
 4 files changed, 149 insertions(+), 41 deletions(-)

diff --git a/cache.h b/cache.h
index c246dee..7f0ef1e 100644
--- a/cache.h
+++ b/cache.h
@@ -297,6 +297,8 @@ struct index_state {
 	unsigned char sha1[20];
 	void *mmap;
 	size_t mmap_size;
+	int mmap_fd;
+	void *(*allocate_254)(struct index_state *, size_t);
 };
 
 extern struct index_state the_index;
diff --git a/read-cache--daemon.c b/read-cache--daemon.c
index bd6d84f..a44bd09 100644
--- a/read-cache--daemon.c
+++ b/read-cache--daemon.c
@@ -34,10 +34,19 @@ static void cleanup_socket_on_signal(int sig)
 	raise(sig);
 }
 
+static void *allocate_254(struct index_state *istate, unsigned long size)
+{
+	ftruncate(istate->mmap_fd, size);
+	istate->mmap_size = size;
+	istate->mmap = xmmap(NULL, istate->mmap_size, PROT_READ | PROT_WRITE,
+			     MAP_SHARED, istate->mmap_fd, 0);
+	return istate->mmap != MAP_FAILED ? istate->mmap : NULL;
+}
+
+extern int do_write_index(struct index_state *istate, int newfd, int strip_extensions);
 static int do_share_index(struct index_state *istate, struct strbuf *shm_path)
 {
 	struct strbuf sb = STRBUF_INIT;
-	void *map;
 	int fd;
 
 	strbuf_addf(&sb, "/git-index-%s", sha1_to_hex(istate->sha1));
@@ -48,21 +57,16 @@ static int do_share_index(struct index_state *istate, struct strbuf *shm_path)
 		shm_unlink(shm_path->buf);
 		strbuf_reset(shm_path);
 	}
-	if (ftruncate(fd, istate->mmap_size)) {
-		close(fd);
-		shm_unlink(shm_path->buf);
-		return -1;
-	}
+	istate->version = 254;
+	istate->allocate_254 = allocate_254;
+	istate->mmap_fd = fd;
+	do_write_index(istate, -1, 0);
 	strbuf_addbuf(shm_path, &sb);
-	map = xmmap(NULL, istate->mmap_size, PROT_READ | PROT_WRITE,
-		    MAP_SHARED, fd, 0);
-	if (map == MAP_FAILED) {
+	if (istate->mmap == MAP_FAILED) {
 		close(fd);
 		shm_unlink(shm_path->buf);
 		return -1;
 	}
-	memcpy(map, istate->mmap, istate->mmap_size);
-	munmap(map, istate->mmap_size);
 	fchmod(fd, 0400);
 	close(fd);
 	return 0;
@@ -88,13 +92,9 @@ static void share_index(struct index_state *istate, struct strbuf *shm_path)
 
 static void refresh()
 {
-	the_index.keep_mmap = 1;
 	if (read_cache() < 0)
 		die("could not read index");
 	share_index(&the_index, &shm_index);
-	if (the_index.split_index &&
-	    the_index.split_index->base)
-		share_index(the_index.split_index->base, &shm_sharedindex);
 	discard_index(&the_index);
 }
 
@@ -145,7 +145,6 @@ static void serve_cache(const char *socket_path, int detach)
 	if (fd < 0)
 		die_errno("unable to bind to '%s'", socket_path);
 
-	use_read_cache_daemon = -1;
 	refresh();
 	if (detach && daemonize(&daemonized))
 		die_errno("unable to detach");
diff --git a/read-cache.c b/read-cache.c
index d5c9247..4db1c30 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -61,7 +61,8 @@ static void replace_index_entry(struct index_state *istate, int nr, struct cache
 
 	replace_index_entry_in_base(istate, old, ce);
 	remove_name_hash(istate, old);
-	free(old);
+	if (old->index != 0xffffffff) /* special mark by v254 entry writing code */
+		free(old);
 	set_index_entry(istate, nr, ce);
 	ce->ce_flags |= CE_UPDATE_IN_BASE;
 	istate->cache_changed |= CE_ENTRY_CHANGED;
@@ -1333,9 +1334,11 @@ static int verify_hdr(struct cache_header *hdr, unsigned long size)
 	if (hdr->hdr_signature != htonl(CACHE_SIGNATURE))
 		return error("bad signature");
 	hdr_version = ntohl(hdr->hdr_version);
-	if (hdr_version < INDEX_FORMAT_LB || INDEX_FORMAT_UB < hdr_version)
+	if (!size && hdr_version == 254)
+		fprintf(stderr, "yeah\n");		/* go on */
+	else if (hdr_version < INDEX_FORMAT_LB || INDEX_FORMAT_UB < hdr_version)
 		return error("bad index version %d", hdr_version);
-	if (!size)
+	if (!size || hdr_version == 254)
 		return 0;
 	git_SHA1_Init(&c);
 	git_SHA1_Update(&c, hdr, size - 20);
@@ -1499,7 +1502,8 @@ static void *try_shm(void *mmap, size_t *mmap_size)
 		close(fd);
 		return mmap;
 	}
-	new_mmap = xmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	new_mmap = xmmap(NULL, st.st_size, PROT_READ | PROT_WRITE,
+			 MAP_PRIVATE, fd, 0);
 	close(fd);
 	if (new_mmap == MAP_FAILED)
 		return mmap;
@@ -1519,6 +1523,7 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist)
 	void *mmap, *old_mmap;
 	size_t mmap_size;
 	struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
+	int ver_254 = 0;
 
 	if (istate->initialized)
 		return istate->cache_nr;
@@ -1561,7 +1566,13 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist)
 	istate->cache = xcalloc(istate->cache_alloc, sizeof(*istate->cache));
 	istate->initialized = 1;
 
-	if (istate->version == 4)
+	if (istate->version == 254) {
+		istate->version = 4;
+		ver_254 = 1;
+		istate->keep_mmap = 1;
+		istate->mmap = mmap;
+		istate->mmap_size = mmap_size;
+	} else if (istate->version == 4)
 		previous_name = &previous_name_buf;
 	else
 		previous_name = NULL;
@@ -1573,7 +1584,14 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist)
 		unsigned long consumed;
 
 		disk_ce = (struct ondisk_cache_entry *)((char *)mmap + src_offset);
-		ce = create_from_disk(disk_ce, &consumed, previous_name);
+		if (ver_254) {
+			ce = mmap + src_offset;
+			consumed =
+				offsetof(struct cache_entry, name) +
+				ce_namelen(ce) + 1;
+			consumed = (consumed + 7) & ~7;
+		} else
+			ce = create_from_disk(disk_ce, &consumed, previous_name);
 		set_index_entry(istate, i, ce);
 
 		src_offset += consumed;
@@ -1655,6 +1673,8 @@ int discard_index(struct index_state *istate)
 	int i;
 
 	for (i = 0; i < istate->cache_nr; i++) {
+		if (istate->cache[i]->index == 0xffffffff)
+			continue;
 		if (istate->cache[i]->index &&
 		    istate->split_index &&
 		    istate->split_index->base &&
@@ -1696,13 +1716,51 @@ int unmerged_index(const struct index_state *istate)
 static unsigned char write_buffer[WRITE_BUFFER_SIZE];
 static unsigned long write_buffer_len;
 
+struct file_block {
+	struct file_block *next;
+	char buf[1];
+};
+static struct file_block *start, *end;
+static unsigned long file_block_size;
+#define FB_ALLOC_SIZE 65536
+#define FB_USABLE_SIZE (FB_ALLOC_SIZE - sizeof(struct file_block *))
+
+static void fill_file_block(const unsigned char *buffer, unsigned int len)
+{
+	if (!start) {
+		start = end = xmalloc(FB_ALLOC_SIZE);
+		start->next = NULL;
+	}
+
+	while (len) {
+		unsigned long used = file_block_size % FB_USABLE_SIZE;
+		unsigned long remaining = FB_USABLE_SIZE - used;
+		if (len < remaining) {
+			memcpy(end->buf + used, buffer, len);
+			file_block_size += len;
+			return;
+		}
+		memcpy(end->buf + used, buffer, remaining);
+		file_block_size += remaining;
+		buffer		+= remaining;
+		len		-= remaining;
+		end->next	 = xmalloc(FB_ALLOC_SIZE);
+		end		 = end->next;
+		end->next	 = NULL;
+	}
+}
+
 static int ce_write_flush(git_SHA_CTX *context, int fd)
 {
 	unsigned int buffered = write_buffer_len;
 	if (buffered) {
-		git_SHA1_Update(context, write_buffer, buffered);
-		if (write_in_full(fd, write_buffer, buffered) != buffered)
-			return -1;
+		if (context) {
+			git_SHA1_Update(context, write_buffer, buffered);
+			if (write_in_full(fd, write_buffer, buffered) != buffered)
+				return -1;
+		} else {
+			fill_file_block(write_buffer, buffered);
+		}
 		write_buffer_len = 0;
 	}
 	return 0;
@@ -1745,7 +1803,8 @@ static int ce_flush(git_SHA_CTX *context, int fd, unsigned char *sha1)
 
 	if (left) {
 		write_buffer_len = 0;
-		git_SHA1_Update(context, write_buffer, left);
+		if (context)
+			git_SHA1_Update(context, write_buffer, left);
 	}
 
 	/* Flush first if not enough space for SHA1 signature */
@@ -1756,10 +1815,18 @@ static int ce_flush(git_SHA_CTX *context, int fd, unsigned char *sha1)
 	}
 
 	/* Append the SHA1 signature at the end */
-	git_SHA1_Final(write_buffer + left, context);
+	if (context)
+		git_SHA1_Final(write_buffer + left, context);
+	else
+		hashclr(write_buffer + left);
 	hashcpy(sha1, write_buffer + left);
 	left += 20;
-	return (write_in_full(fd, write_buffer, left) != left) ? -1 : 0;
+	if (context)
+		return (write_in_full(fd, write_buffer, left) != left) ? -1 : 0;
+	else {
+		fill_file_block(write_buffer, left);
+		return 0;
+	}
 }
 
 static void ce_smudge_racily_clean_entry(struct cache_entry *ce)
@@ -1921,10 +1988,9 @@ void update_index_if_able(struct index_state *istate, struct lock_file *lockfile
 		rollback_lock_file(lockfile);
 }
 
-static int do_write_index(struct index_state *istate, int newfd,
-			  int strip_extensions)
+int do_write_index(struct index_state *istate, int newfd, int strip_extensions)
 {
-	git_SHA_CTX c;
+	git_SHA_CTX c, *c_p;
 	struct cache_header hdr;
 	int i, err, removed, extended, hdr_version;
 	struct cache_entry **cache = istate->cache;
@@ -1960,8 +2026,14 @@ static int do_write_index(struct index_state *istate, int newfd,
 	hdr.hdr_version = htonl(hdr_version);
 	hdr.hdr_entries = htonl(entries - removed);
 
-	git_SHA1_Init(&c);
-	if (ce_write(&c, newfd, &hdr, sizeof(hdr)) < 0)
+	if (istate->version == 254)
+		c_p = NULL;
+	else {
+		c_p = &c;
+		git_SHA1_Init(c_p);
+	}
+
+	if (ce_write(c_p, newfd, &hdr, sizeof(hdr)) < 0)
 		return -1;
 
 	previous_name = (hdr_version == 4) ? &previous_name_buf : NULL;
@@ -1982,7 +2054,21 @@ static int do_write_index(struct index_state *istate, int newfd,
 			else
 				return error(msg, ce->name);
 		}
-		if (ce_write_entry(&c, newfd, ce, previous_name) < 0)
+		if (!c_p) {
+			static unsigned padding[8];
+			unsigned sz = offsetof(struct cache_entry, name) + ce_namelen(ce) + 1;
+			unsigned int ce_flags = ce->ce_flags;
+			struct hashmap_entry he = ce->ent;
+			ce->index = 0xffffffff;
+			memset(&ce->ent, 0, sizeof(ce->ent));
+			ce->ce_flags &= CE_VALID | CE_EXTENDED_FLAGS;
+			ce_write(NULL, 0, ce, sz);
+			ce->ce_flags = ce_flags;
+			memcpy(&ce->ent, &he, sizeof(he));
+			ce->index = 0;
+			if (sz % 8)
+				ce_write(NULL, 0, padding, 8 - (sz % 8));
+		} else if (ce_write_entry(c_p, newfd, ce, previous_name) < 0)
 			return -1;
 	}
 	strbuf_release(&previous_name_buf);
@@ -1992,9 +2078,9 @@ static int do_write_index(struct index_state *istate, int newfd,
 		struct strbuf sb = STRBUF_INIT;
 
 		err = write_link_extension(&sb, istate) < 0 ||
-			write_index_ext_header(&c, newfd, CACHE_EXT_LINK,
+			write_index_ext_header(c_p, newfd, CACHE_EXT_LINK,
 					       sb.len) < 0 ||
-			ce_write(&c, newfd, sb.buf, sb.len) < 0;
+			ce_write(c_p, newfd, sb.buf, sb.len) < 0;
 		strbuf_release(&sb);
 		if (err)
 			return -1;
@@ -2003,8 +2089,8 @@ static int do_write_index(struct index_state *istate, int newfd,
 		struct strbuf sb = STRBUF_INIT;
 
 		cache_tree_write(&sb, istate->cache_tree);
-		err = write_index_ext_header(&c, newfd, CACHE_EXT_TREE, sb.len) < 0
-			|| ce_write(&c, newfd, sb.buf, sb.len) < 0;
+		err = write_index_ext_header(c_p, newfd, CACHE_EXT_TREE, sb.len) < 0
+			|| ce_write(c_p, newfd, sb.buf, sb.len) < 0;
 		strbuf_release(&sb);
 		if (err)
 			return -1;
@@ -2013,16 +2099,34 @@ static int do_write_index(struct index_state *istate, int newfd,
 		struct strbuf sb = STRBUF_INIT;
 
 		resolve_undo_write(&sb, istate->resolve_undo);
-		err = write_index_ext_header(&c, newfd, CACHE_EXT_RESOLVE_UNDO,
+		err = write_index_ext_header(c_p, newfd, CACHE_EXT_RESOLVE_UNDO,
 					     sb.len) < 0
-			|| ce_write(&c, newfd, sb.buf, sb.len) < 0;
+			|| ce_write(c_p, newfd, sb.buf, sb.len) < 0;
 		strbuf_release(&sb);
 		if (err)
 			return -1;
 	}
 
-	if (ce_flush(&c, newfd, istate->sha1) || fstat(newfd, &st))
+	if (ce_flush(c_p, newfd, istate->sha1) || (c_p && fstat(newfd, &st)))
 		return -1;
+	if (!c_p) {
+		unsigned char *p = NULL;
+		if (istate->allocate_254)
+			p = istate->allocate_254(istate, file_block_size);
+		while (file_block_size) {
+			struct file_block *to_free = start;
+			int len = file_block_size > FB_USABLE_SIZE ? FB_USABLE_SIZE : file_block_size;
+			if (p) {
+				memcpy(p, start->buf, len);
+				p += len;
+			} else
+				write_or_die(newfd, start->buf, len);
+			file_block_size -= len;
+			start = start->next;
+			free(to_free);
+		}
+		start = end = NULL;
+	}
 	istate->timestamp.sec = (unsigned int)st.st_mtime;
 	istate->timestamp.nsec = ST_MTIME_NSEC(st);
 	return 0;
diff --git a/split-index.c b/split-index.c
index 21485e2..a47f805 100644
--- a/split-index.c
+++ b/split-index.c
@@ -302,6 +302,9 @@ void discard_split_index(struct index_state *istate)
 
 void save_or_free_index_entry(struct index_state *istate, struct cache_entry *ce)
 {
+	if (ce->index == 0xffffffff)
+		return;
+
 	if (ce->index &&
 	    istate->split_index &&
 	    istate->split_index->base &&
-- 
1.9.1.346.ga2b5940

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]