Re: [PATCH] read-cache.c: fix index memory allocation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 24.10.2011 09:07, schrieb Junio C Hamano:
> Thanks.
> 
> This approach may be the most appropriate for the maintenance track, but
> for the purpose of going forward, I wonder if we really want to keep the
> "estimate and allocate a large pool, and carve out individual pieces".
> 
> This bulk-allocate dates back to the days when we didn't have ondisk vs
> incore representation differences, IIRC, and as the result we deliberately
> leak cache entries whenever an entry in the index is replaced with a new
> one. Does the overhead to allocate individually really kill us that much
> for say a tree with 30k files in it?

Something like this (applies to master)?  Very basic testing didn't show
any slowdown of git status in the Linux repo.

-- >8 --
Subject: read-cache.c: allocate index entries individually

The code to estimate the in-memory size of the index based on its on-disk
representation is subtly wrong for certain architecture-dependent struct
layouts.  Instead of fixing it, replace the code to keep the index entries
in a single large block of memory and allocate each entry separately
instead.  This is both simpler and more flexible, as individual entries
can now be freed.  Actually using that added flexibility is left for a
later patch.

Suggested-by: Junio C Hamano <gitster@xxxxxxxxx>
Signed-off-by: Rene Scharfe <rene.scharfe@xxxxxxxxxxxxxx>
---
Note: This patch was brought to you by the option --histogram, which
produced nicer output than the default diff method.

 cache.h      |    1 -
 read-cache.c |   86 +++++++++++++++++++++++----------------------------------
 2 files changed, 35 insertions(+), 52 deletions(-)

diff --git a/cache.h b/cache.h
index be07ec7..ec0e571 100644
--- a/cache.h
+++ b/cache.h
@@ -316,7 +316,6 @@ struct index_state {
 	struct string_list *resolve_undo;
 	struct cache_tree *cache_tree;
 	struct cache_time timestamp;
-	void *alloc;
 	unsigned name_hash_initialized : 1,
 		 initialized : 1;
 	struct hash_table name_hash;
diff --git a/read-cache.c b/read-cache.c
index 01a0e25..7f75bfa 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1202,10 +1202,35 @@ int read_index(struct index_state *istate)
 	return read_index_from(istate, get_index_file());
 }
 
-static void convert_from_disk(struct ondisk_cache_entry *ondisk, struct cache_entry *ce)
+static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk)
 {
+	struct cache_entry *ce;
 	size_t len;
 	const char *name;
+	unsigned int flags;
+
+	/* On-disk flags are just 16 bits */
+	flags = ntohs(ondisk->flags);
+	len = flags & CE_NAMEMASK;
+
+	if (flags & CE_EXTENDED) {
+		struct ondisk_cache_entry_extended *ondisk2;
+		int extended_flags;
+		ondisk2 = (struct ondisk_cache_entry_extended *)ondisk;
+		extended_flags = ntohs(ondisk2->flags2) << 16;
+		/* We do not yet understand any bit out of CE_EXTENDED_FLAGS */
+		if (extended_flags & ~CE_EXTENDED_FLAGS)
+			die("Unknown index entry format %08x", extended_flags);
+		flags |= extended_flags;
+		name = ondisk2->name;
+	}
+	else
+		name = ondisk->name;
+
+	if (len == CE_NAMEMASK)
+		len = strlen(name);
+
+	ce = xmalloc(cache_entry_size(len));
 
 	ce->ce_ctime.sec = ntohl(ondisk->ctime.sec);
 	ce->ce_mtime.sec = ntohl(ondisk->mtime.sec);
@@ -1217,48 +1242,16 @@ static void convert_from_disk(struct ondisk_cache_entry *ondisk, struct cache_en
 	ce->ce_uid   = ntohl(ondisk->uid);
 	ce->ce_gid   = ntohl(ondisk->gid);
 	ce->ce_size  = ntohl(ondisk->size);
-	/* On-disk flags are just 16 bits */
-	ce->ce_flags = ntohs(ondisk->flags);
+	ce->ce_flags = flags;
 
 	hashcpy(ce->sha1, ondisk->sha1);
 
-	len = ce->ce_flags & CE_NAMEMASK;
-
-	if (ce->ce_flags & CE_EXTENDED) {
-		struct ondisk_cache_entry_extended *ondisk2;
-		int extended_flags;
-		ondisk2 = (struct ondisk_cache_entry_extended *)ondisk;
-		extended_flags = ntohs(ondisk2->flags2) << 16;
-		/* We do not yet understand any bit out of CE_EXTENDED_FLAGS */
-		if (extended_flags & ~CE_EXTENDED_FLAGS)
-			die("Unknown index entry format %08x", extended_flags);
-		ce->ce_flags |= extended_flags;
-		name = ondisk2->name;
-	}
-	else
-		name = ondisk->name;
-
-	if (len == CE_NAMEMASK)
-		len = strlen(name);
 	/*
 	 * NEEDSWORK: If the original index is crafted, this copy could
 	 * go unchecked.
 	 */
 	memcpy(ce->name, name, len + 1);
-}
-
-static inline size_t estimate_cache_size(size_t ondisk_size, unsigned int entries)
-{
-	long per_entry;
-
-	per_entry = sizeof(struct cache_entry) - sizeof(struct ondisk_cache_entry);
-
-	/*
-	 * Alignment can cause differences. This should be "alignof", but
-	 * since that's a gcc'ism, just use the size of a pointer.
-	 */
-	per_entry += sizeof(void *);
-	return ondisk_size + entries*per_entry;
+	return ce;
 }
 
 /* remember to discard_cache() before reading a different cache! */
@@ -1266,7 +1259,7 @@ int read_index_from(struct index_state *istate, const char *path)
 {
 	int fd, i;
 	struct stat st;
-	unsigned long src_offset, dst_offset;
+	unsigned long src_offset;
 	struct cache_header *hdr;
 	void *mmap;
 	size_t mmap_size;
@@ -1305,29 +1298,18 @@ int read_index_from(struct index_state *istate, const char *path)
 	istate->cache_nr = ntohl(hdr->hdr_entries);
 	istate->cache_alloc = alloc_nr(istate->cache_nr);
 	istate->cache = xcalloc(istate->cache_alloc, sizeof(struct cache_entry *));
-
-	/*
-	 * The disk format is actually larger than the in-memory format,
-	 * due to space for nsec etc, so even though the in-memory one
-	 * has room for a few  more flags, we can allocate using the same
-	 * index size
-	 */
-	istate->alloc = xmalloc(estimate_cache_size(mmap_size, istate->cache_nr));
 	istate->initialized = 1;
 
 	src_offset = sizeof(*hdr);
-	dst_offset = 0;
 	for (i = 0; i < istate->cache_nr; i++) {
 		struct ondisk_cache_entry *disk_ce;
 		struct cache_entry *ce;
 
 		disk_ce = (struct ondisk_cache_entry *)((char *)mmap + src_offset);
-		ce = (struct cache_entry *)((char *)istate->alloc + dst_offset);
-		convert_from_disk(disk_ce, ce);
+		ce = create_from_disk(disk_ce);
 		set_index_entry(istate, i, ce);
 
 		src_offset += ondisk_ce_size(ce);
-		dst_offset += ce_size(ce);
 	}
 	istate->timestamp.sec = st.st_mtime;
 	istate->timestamp.nsec = ST_MTIME_NSEC(st);
@@ -1361,11 +1343,15 @@ unmap:
 
 int is_index_unborn(struct index_state *istate)
 {
-	return (!istate->cache_nr && !istate->alloc && !istate->timestamp.sec);
+	return (!istate->cache_nr && !istate->timestamp.sec);
 }
 
 int discard_index(struct index_state *istate)
 {
+	int i;
+
+	for (i = 0; i < istate->cache_nr; i++)
+		free(istate->cache[i]);
 	resolve_undo_clear_index(istate);
 	istate->cache_nr = 0;
 	istate->cache_changed = 0;
@@ -1374,8 +1360,6 @@ int discard_index(struct index_state *istate)
 	istate->name_hash_initialized = 0;
 	free_hash(&istate->name_hash);
 	cache_tree_free(&(istate->cache_tree));
-	free(istate->alloc);
-	istate->alloc = NULL;
 	istate->initialized = 0;
 
 	/* no need to throw away allocated active_cache */
-- 
1.7.7
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]