[PATCH 35/38] pack v4: decode tree objects

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



For now we recreate the whole tree object in its canonical form.

Eventually, the core code should grow some ability to walk packv4 tree
entries directly which would be way more efficient.  Not only would that
avoid double tree entry parsing, but the pack v4 encoding allows for
getting at child objects without going through the SHA1 search.

Signed-off-by: Nicolas Pitre <nico@xxxxxxxxxxx>
---
 packv4-parse.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 131 insertions(+), 6 deletions(-)

diff --git a/packv4-parse.c b/packv4-parse.c
index b80b73e..04eab46 100644
--- a/packv4-parse.c
+++ b/packv4-parse.c
@@ -151,19 +151,15 @@ static void load_path_dict(struct packed_git *p)
 	p->path_dict = paths;
 }
 
-const unsigned char *get_pathref(struct packed_git *p, const unsigned char **srcp)
+const unsigned char *get_pathref(struct packed_git *p, unsigned int index)
 {
-	unsigned int index;
-
 	if (!p->path_dict)
 		load_path_dict(p);
 
-	index = decode_varint(srcp);
-	if (index < 1 || index - 1 >= p->path_dict->nb_entries) {
+	if (index >= p->path_dict->nb_entries) {
 		error("%s: index overflow", __func__);
 		return NULL;
 	}
-	index -= 1;
 	return p->path_dict->data + p->path_dict->offsets[index];
 }
 
@@ -240,3 +236,132 @@ void *pv4_get_commit(struct packed_git *p, struct pack_window **w_curs,
 
 	return dst;
 }
+
+static int decode_entries(struct packed_git *p, struct pack_window **w_curs,
+			  off_t offset, unsigned int start, unsigned int count,
+			  unsigned char **dstp, unsigned long *sizep, int hdr)
+{
+	unsigned long avail;
+	unsigned int nb_entries;
+	const unsigned char *src, *scp;
+	off_t copy_objoffset = 0;
+
+	src = use_pack(p, w_curs, offset, &avail);
+	scp = src;
+
+	if (hdr) {
+		/* we need to skip over the object header */
+		while (*scp & 128)
+			if (++scp - src >= avail - 20)
+				return -1;
+		/* let's still make sure this is actually a tree */
+		if ((*scp++ & 0xf) != OBJ_TREE)
+			return -1;
+	}
+
+	nb_entries = decode_varint(&scp);
+	if (scp == src || start > nb_entries || count > nb_entries - start)
+		return -1;
+	offset += scp - src;
+	avail -= scp - src;
+	src = scp;
+
+	while (count) {
+		unsigned int what;
+
+		if (avail < 20) {
+			src = use_pack(p, w_curs, offset, &avail);
+			if (avail < 20)
+				return -1;
+		}
+		scp = src;
+
+		what = decode_varint(&scp);
+		if (scp == src)
+			return -1;
+
+		if (!(what & 1) && start != 0) {
+			/*
+			 * This is a single entry and we have to skip it.
+			 * The path index was parsed and is in 'what'.
+			 * Skip over the SHA1 index.
+			 */
+			while (*scp++ & 128);
+			start--;
+		} else if (!(what & 1) && start == 0) {
+			/*
+			 * This is an actual tree entry to recreate.
+			 */
+			const unsigned char *path, *sha1;
+			unsigned mode;
+			int len;
+
+			path = get_pathref(p, what >> 1);
+			sha1 = get_sha1ref(p, &scp);
+			if (!path || !sha1)
+				return -1;
+			mode = (path[0] << 8) | path[1];
+			len = snprintf((char *)*dstp, *sizep, "%o %s%c",
+					   mode, path+2, '\0');
+			if (len + 20 > *sizep)
+				return -1;
+			hashcpy(*dstp + len, sha1);
+			*dstp += len + 20;
+			*sizep -= len + 20;
+			count--;
+		} else if (what & 1) {
+			/*
+			 * Copy from another tree object.
+			 */
+			unsigned int copy_start, copy_count;
+
+			copy_start = what >> 1;
+			copy_count = decode_varint(&scp);
+			if (!copy_count)
+				return -1;
+
+			/*
+			 * The LSB of copy_count is a flag indicating if
+			 * a third value is provided to specify the source
+			 * object.  This may be omitted when it doesn't
+			 * change, but has to be specified at least for the
+			 * first copy sequence.
+			 */
+			if (copy_count & 1) {
+				unsigned index = decode_varint(&scp);
+				if (!index)  /* thin pack */
+					return -1;
+				copy_objoffset =
+					nth_packed_object_offset(p, index - 1);
+			}
+			if (!copy_objoffset)
+				return -1;
+			copy_count >>= 1;
+
+			if (start >= copy_count) {
+				start -= copy_count;
+			} else {
+				int ret;
+				copy_count -= start;
+				copy_start += start;
+				start = 0;
+				if (copy_count > count)
+					copy_count = count;
+				count -= copy_count;
+				ret = decode_entries(p, w_curs,
+					copy_objoffset, copy_start, copy_count,
+					dstp, sizep, 1);
+				if (ret)
+					return ret;
+				/* force pack window readjustment */
+				avail = scp - src;
+			}
+		}
+
+		offset += scp - src;
+		avail -= scp - src;
+		src = scp;
+	}
+
+	return 0;
+}
-- 
1.8.4.38.g317e65b

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]