[PATCH v2 00/14] pack v4 support in index-pack

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Mostly cleanups after Nico's comments. The diff against v2 is

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 4a24bc3..88340b5 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -22,8 +22,8 @@ struct object_entry {
 	struct pack_idx_entry idx;
 	unsigned long size;
 	unsigned int hdr_size;
-	enum object_type type;
-	enum object_type real_type;
+	enum object_type type;	/* type as written in pack */
+	enum object_type real_type; /* type after delta resolving */
 	unsigned delta_depth;
 	int base_object_no;
 	int nr_bases;		/* only valid for v4 trees */
@@ -194,8 +194,10 @@ static int mark_link(struct object *obj, int type, void *data)
 	return 0;
 }
 
-/* The content of each linked object must have been checked
-   or it must be already present in the object database */
+/*
+ * The content of each linked object must have been checked or it must
+ * be already present in the object database
+ */
 static unsigned check_object(struct object *obj)
 {
 	if (!obj)
@@ -289,6 +291,19 @@ static inline void *fill_and_use(int bytes)
 	return p;
 }
 
+static void check_against_sha1table(const unsigned char *sha1)
+{
+	const unsigned char *found;
+	if (!packv4)
+		return;
+
+	found = bsearch(sha1, sha1_table, nr_objects, 20,
+			(int (*)(const void *, const void *))hashcmp);
+	if (!found)
+		die(_("object %s not found in SHA-1 table"),
+		    sha1_to_hex(sha1));
+}
+
 static NORETURN void bad_object(unsigned long offset, const char *format,
 		       ...) __attribute__((format (printf, 2, 3)));
 
@@ -325,15 +340,8 @@ static const unsigned char *read_sha1ref(void)
 static const unsigned char *read_sha1table_ref(void)
 {
 	const unsigned char *sha1 = read_sha1ref();
-	if (sha1 < sha1_table || sha1 >= sha1_table + nr_objects * 20) {
-		unsigned char *found;
-		found = bsearch(sha1, sha1_table, nr_objects, 20,
-				(int (*)(const void *, const void *))hashcmp);
-		if (!found)
-			bad_object(consumed_bytes,
-				   _("SHA-1 %s not found in SHA-1 table"),
-				   sha1_to_hex(sha1));
-	}
+	if (sha1 < sha1_table || sha1 >= sha1_table + nr_objects * 20)
+		check_against_sha1table(sha1);
 	return sha1;
 }
 
@@ -346,21 +354,6 @@ static const unsigned char *read_dictref(struct packv4_dict *dict)
 	return  dict->data + dict->offsets[index];
 }
 
-static void *read_data(int size)
-{
-	const int max = sizeof(input_buffer);
-	void *buf;
-	char *p;
-	p = buf = xmalloc(size);
-	while (size) {
-		int to_fill = size > max ? max : size;
-		memcpy(p, fill_and_use(to_fill), to_fill);
-		p += to_fill;
-		size -= to_fill;
-	}
-	return buf;
-}
-
 static const char *open_pack_file(const char *pack_name)
 {
 	if (from_stdin) {
@@ -532,8 +525,7 @@ static void read_and_inflate(unsigned long offset,
 		git_SHA1_Final(sha1, ctx);
 }
 
-static void *unpack_commit_v4(unsigned int offset,
-			      unsigned long size,
+static void *unpack_commit_v4(unsigned int offset, unsigned long size,
 			      unsigned char *sha1)
 {
 	unsigned int nb_parents;
@@ -622,7 +614,8 @@ static void add_tree_delta_base(struct object_entry *obj,
  * v4 trees are actually kind of deltas and we don't do delta in the
  * first pass. This function only walks through a tree object to find
  * the end offset, register object dependencies and performs limited
- * validation.
+ * validation. For v4 trees that have no dependencies, we do
+ * uncompress and calculate their SHA-1.
  */
 static void *unpack_tree_v4(struct object_entry *obj,
 			    unsigned int offset, unsigned long size,
@@ -641,9 +634,9 @@ static void *unpack_tree_v4(struct object_entry *obj,
 				add_tree_delta_base(obj, last_base, delta_start);
 			} else if (!last_base)
 				bad_object(offset,
-					   _("bad copy count index in unpack_tree_v4"));
+					   _("missing delta base unpack_tree_v4"));
 			copy_count >>= 1;
-			if (!copy_count)
+			if (!copy_count || copy_count > nr)
 				bad_object(offset,
 					   _("bad copy count index in unpack_tree_v4"));
 			nr -= copy_count;
@@ -657,6 +650,13 @@ static void *unpack_tree_v4(struct object_entry *obj,
 			entry_sha1 = read_sha1ref();
 			nr--;
 
+			/*
+			 * Attempt to rebuild a canonical (base) tree.
+			 * If last_base is set, this tree depends on
+			 * another tree, which we have no access at this
+			 * stage, so reconstruction must be delayed until
+			 * the second pass.
+			 */
 			if (!last_base) {
 				const unsigned char *path;
 				unsigned mode;
@@ -694,6 +694,11 @@ static void *unpack_tree_v4(struct object_entry *obj,
 	}
 }
 
+/*
+ * Unpack an entry data in the streamed pack, calculate the object
+ * SHA-1 if it's not a large blob. Otherwise just try to inflate the
+ * object to /dev/null to determine the end of the entry in the pack.
+ */
 static void *unpack_entry_data(struct object_entry *obj, unsigned char *sha1)
 {
 	static char fixed_buf[8192];
@@ -799,19 +804,23 @@ static void *unpack_raw_entry(struct object_entry *obj,
 	return data;
 }
 
+/*
+ * Some checks are skipped because they are already done by
+ * unpack_tree_v4() in the first pass.
+ */
 static void *patch_one_base_tree(const struct object_entry *src,
 				 const unsigned char *src_buf,
 				 const unsigned char *delta_buf,
 				 unsigned long delta_size,
 				 unsigned long *dst_size)
 {
-	unsigned int nr;
+	int nr;
 	const unsigned char *last_base = NULL;
 	struct strbuf sb = STRBUF_INIT;
 	const unsigned char *p = delta_buf;
 
 	nr = decode_varint(&p);
-	while (nr && p < delta_buf + delta_size) {
+	while (nr > 0 && p < delta_buf + delta_size) {
 		unsigned int copy_start_or_path = decode_varint(&p);
 		if (copy_start_or_path & 1) { /* copy_start */
 			struct tree_desc desc;
@@ -829,11 +838,9 @@ static void *patch_one_base_tree(const struct object_entry *src,
 					last_base = sha1_table + (id - 1) * 20;
 				if (hashcmp(last_base, src->idx.sha1))
 					die(_("bad tree base in patch_one_base_tree"));
-			} else if (!last_base)
-				die(_("bad copy count index in patch_one_base_tree"));
+			}
+
 			copy_count >>= 1;
-			if (!copy_count)
-				die(_("bad copy count index in patch_one_base_tree"));
 			nr -= copy_count;
 
 			init_tree_desc(&desc, src_buf, src->size);
@@ -841,7 +848,8 @@ static void *patch_one_base_tree(const struct object_entry *src,
 				if (copy_start)
 					copy_start--;
 				else if (copy_count) {
-					strbuf_addf(&sb, "%o %s%c", entry.mode, entry.path, '\0');
+					strbuf_addf(&sb, "%o %s%c",
+						    entry.mode, entry.path, '\0');
 					strbuf_add(&sb, entry.sha1, 20);
 					copy_count--;
 				} else
@@ -854,8 +862,6 @@ static void *patch_one_base_tree(const struct object_entry *src,
 			unsigned int id;
 			const unsigned char *entry_sha1;
 
-			if (path_idx >= path_dict->nb_entries)
-				die(_("bad path index in unpack_tree_v4"));
 			id = decode_varint(&p);
 			if (!id) {
 				entry_sha1 = p;
@@ -876,6 +882,11 @@ static void *patch_one_base_tree(const struct object_entry *src,
 	return sb.buf;
 }
 
+/*
+ * Unpack entry data in the second pass when the pack is already
+ * stored on disk. consume call back is used for large-blob case. Must
+ * be thread safe.
+ */
 static void *unpack_data(struct object_entry *obj,
 			 int (*consume)(const unsigned char *, unsigned long, void *),
 			 void *cb_data)
@@ -1079,19 +1090,6 @@ static int check_collison(struct object_entry *entry)
 	return 0;
 }
 
-static void check_against_sha1table(struct object_entry *obj)
-{
-	const unsigned char *found;
-	if (!packv4)
-		return;
-
-	found = bsearch(obj->idx.sha1, sha1_table, nr_objects, 20,
-			(int (*)(const void *, const void *))hashcmp);
-	if (!found)
-		die(_("object %s not found in SHA-1 table"),
-		    sha1_to_hex(obj->idx.sha1));
-}
-
 static void sha1_object(const void *data, struct object_entry *obj_entry,
 			unsigned long size, enum object_type type,
 			const unsigned char *sha1)
@@ -1288,7 +1286,7 @@ static void resolve_delta(struct object_entry *delta_obj,
 		bad_object(delta_obj->idx.offset, _("failed to apply delta"));
 	hash_sha1_file(result->data, result->size,
 		       typename(delta_obj->real_type), delta_obj->idx.sha1);
-	check_against_sha1table(delta_obj);
+	check_against_sha1table(delta_obj->idx.sha1);
 	sha1_object(result->data, NULL, result->size, delta_obj->real_type,
 		    delta_obj->idx.sha1);
 	counter_lock();
@@ -1296,6 +1294,11 @@ static void resolve_delta(struct object_entry *delta_obj,
 	counter_unlock();
 }
 
+/*
+ * Given a base object, search for all objects depending on the base,
+ * try to unpack one of those object. The function will be called
+ * repeatedly until all objects are unpacked.
+ */
 static struct base_data *find_unresolved_deltas_1(struct base_data *base,
 						  struct base_data *prev_base)
 {
@@ -1408,6 +1411,10 @@ static int compare_delta_entry(const void *a, const void *b)
 				   objects[delta_b->obj_no].type);
 }
 
+/*
+ * Unpack all objects depending directly or indirectly on the given
+ * object
+ */
 static void resolve_base(struct object_entry *obj)
 {
 	struct base_data *base_obj = alloc_base_data();
@@ -1417,6 +1424,7 @@ static void resolve_base(struct object_entry *obj)
 }
 
 #ifndef NO_PTHREADS
+/* Call resolve_base() in multiple threads */
 static void *threaded_second_pass(void *data)
 {
 	set_thread_data(data);
@@ -1460,10 +1468,19 @@ static struct packv4_dict *read_dict(void)
 
 static void parse_dictionaries(void)
 {
+	int i;
 	if (!packv4)
 		return;
 
-	sha1_table = read_data(20 * nr_objects);
+	sha1_table = xmalloc(20 * nr_objects);
+	hashcpy(sha1_table, fill_and_use(20));
+	for (i = 1; i < nr_objects; i++) {
+		unsigned char *p = sha1_table + i * 20;
+		hashcpy(p, fill_and_use(20));
+		if (hashcmp(p - 20, p) >= 0)
+			die(_("wrong order in SHA-1 table at entry %d"), i);
+	}
+
 	name_dict = read_dict();
 	path_dict = read_dict();
 }
@@ -1492,9 +1509,9 @@ static void parse_pack_objects(unsigned char *sha1)
 			/* large blobs, check later */
 			obj->real_type = OBJ_BAD;
 			nr_delays++;
-			check_against_sha1table(obj);
+			check_against_sha1table(obj->idx.sha1);
 		} else {
-			check_against_sha1table(obj);
+			check_against_sha1table(obj->idx.sha1);
 			sha1_object(data, NULL, obj->size, obj->real_type,
 				    obj->idx.sha1);
 		}
@@ -2137,14 +2154,8 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
 	free(index_name_buf);
 	free(keep_name_buf);
 	free(sha1_table);
-	if (name_dict) {
-		free((void*)name_dict->data);
-		free(name_dict);
-	}
-	if (path_dict) {
-		free((void*)path_dict->data);
-		free(path_dict);
-	}
+	pv4_free_dict(name_dict);
+	pv4_free_dict(path_dict);
 	if (pack_name == NULL)
 		free((void *) curr_pack);
 	if (index_name == NULL)
diff --git a/packv4-parse.c b/packv4-parse.c
index 82661ba..d515bb9 100644
--- a/packv4-parse.c
+++ b/packv4-parse.c
@@ -63,6 +63,14 @@ struct packv4_dict *pv4_create_dict(const unsigned char *data, int dict_size)
 	return dict;
 }
 
+void pv4_free_dict(struct packv4_dict *dict)
+{
+	if (dict) {
+		free((void*)dict->data);
+		free(dict);
+	}
+}
+
 static struct packv4_dict *load_dict(struct packed_git *p, off_t *offset)
 {
 	struct pack_window *w_curs = NULL;
diff --git a/packv4-parse.h b/packv4-parse.h
index 0b2405a..e6719f6 100644
--- a/packv4-parse.h
+++ b/packv4-parse.h
@@ -8,6 +8,7 @@ struct packv4_dict {
 };
 
 struct packv4_dict *pv4_create_dict(const unsigned char *data, int dict_size);
+void pv4_free_dict(struct packv4_dict *dict);
 
 void *pv4_get_commit(struct packed_git *p, struct pack_window **w_curs,
 		     off_t offset, unsigned long size);
diff --git a/sha1_file.c b/sha1_file.c
index c7bf677..1528e28 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -763,6 +763,8 @@ void free_pack_by_name(const char *pack_name)
 			}
 			close_pack_index(p);
 			free(p->bad_object_sha1);
+			pv4_free_dict(p->ident_dict);
+			pv4_free_dict(p->path_dict);
 			*pp = p->next;
 			if (last_found_pack == p)
 				last_found_pack = NULL;
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]