[PATCH 1/7] sha1_file: keep track of where an SHA-1 object comes from

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



We currently know whether an object is loose or packed, but we do not
know whether it comes from the repository's own object database or
from one reached via the alternates mechanism. With this patch,
sha1_object_info_extended() can tell whether an object comes from an
alternate object database (and which one).

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 How about this way instead: we keep track of where objects come from
 so we can verify object source when we create or update something
 that contains SHA-1.

 Patches 1/7 and 2/7 prepare for tracking the object source. The rest
 verify that new commits, trees, tags, indexes, refs or reflogs do not
 refer to an external source.

 This adds some cost when add_submodule_odb() is used. I did not
 measure it, but I guess the added cost is much smaller than the cost
 of forking, especially on Windows. No breakages detected by the test
 suite, which is really good (or my code is really broken).

 builtin/index-pack.c |  2 +-
 cache.h              |  4 +++-
 fast-import.c        |  2 +-
 sha1_file.c          | 66 ++++++++++++++++++++++++++++++++++++----------------
 4 files changed, 51 insertions(+), 23 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 43d364b..a7de3f8 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1393,7 +1393,7 @@ static void read_v2_anomalous_offsets(struct packed_git *p,
 
 static void read_idx_option(struct pack_idx_option *opts, const char *pack_name)
 {
-	struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1);
+	struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1, NULL);
 
 	if (!p)
 		die(_("Cannot open existing pack file '%s'"), pack_name);
diff --git a/cache.h b/cache.h
index c257953..92854ab 100644
--- a/cache.h
+++ b/cache.h
@@ -978,6 +978,7 @@ struct pack_window {
 extern struct packed_git {
 	struct packed_git *next;
 	struct pack_window *windows;
+	struct alternate_object_database *alt;
 	off_t pack_size;
 	const void *index_data;
 	size_t index_size;
@@ -1066,7 +1067,7 @@ extern void close_pack_windows(struct packed_git *);
 extern void unuse_pack(struct pack_window **);
 extern void free_pack_by_name(const char *);
 extern void clear_delta_base_cache(void);
-extern struct packed_git *add_packed_git(const char *, int, int);
+extern struct packed_git *add_packed_git(const char *, int, int, struct alternate_object_database *);
 extern const unsigned char *nth_packed_object_sha1(struct packed_git *, uint32_t);
 extern off_t nth_packed_object_offset(const struct packed_git *, uint32_t);
 extern off_t find_pack_entry_one(const unsigned char *, struct packed_git *);
@@ -1102,6 +1103,7 @@ struct object_info {
 			unsigned int is_delta;
 		} packed;
 	} u;
+	struct alternate_object_database *alt;
 };
 extern int sha1_object_info_extended(const unsigned char *, struct object_info *);
 
diff --git a/fast-import.c b/fast-import.c
index c2a814e..4bf732e 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -964,7 +964,7 @@ static void end_packfile(void)
 		idx_name = keep_pack(create_index());
 
 		/* Register the packfile with core git's machinery. */
-		new_p = add_packed_git(idx_name, strlen(idx_name), 1);
+		new_p = add_packed_git(idx_name, strlen(idx_name), 1, NULL);
 		if (!new_p)
 			die("core git rejected index %s", idx_name);
 		all_packs[pack_id] = new_p;
diff --git a/sha1_file.c b/sha1_file.c
index 40b2329..afc7355 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -933,7 +933,8 @@ static void try_to_free_pack_memory(size_t size)
 	release_pack_memory(size, -1);
 }
 
-struct packed_git *add_packed_git(const char *path, int path_len, int local)
+struct packed_git *add_packed_git(const char *path, int path_len, int local,
+				  struct alternate_object_database *alt)
 {
 	static int have_set_try_to_free_routine;
 	struct stat st;
@@ -973,6 +974,7 @@ struct packed_git *add_packed_git(const char *path, int path_len, int local)
 	p->mtime = st.st_mtime;
 	if (path_len < 40 || get_sha1_hex(path + path_len - 40, p->sha1))
 		hashclr(p->sha1);
+	p->alt = alt;
 	return p;
 }
 
@@ -1000,7 +1002,8 @@ void install_packed_git(struct packed_git *pack)
 	packed_git = pack;
 }
 
-static void prepare_packed_git_one(char *objdir, int local)
+static void prepare_packed_git_one(char *objdir, int local,
+				   struct alternate_object_database *alt)
 {
 	/* Ensure that this buffer is large enough so that we can
 	   append "/pack/" without clobbering the stack even if
@@ -1041,7 +1044,7 @@ static void prepare_packed_git_one(char *objdir, int local)
 		/* See if it really is a valid .idx file with corresponding
 		 * .pack file that we can map.
 		 */
-		p = add_packed_git(path, len + namelen, local);
+		p = add_packed_git(path, len + namelen, local, alt);
 		if (!p)
 			continue;
 		install_packed_git(p);
@@ -1110,11 +1113,11 @@ void prepare_packed_git(void)
 
 	if (prepare_packed_git_run_once)
 		return;
-	prepare_packed_git_one(get_object_directory(), 1);
+	prepare_packed_git_one(get_object_directory(), 1, NULL);
 	prepare_alt_odb();
 	for (alt = alt_odb_list; alt; alt = alt->next) {
 		alt->name[-1] = 0;
-		prepare_packed_git_one(alt->base, 0);
+		prepare_packed_git_one(alt->base, 0, alt);
 		alt->name[-1] = '/';
 	}
 	rearrange_packed_git();
@@ -1215,15 +1218,19 @@ static int git_open_noatime(const char *name)
 	}
 }
 
-static int open_sha1_file(const unsigned char *sha1)
+static int open_sha1_file(const unsigned char *sha1,
+			  struct alternate_object_database **p_alt)
 {
 	int fd;
 	char *name = sha1_file_name(sha1);
 	struct alternate_object_database *alt;
 
 	fd = git_open_noatime(name);
-	if (fd >= 0)
+	if (fd >= 0) {
+		if (p_alt)
+			*p_alt = NULL;
 		return fd;
+	}
 
 	prepare_alt_odb();
 	errno = ENOENT;
@@ -1231,18 +1238,23 @@ static int open_sha1_file(const unsigned char *sha1)
 		name = alt->name;
 		fill_sha1_path(name, sha1);
 		fd = git_open_noatime(alt->base);
-		if (fd >= 0)
+		if (fd >= 0) {
+			if (p_alt)
+				*p_alt = alt;
 			return fd;
+		}
 	}
 	return -1;
 }
 
-void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
+static void *map_sha1_file_extended(const unsigned char *sha1,
+				    unsigned long *size,
+				    struct alternate_object_database **p_alt)
 {
 	void *map;
 	int fd;
 
-	fd = open_sha1_file(sha1);
+	fd = open_sha1_file(sha1, p_alt);
 	map = NULL;
 	if (fd >= 0) {
 		struct stat st;
@@ -1261,6 +1273,11 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
 	return map;
 }
 
+void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
+{
+	return map_sha1_file_extended(sha1, size, NULL);
+}
+
 /*
  * There used to be a second loose object header format which
  * was meant to mimic the in-pack format, allowing for direct
@@ -2096,7 +2113,9 @@ static int fill_pack_entry(const unsigned char *sha1,
 	return 1;
 }
 
-static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
+static int find_pack_entry(const unsigned char *sha1,
+			   struct pack_entry *e,
+			   struct alternate_object_database **p_alt)
 {
 	struct packed_git *p;
 
@@ -2104,14 +2123,19 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
 	if (!packed_git)
 		return 0;
 
-	if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack))
+	if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack)) {
+		if (p_alt)
+			*p_alt = last_found_pack->alt;
 		return 1;
+	}
 
 	for (p = packed_git; p; p = p->next) {
 		if (p == last_found_pack || !fill_pack_entry(sha1, e, p))
 			continue;
 
 		last_found_pack = p;
+		if (p_alt)
+			*p_alt = p->alt;
 		return 1;
 	}
 	return 0;
@@ -2130,7 +2154,9 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1,
 
 }
 
-static int sha1_loose_object_info(const unsigned char *sha1, unsigned long *sizep)
+static int sha1_loose_object_info(const unsigned char *sha1,
+				  unsigned long *sizep,
+				  struct alternate_object_database **p_alt)
 {
 	int status;
 	unsigned long mapsize, size;
@@ -2138,7 +2164,7 @@ static int sha1_loose_object_info(const unsigned char *sha1, unsigned long *size
 	git_zstream stream;
 	char hdr[32];
 
-	map = map_sha1_file(sha1, &mapsize);
+	map = map_sha1_file_extended(sha1, &mapsize, p_alt);
 	if (!map)
 		return error("unable to find %s", sha1_to_hex(sha1));
 	if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
@@ -2168,9 +2194,9 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi)
 		return co->type;
 	}
 
-	if (!find_pack_entry(sha1, &e)) {
+	if (!find_pack_entry(sha1, &e, &oi->alt)) {
 		/* Most likely it's a loose object. */
-		status = sha1_loose_object_info(sha1, oi->sizep);
+		status = sha1_loose_object_info(sha1, oi->sizep, &oi->alt);
 		if (status >= 0) {
 			oi->whence = OI_LOOSE;
 			return status;
@@ -2178,7 +2204,7 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi)
 
 		/* Not a loose object; someone else may have just packed it. */
 		reprepare_packed_git();
-		if (!find_pack_entry(sha1, &e))
+		if (!find_pack_entry(sha1, &e, &oi->alt))
 			return status;
 	}
 
@@ -2213,7 +2239,7 @@ static void *read_packed_sha1(const unsigned char *sha1,
 	struct pack_entry e;
 	void *data;
 
-	if (!find_pack_entry(sha1, &e))
+	if (!find_pack_entry(sha1, &e, NULL))
 		return NULL;
 	data = cache_or_unpack_entry(e.p, e.offset, size, type, 1);
 	if (!data) {
@@ -2618,14 +2644,14 @@ int has_pack_index(const unsigned char *sha1)
 int has_sha1_pack(const unsigned char *sha1)
 {
 	struct pack_entry e;
-	return find_pack_entry(sha1, &e);
+	return find_pack_entry(sha1, &e, NULL);
 }
 
 int has_sha1_file(const unsigned char *sha1)
 {
 	struct pack_entry e;
 
-	if (find_pack_entry(sha1, &e))
+	if (find_pack_entry(sha1, &e, NULL))
 		return 1;
 	return has_loose_object(sha1);
 }
-- 
1.8.0.rc3.18.g0d9b108

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]