Re: [PATCH 6/7] read-cache: refuse to create index referring to external objects

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Nguyễn Thái Ngọc Duy  <pclouds@xxxxxxxxx> writes:

> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
> ---
>  read-cache.c | 20 ++++++++++++++++++++
>  1 file changed, 20 insertions(+)
>
> diff --git a/read-cache.c b/read-cache.c
> index fda78bc..4579215 100644
> --- a/read-cache.c
> +++ b/read-cache.c
> @@ -1720,6 +1720,26 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce,
>  			      ce->name + common, ce_namelen(ce) - common);
>  	}
>  
> +	if (object_database_contaminated) {
> +		struct object_info oi;
> +		switch (ce->ce_mode) {
> +		case S_IFGITLINK:
> +			break;
> +		case S_IFDIR:
> +			if (sha1_object_info_extended(ce->sha1, &oi) != OBJ_TREE ||

This case should never happen.  Do we store any tree object in the
index in the first place?

> +			    (oi.alt && oi.alt->external))
> +				die("cannot create index referring to an external tree %s",
> +				    sha1_to_hex(ce->sha1));
> +			break;
> +		default:
> +			if (sha1_object_info_extended(ce->sha1, &oi) != OBJ_BLOB ||
> +				    (oi.alt && oi.alt->external))
> +				die("cannot create index referring to an external blob %s",
> +				    sha1_to_hex(ce->sha1));
> +			break;
> +		}
> +	}
> +
>  	result = ce_write(c, fd, ondisk, size);
>  	free(ondisk);
>  	return result;

I think the check you want to add is to the cache-tree code; before
writing the new index out, if you suspect you might have called
cache_tree_update() before, invalidate any hierarchy that is
recorded to produce a tree object that refers to an object that is
only available through external object store.

As to the logic to check if a object is something that we should
refuse to create a new dependent, I think you should not butcher
sha1_object_info_extended(), and instead add a new call that checks
if a given SHA-1 yields an object if you ignore alternate object
databases that are marked as "external", whose signature may
resemble:

	int has_sha1_file_proper(const unsigned char *sha1);

or something.

This is a tangent, but in addition, you may want to add an even
narrower variant that checks the same but ignoring _all_ alternate
object databases, "external" or not:

        int has_sha1_file_local(const unsigned char *sha1);

That may be useful if we want to later make the alternate safer to
use; instead of letting the codepath to create an object ask
has_sha1_file() to see if it already exists and refrain from writing
a new copy, we switch to ask has_sha1_file_locally() and even if an
alternate object database we borrow from has it, we keep our own
copy in our repository.

Not tested beyond syntax, but rebasing something like this patch on
top of your "mark external sources" change may take us closer to
that.

 cache.h     |  2 ++
 sha1_file.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/cache.h b/cache.h
index 1f96f65..8d651b6 100644
--- a/cache.h
+++ b/cache.h
@@ -766,6 +766,8 @@ extern int move_temp_to_file(const char *tmpfile, const char *filename);
 
 extern int has_sha1_pack(const unsigned char *sha1);
 extern int has_sha1_file(const unsigned char *sha1);
+extern int has_sha1_file_proper(const unsigned char *sha1);
+extern int has_sha1_file_local(const unsigned char *sha1);
 extern int has_loose_object_nonlocal(const unsigned char *sha1);
 
 extern int has_pack_index(const unsigned char *sha1);
diff --git a/sha1_file.c b/sha1_file.c
index 40b2329..1a3192a 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -420,11 +420,18 @@ static int has_loose_object_local(const unsigned char *sha1)
 	return !access(name, F_OK);
 }
 
-int has_loose_object_nonlocal(const unsigned char *sha1)
+enum odb_origin {
+	odb_default = 0, odb_proper, odb_local
+};
+
+static int has_loose_object_nonlocal_limited(const unsigned char *sha1,
+					     enum odb_origin origin)
 {
 	struct alternate_object_database *alt;
 	prepare_alt_odb();
 	for (alt = alt_odb_list; alt; alt = alt->next) {
+		if (origin == odb_proper && 0 /* alt->external */)
+			continue;
 		fill_sha1_path(alt->name, sha1);
 		if (!access(alt->base, F_OK))
 			return 1;
@@ -432,6 +439,11 @@ int has_loose_object_nonlocal(const unsigned char *sha1)
 	return 0;
 }
 
+int has_loose_object_nonlocal(const unsigned char *sha1)
+{
+	return has_loose_object_nonlocal_limited(sha1, odb_default);
+}
+
 static int has_loose_object(const unsigned char *sha1)
 {
 	return has_loose_object_local(sha1) ||
@@ -2062,12 +2074,28 @@ int is_pack_valid(struct packed_git *p)
 	return !open_packed_git(p);
 }
 
+static int limit_pack_odb_origin(enum odb_origin origin, struct packed_git *p)
+{
+	switch (origin) {
+	default:
+		return 0;
+	case odb_proper:
+		return 0; /* p->external */
+	case odb_local:
+		return !p->pack_local;
+	}
+}
+
 static int fill_pack_entry(const unsigned char *sha1,
 			   struct pack_entry *e,
-			   struct packed_git *p)
+			   struct packed_git *p,
+			   enum odb_origin origin)
 {
 	off_t offset;
 
+	if (limit_pack_odb_origin(origin, p))
+		return 0;
+
 	if (p->num_bad_objects) {
 		unsigned i;
 		for (i = 0; i < p->num_bad_objects; i++)
@@ -2096,7 +2124,8 @@ static int fill_pack_entry(const unsigned char *sha1,
 	return 1;
 }
 
-static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
+static int find_pack_entry_limited(const unsigned char *sha1, struct pack_entry *e,
+				   enum odb_origin origin)
 {
 	struct packed_git *p;
 
@@ -2104,11 +2133,11 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
 	if (!packed_git)
 		return 0;
 
-	if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack))
+	if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack, origin))
 		return 1;
 
 	for (p = packed_git; p; p = p->next) {
-		if (p == last_found_pack || !fill_pack_entry(sha1, e, p))
+		if (p == last_found_pack || !fill_pack_entry(sha1, e, p, origin))
 			continue;
 
 		last_found_pack = p;
@@ -2117,6 +2146,11 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
 	return 0;
 }
 
+static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
+{
+	return find_pack_entry_limited(sha1, e, odb_default);
+}
+
 struct packed_git *find_sha1_pack(const unsigned char *sha1,
 				  struct packed_git *packs)
 {
@@ -2630,6 +2664,22 @@ int has_sha1_file(const unsigned char *sha1)
 	return has_loose_object(sha1);
 }
 
+int has_sha1_file_local(const unsigned char *sha1)
+{
+	struct pack_entry e;
+	if (find_pack_entry_limited(sha1, &e, odb_local))
+		return 1;
+	return has_loose_object_local(sha1);
+}
+
+int has_sha1_file_proper(const unsigned char *sha1)
+{
+	struct pack_entry e;
+	return (!!find_pack_entry_limited(sha1, &e, odb_proper) ||
+		has_loose_object_local(sha1) ||
+		has_loose_object_nonlocal_limited(sha1, odb_proper));
+}
+
 static void check_tree(const void *buf, size_t size)
 {
 	struct tree_desc desc;
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]