Re: [PATCH] drop support for "experimental" loose objects

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Jeff King <peff@xxxxxxxx> writes:

> On Sun, Nov 24, 2013 at 03:44:44AM -0500, Jeff King wrote:
>
>> In any code path where we call parse_object, we double-check that the
>> result matches the sha1 we asked for. But low-level commands like
>> cat-file just call read_sha1_file directly, and do not have such a
>> check. We could add it, but I suspect the processing cost would be
>> noticeable.
>
> Curious, I tested this. It is noticeable. Here's the best-of-five
> timings for the patch below when running a --batch cat-file on every
> object in my git.git repo, using the patch below:
>
>   [before]
>   real    0m12.941s
>   user    0m12.700s
>   sys     0m0.244s
>
>   [after]
>   real    0m15.800s
>   user    0m15.472s
>   sys     0m0.344s
>
> So it's about 20% slower. I don't know what the right tradeoff is. It's
> cool to check the data each time we look at it, but it does carry a
> performance penalty.

> diff --git a/builtin/cat-file.c b/builtin/cat-file.c
> index b2ca775..2b09773 100644
> --- a/builtin/cat-file.c
> +++ b/builtin/cat-file.c
> @@ -199,6 +199,8 @@ static void print_object_or_die(int fd, const unsigned char *sha1,
>  	if (type == OBJ_BLOB) {
>  		if (stream_blob_to_fd(fd, sha1, NULL, 0) < 0)
>  			die("unable to stream %s to stdout", sha1_to_hex(sha1));
> +		if (check_sha1_signature(sha1, NULL, 0, NULL) < 0)
> +			die("object %s sha1 mismatch", sha1_to_hex(sha1));

check_sha1_signature() opens the object again and streams the data.
Essentially the read side is doing twice the work with that patch,
isn't it?

I wonder if we want to extend the stream_blob_to_fd() API to
optionally allow the caller to ask to validate that the returned
data is consistent with the object name the caller asked the data
for.  Something along the lines of the attached weatherbaloon patch?

 builtin/fsck.c |  3 ++-
 entry.c        |  2 +-
 streaming.c    | 29 ++++++++++++++++++++++++++++-
 streaming.h    |  4 +++-
 4 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/builtin/fsck.c b/builtin/fsck.c
index 97ce678..f42ed9c 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -237,7 +237,8 @@ static void check_unreachable_object(struct object *obj)
 			if (!(f = fopen(filename, "w")))
 				die_errno("Could not open '%s'", filename);
 			if (obj->type == OBJ_BLOB) {
-				if (stream_blob_to_fd(fileno(f), obj->sha1, NULL, 1))
+				if (stream_blob_to_fd(fileno(f), obj->sha1, NULL,
+						      STREAMING_OUTPUT_CAN_SEEK))
 					die_errno("Could not write '%s'", filename);
 			} else
 				fprintf(f, "%s\n", sha1_to_hex(obj->sha1));
diff --git a/entry.c b/entry.c
index 7b7aa81..b3bc827 100644
--- a/entry.c
+++ b/entry.c
@@ -127,7 +127,7 @@ static int streaming_write_entry(const struct cache_entry *ce, char *path,
 	if (fd < 0)
 		return -1;
 
-	result |= stream_blob_to_fd(fd, ce->sha1, filter, 1);
+	result |= stream_blob_to_fd(fd, ce->sha1, filter, STREAMING_OUTPUT_CAN_SEEK);
 	*fstat_done = fstat_output(fd, state, statbuf);
 	result |= close(fd);
 
diff --git a/streaming.c b/streaming.c
index debe904..50599df 100644
--- a/streaming.c
+++ b/streaming.c
@@ -2,6 +2,7 @@
  * Copyright (c) 2011, Google Inc.
  */
 #include "cache.h"
+#include "object.h"
 #include "streaming.h"
 
 enum input_source {
@@ -496,19 +497,33 @@ static open_method_decl(incore)
  ****************************************************************/
 
 int stream_blob_to_fd(int fd, unsigned const char *sha1, struct stream_filter *filter,
-		      int can_seek)
+		      unsigned flags)
 {
 	struct git_istream *st;
 	enum object_type type;
 	unsigned long sz;
 	ssize_t kept = 0;
 	int result = -1;
+	int can_seek = flags & STREAMING_OUTPUT_CAN_SEEK;
+
+	int want_verify = flags & STREAMING_VERIFY_OBJECT_NAME;
+	git_SHA_CTX ctx;
 
 	st = open_istream(sha1, &type, &sz, filter);
 	if (!st)
 		return result;
 	if (type != OBJ_BLOB)
 		goto close_and_exit;
+
+	if (want_verify) {
+		char hdr[32];
+		int hdrlen;
+
+		git_SHA1_Init(&ctx);
+		hdrlen = sprintf(hdr, "%s %lu", typename(type), sz) + 1;
+		git_SHA1_Update(&ctx, hdr, hdrlen);
+	}
+
 	for (;;) {
 		char buf[1024 * 16];
 		ssize_t wrote, holeto;
@@ -518,6 +533,10 @@ int stream_blob_to_fd(int fd, unsigned const char *sha1, struct stream_filter *f
 			goto close_and_exit;
 		if (!readlen)
 			break;
+
+		if (want_verify)
+			git_SHA1_Update(&ctx, buf, readlen);
+
 		if (can_seek && sizeof(buf) == readlen) {
 			for (holeto = 0; holeto < readlen; holeto++)
 				if (buf[holeto])
@@ -542,6 +561,14 @@ int stream_blob_to_fd(int fd, unsigned const char *sha1, struct stream_filter *f
 		goto close_and_exit;
 	result = 0;
 
+	if (want_verify) {
+		unsigned char verify[20];
+
+		git_SHA1_Final(verify, &ctx);
+		if (hashcmp(verify, lookup_replace_object(sha1)))
+			result = -1;
+	}
+
  close_and_exit:
 	close_istream(st);
 	return result;
diff --git a/streaming.h b/streaming.h
index 1d05c2a..68fe3a4 100644
--- a/streaming.h
+++ b/streaming.h
@@ -12,6 +12,8 @@ extern struct git_istream *open_istream(const unsigned char *, enum object_type
 extern int close_istream(struct git_istream *);
 extern ssize_t read_istream(struct git_istream *, void *, size_t);
 
-extern int stream_blob_to_fd(int fd, const unsigned char *, struct stream_filter *, int can_seek);
+#define STREAMING_OUTPUT_CAN_SEEK 01
+#define STREAMING_VERIFY_OBJECT_NAME 02
+extern int stream_blob_to_fd(int fd, const unsigned char *, struct stream_filter *, unsigned flags);
 
 #endif /* STREAMING_H */
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]