[PATCH WIP 2/4] sha1_file.c: add streaming interface for reading blobs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The motivation is straightforward: large files should not be read
entirely into memory (and in some cases cannot be).

This patch deals only with loose (unpacked) blobs, for two reasons:

 1. large blobs are less likely to be put in packs (*)
 2. a streaming interface for blobs in packs is more complicated, and
    thus more error-prone

(*) The first point assumes that large blobs stay out of packs; a blob
that has been packed cannot be read through this interface. Git does
not guarantee this today, but it has been discussed and worked on in
the past. Hopefully a patch series that makes the assumption hold will
come soon.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 cache.h     |    8 ++++
 sha1_file.c |  113 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 121 insertions(+), 0 deletions(-)

diff --git a/cache.h b/cache.h
index f3fc822..f6f70ce 100644
--- a/cache.h
+++ b/cache.h
@@ -655,6 +655,14 @@ extern int write_sha1_file(void *buf, unsigned long len, const char *type, unsig
 extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *);
 extern int force_object_loose(const unsigned char *sha1, time_t mtime);
 
+struct loose_object_handle; /* opaque streaming-read handle; defined in sha1_file.c */
+struct loose_object_handle *open_loose_object(const unsigned char *sha1); /* NULL on failure */
+int read_loose_object(struct loose_object_handle *oh, void *buf, unsigned long len); /* number of bytes copied into buf, or -1 */
+int close_loose_object(struct loose_object_handle *oh); /* releases oh and everything it owns; returns 0 */
+const unsigned char *loose_object_sha1(struct loose_object_handle *oh); /* points into oh; invalid after close_loose_object() */
+unsigned long loose_object_size(struct loose_object_handle *oh); /* size parsed from the object header */
+enum object_type loose_object_type(struct loose_object_handle *oh); /* type parsed from the object header */
+
 /* global flag to enable extra checks when accessing packed objects */
 extern int do_check_packed_object_crc;
 
diff --git a/sha1_file.c b/sha1_file.c
index e73cd4f..2ed06a2 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1340,6 +1340,119 @@ static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type
 	return unpack_sha1_rest(&stream, hdr, *size, sha1);
 }
 
+
+struct loose_object_handle { /* state for reading one loose object piece by piece */
+	unsigned char sha1[20]; /* object name, copied from the open_loose_object() argument */
+	enum object_type type; /* parsed from the object header */
+	unsigned long size; /* parsed from the object header */
+
+	int fd; /* descriptor returned by open_sha1_file() */
+	z_stream stream; /* zlib inflate state for the object file */
+	char *bufin, *bufout; /* bufin: bytes read from fd; bufout: inflated output */
+	int bufin_length, bufout_length; /* allocated sizes of bufin and bufout */
+	unsigned long start, end; /* bufout[start..end) is inflated data not yet handed to the caller */
+};
+
+/* Return the object type stored in the handle. */
+enum object_type loose_object_type(struct loose_object_handle *oh)
+{
+	enum object_type type = oh->type;
+
+	return type;
+}
+
+/* Return the object size stored in the handle. */
+unsigned long loose_object_size(struct loose_object_handle *oh)
+{
+	unsigned long size = oh->size;
+
+	return size;
+}
+
+/*
+ * Return the 20-byte object name stored in the handle. The pointer
+ * refers to memory inside the handle itself.
+ */
+const unsigned char *loose_object_sha1(struct loose_object_handle *oh)
+{
+	return &oh->sha1[0];
+}
+
+/*
+ * Open the loose object named by sha1 for streaming reads.
+ *
+ * The first chunk of the object file is read and its header inflated so
+ * that the object type and size are known on return. Any object data
+ * inflated together with the header stays in the output buffer and is
+ * handed out by the first read_loose_object() call.
+ *
+ * Returns a newly allocated handle, to be released with
+ * close_loose_object(), or NULL if the object file cannot be opened or
+ * read, or its header cannot be inflated and parsed. Nothing is left
+ * open or allocated on failure.
+ */
+struct loose_object_handle *open_loose_object(const unsigned char *sha1)
+{
+	struct loose_object_handle *oh;
+	int fd, len, ret;
+
+	fd = open_sha1_file(sha1);
+	if (fd == -1)
+		return NULL;
+
+	/*
+	 * Build the handle directly in its final heap location. zlib does
+	 * not support duplicating an initialized z_stream by plain struct
+	 * assignment, so the stream must be set up where it will live.
+	 */
+	oh = xmalloc(sizeof(*oh));
+	memset(oh, 0, sizeof(*oh));
+	memcpy(oh->sha1, sha1, 20);
+	oh->fd = fd;
+
+	oh->bufin_length = 8192;
+	oh->bufin = xmalloc(oh->bufin_length);
+	oh->bufout_length = 8192;
+	oh->bufout = xmalloc(oh->bufout_length);
+
+	len = xread(oh->fd, oh->bufin, oh->bufin_length);
+	if (len == -1)
+		goto fail_close;
+
+	ret = unpack_sha1_header(&oh->stream, (unsigned char *)oh->bufin, len,
+				 oh->bufout, oh->bufout_length);
+	if (ret < Z_OK)
+		goto fail_inflate;
+	oh->type = parse_sha1_header(oh->bufout, &oh->size);
+	if (oh->type < 0)
+		goto fail_inflate;
+
+	/* Skip the NUL-terminated header; the rest of bufout is object data. */
+	oh->start = strlen(oh->bufout) + 1;
+	oh->end = oh->stream.total_out;
+	return oh;
+
+fail_inflate:
+	/*
+	 * Release whatever inflate state unpack_sha1_header() set up; on a
+	 * stream that was never initialized this only returns an error.
+	 */
+	inflateEnd(&oh->stream);
+fail_close:
+	close(oh->fd);
+	free(oh->bufin);
+	free(oh->bufout);
+	free(oh);
+	return NULL;
+}
+
+/*
+ * Make sure the zlib stream has compressed input to work on: when the
+ * previous chunk has been used up, read the next one from the object
+ * file into bufin. Returns 0 on success (with avail_in left at 0 when
+ * the file is exhausted) and -1 if reading the file failed.
+ */
+static int refill_loose_object_input(struct loose_object_handle *oh)
+{
+	int len;
+
+	if (oh->stream.avail_in)
+		return 0;
+
+	len = xread(oh->fd, oh->bufin, oh->bufin_length);
+	if (len < 0) {
+		/* never store a negative count in the unsigned avail_in */
+		error("unable to read loose object '%s'", sha1_to_hex(oh->sha1));
+		return -1;
+	}
+	oh->stream.next_in = (unsigned char *)oh->bufin;
+	oh->stream.avail_in = len;
+	return 0;
+}
+
+/*
+ * Inflate the next piece of the object into bufout and point
+ * start/end at it. Returns 0 on success, in which case end == start
+ * means the stream has ended and there is no more data, and -1 on a
+ * read error or corrupt stream.
+ */
+static int fill_loose_object_output(struct loose_object_handle *oh)
+{
+	int status;
+
+	oh->start = oh->end = 0;
+	oh->stream.next_out = (unsigned char *)oh->bufout;
+	oh->stream.avail_out = oh->bufout_length;
+
+	/*
+	 * Feed input before inflating, and keep going while zlib consumes
+	 * input without producing output yet; otherwise a drained input
+	 * buffer would look like the end of the object.
+	 */
+	do {
+		if (refill_loose_object_input(oh) < 0)
+			return -1;
+		status = inflate(&oh->stream, Z_NO_FLUSH);
+		oh->end = oh->stream.next_out - (unsigned char *)oh->bufout;
+	} while (status == Z_OK && !oh->end);
+
+	if (status != Z_OK && status != Z_STREAM_END) {
+		/* includes Z_BUF_ERROR: the file ended before the stream did */
+		error("corrupt loose object '%s'", sha1_to_hex(oh->sha1));
+		oh->end = 0;
+		return -1;
+	}
+
+	/*
+	 * All payload bytes are out but zlib has not seen the end of the
+	 * stream yet: run it once more to reach Z_STREAM_END. Problems
+	 * found here are reported, but the data just inflated is still
+	 * handed to the caller.
+	 */
+	if (status == Z_OK && oh->stream.total_out == oh->size) {
+		if (refill_loose_object_input(oh) < 0) {
+			oh->end = 0;
+			return -1;
+		}
+		status = inflate(&oh->stream, Z_NO_FLUSH);
+
+		if (status < 0)
+			error("corrupt loose object '%s'", sha1_to_hex(oh->sha1));
+		else if (oh->stream.avail_in)
+			error("garbage at end of loose object '%s'",
+			      sha1_to_hex(oh->sha1));
+	}
+	return 0;
+}
+
+/*
+ * Copy the next piece of inflated object data into buf.
+ *
+ * At most buflen bytes are copied; whatever does not fit stays
+ * buffered in the handle and is returned by the next call. A call may
+ * return fewer bytes than requested even when more data follows.
+ *
+ * Returns the number of bytes copied (0 only when buflen is 0), or -1
+ * when no data can be produced: the whole object has already been
+ * read, the object file could not be read, or the stream is corrupt.
+ */
+int read_loose_object(struct loose_object_handle *oh, void *buf, unsigned long buflen)
+{
+	unsigned long len;
+
+	if (!buflen)
+		return 0;
+
+	if (oh->end == oh->start && fill_loose_object_output(oh) < 0)
+		return -1;
+
+	if (oh->end <= oh->start)
+		return -1;
+
+	len = oh->end - oh->start;
+	if (len > buflen)
+		len = buflen; /* never write past the caller's buffer */
+	memcpy(buf, oh->bufout + oh->start, len);
+	oh->start += len;
+	return len;
+}
+
+/*
+ * Tear down a handle: end the zlib stream, free both buffers and the
+ * handle itself, and close the object file. The handle must not be
+ * used afterwards. Always returns 0.
+ */
+int close_loose_object(struct loose_object_handle *oh)
+{
+	int fd = oh->fd;
+
+	inflateEnd(&oh->stream);
+	free(oh->bufout);
+	free(oh->bufin);
+	free(oh);
+	close(fd);
+	return 0;
+}
+
 unsigned long get_size_from_delta(struct packed_git *p,
 				  struct pack_window **w_curs,
 			          off_t curpos)
-- 
1.6.3.1.257.gbd13

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]