[PATCH WIP 4/4] index_fd: support indexing large files

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch is less impressive than the previous one, as memory usage is
usually lower. But systems without a proper mmap() would still love it.

TODO: again, file limit

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 sha1_file.c |  120 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 120 insertions(+), 0 deletions(-)

diff --git a/sha1_file.c b/sha1_file.c
index 2ed06a2..f4f90ab 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2609,12 +2609,132 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
 	return ret;
 }
 
+/*
+ * Stream the contents of fd into a new loose object, hashing and
+ * deflating one fixed-size buffer at a time so the whole file is never
+ * held in memory.  The resulting object id is stored in sha1.
+ *
+ * The object header is built from st->st_size, so the number of bytes
+ * actually read must match it; on a mismatch the temporary file is
+ * removed and error() is returned rather than storing a malformed
+ * object.  Read, write and deflate failures are fatal (die).
+ *
+ * path is only used in messages and may be NULL.
+ *
+ * Returns the result of move_temp_to_file(), or the value of error() if
+ * the temporary file cannot be created or the size does not match.
+ */
+static int index_and_write_fd(unsigned char *sha1, int fd, struct stat *st,
+			      enum object_type type, const char *path)
+{
+	int fdo, ret, hdrlen, nread, eof = 0;
+	z_stream stream;
+	char *filename;
+	static char tmpfile[PATH_MAX];
+	char bufin[8192], bufout[8192];
+	const char *name = path ? path : "(unknown)";
+	off_t total_read = 0;
+	git_SHA_CTX c;
+
+	/* NOTE(review): "foo" looks like a WIP placeholder name -- confirm */
+	fdo = create_tmpfile(tmpfile, sizeof(tmpfile), "foo");
+	if (fdo < 0) {
+		if (errno == EACCES)
+			return error("insufficient permission for adding an object to repository database %s\n", get_object_directory());
+		else
+			return error("unable to create temporary sha1 filename %s: %s\n", tmpfile, strerror(errno));
+	}
+
+	hdrlen = sprintf(bufin, "%s %lu", typename(type), (unsigned long)st->st_size)+1;
+	git_SHA1_Init(&c);
+	git_SHA1_Update(&c, bufin, hdrlen);
+
+	/* Set it up */
+	memset(&stream, 0, sizeof(stream));
+	deflateInit(&stream, zlib_compression_level);
+
+	/* The header is the first input; file data follows once it is consumed */
+	stream.next_in = (unsigned char *)bufin;
+	stream.avail_in = hdrlen;
+
+	do {
+		if (!stream.avail_in && !eof) {
+			/* Check for failure before the count reaches avail_in */
+			nread = xread(fd, bufin, sizeof(bufin));
+			if (nread < 0)
+				die("unable to read %s: %s", name, strerror(errno));
+			eof = !nread;
+			git_SHA1_Update(&c, bufin, nread);
+			total_read += nread;
+			stream.next_in = (unsigned char *)bufin;
+			stream.avail_in = nread;
+		}
+
+		/* Every call gets an empty output buffer, drained right after */
+		stream.next_out = (unsigned char *)bufout;
+		stream.avail_out = sizeof(bufout);
+		ret = deflate(&stream, eof ? Z_FINISH : Z_NO_FLUSH);
+		if (ret != Z_OK && ret != Z_STREAM_END)
+			die("unable to deflate %s (%d)", name, ret);
+		if (stream.avail_out < sizeof(bufout))
+			write_or_die(fdo, bufout, sizeof(bufout) - stream.avail_out);
+	} while (ret != Z_STREAM_END);
+
+	/* Done computing SHA-1 */
+	git_SHA1_Final(sha1, &c);
+
+	ret = deflateEnd(&stream);
+	if (ret != Z_OK)
+		die("deflateEnd on object %s failed (%d)", sha1_to_hex(sha1), ret);
+
+	/* A file that changed under us no longer matches the header size */
+	if (total_read != st->st_size) {
+		close(fdo);
+		unlink(tmpfile);
+		return error("%s changed while it was being read", name);
+	}
+	close_sha1_file(fdo);
+
+	/* Now generate proper path from SHA-1 */
+	filename = sha1_file_name(sha1);
+	safe_create_leading_directories_const(filename);
+	return move_temp_to_file(tmpfile, filename);
+}
+
+/* Hash fd's contents as a "type" object without storing it.  Returns 0
+ * with the id in sha1, or the value of error() if a read fails. */
+static int hash_fd(unsigned char *sha1, int fd, struct stat *st,
+		   enum object_type type, const char *path)
+{
+	git_SHA_CTX c;
+	char buf[8192];
+	int buflen = sprintf(buf, "%s %lu", typename(type), (unsigned long)st->st_size)+1;
+
+	git_SHA1_Init(&c);
+	git_SHA1_Update(&c, buf, buflen);	/* the header comes first */
+	while ((buflen = xread(fd, buf, sizeof(buf))) > 0)
+		git_SHA1_Update(&c, buf, buflen);
+	if (buflen < 0)	/* a failed read must not pass for end of file */
+		return error("unable to read %s: %s", path ? path : "(unknown)", strerror(errno));
+	git_SHA1_Final(sha1, &c);
+	return 0;
+}
+
 int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object,
 	     enum object_type type, const char *path)
 {
 	int ret;
 	size_t size = xsize_t(st->st_size);
 
+	if (S_ISREG(st->st_mode) && path && !convert_to_git_needed(path, size)) {
+		if (write_object)
+			ret = index_and_write_fd(sha1, fd, st, type, path);
+		else
+			ret = hash_fd(sha1, fd, st, type, path);
+		close(fd);
+		return ret;
+	}
+
 	if (!S_ISREG(st->st_mode)) {
 		struct strbuf sbuf = STRBUF_INIT;
 		if (strbuf_read(&sbuf, fd, 4096) >= 0)
-- 
1.6.3.1.257.gbd13

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]