[PATCH WIP 3/4] write_entry: use streaming interface for checkout large files

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



With this patch, Git's memory consumption should stay fairly flat no
matter how large the input files are. So:

 - less memory will be used
 - even less memory on systems that do not have proper mmap() support
 - unmappable files can now be checked in

TODO: buffer size, file size limit that triggers this routine

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 entry.c |   68 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 68 insertions(+), 0 deletions(-)

diff --git a/entry.c b/entry.c
index cc841ed..2a49d7b 100644
--- a/entry.c
+++ b/entry.c
@@ -91,6 +91,65 @@ static void *read_blob_entry(struct cache_entry *ce, unsigned long *size)
 	return NULL;
 }
 
+/*
+ * Try to write the entry by streaming the loose blob into the file in
+ * fixed-size chunks instead of reading the whole blob into memory.
+ * Returns 0 on success, 1 if the normal interface should be used
+ * instead, or the (negative) result of error() on failure.
+ */
+static int write_large_entry(struct cache_entry *ce, char *path,
+			     const struct checkout *state, int to_tempfile)
+{
+	unsigned int ce_mode_s_ifmt = ce->ce_mode & S_IFMT;
+	struct loose_object_handle *oh = open_loose_object(ce->sha1);
+	char buf[8192];
+	long len;
+	int fd, ret = 1;
+
+	if (!oh)
+		return 1;
+	if (loose_object_type(oh) != OBJ_BLOB) {
+		ret = error("git checkout-index: unable to read sha1 file of %s (%s)",
+			    path, sha1_to_hex(ce->sha1));
+		goto out;
+	}
+
+	/* Content that needs conversion cannot be streamed; ret is still 1. */
+	if (convert_to_working_tree_needed(ce->name,  xsize_t(loose_object_size(oh))))
+		goto out;
+
+	if (to_tempfile) {
+		strcpy(path, ce_mode_s_ifmt == S_IFREG ?
+		       ".merge_file_XXXXXX" : ".merge_link_XXXXXX");
+		fd = mkstemp(path);
+	} else {
+		fd = create_file(path, ce_mode_s_ifmt == S_IFREG ? ce->ce_mode : 0666);
+	}
+	if (fd < 0) {
+		/* Report before closing oh, which could overwrite errno. */
+		ret = error("git checkout-index: unable to create file %s (%s)",
+			    path, strerror(errno));
+		goto out;
+	}
+
+	ret = 0;
+	while ((len = read_loose_object(oh, buf, sizeof(buf))) > 0) {
+		if (write_in_full(fd, buf, len) != len) {
+			ret = error("git checkout-index: unable to write file %s", path);
+			break;
+		}
+	}
+	/* NOTE(review): assumes a negative length means a read error - confirm. */
+	if (len < 0)
+		ret = error("git checkout-index: unable to read sha1 file of %s (%s)",
+			    path, sha1_to_hex(ce->sha1));
+	if (close(fd) && !ret)
+		ret = error("git checkout-index: unable to write file %s", path);
+out:
+	close_loose_object(oh);
+	return ret;
+}
+
 static int write_entry(struct cache_entry *ce, char *path, const struct checkout *state, int to_tempfile)
 {
 	unsigned int ce_mode_s_ifmt = ce->ce_mode & S_IFMT;
@@ -104,6 +163,15 @@ static int write_entry(struct cache_entry *ce, char *path, const struct checkout
 	switch (ce_mode_s_ifmt) {
 	case S_IFREG:
 	case S_IFLNK:
+		if (ce_mode_s_ifmt == S_IFREG) {
+			ret = write_large_entry(ce, path, state, to_tempfile);
+			if (ret < 0) /* failed */
+				return ret;
+			if (ret == 0) /* successful */
+				break;
+			/* else, go through */
+		}
+
 		new = read_blob_entry(ce, &size);
 		if (!new)
 			return error("git checkout-index: unable to read sha1 file of %s (%s)",
-- 
1.6.3.1.257.gbd13

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]