Re: [PATCH v0 3/3] Bigfile: teach "git add" to send a large file straight to a pack

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sun, May 08, 2011 at 01:47:35AM -0700, Junio C Hamano wrote:
>  * The decision to stream in this patch is based on the size of the
>    blob, but it should be tied to an attribute, "bigdata". The
>    attribute should also mean the blob will not undergo any
>    convert-to-git processing.

Instead we could detect if convert-from/to-git is unnecessary and
stream big blobs based on core.bigFileThreshold. The patch below may
help. I made it in order to check out files without inflating them in
memory, but that part is still not ready.

By the way, is there any way we can mark an object in the pack as
"non-compressed" so we can read it without inflating? zlib compression
level == 0 still splits the object into multiple parts.

-- 8< --
Subject: [PATCH] convert: add functions to determine whether conversion is needed

Blob conversion from/to repository requires the entire blob in memory.
Conversion is rarely needed in practice, yet that requirement puts
extra pressure on memory for large blobs.

Add two functions to determine early whether we can bypass conversion
without looking at the content.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 cache.h   |    3 ++
 convert.c |   79 +++++++++++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 65 insertions(+), 17 deletions(-)

diff --git a/cache.h b/cache.h
index 08a9022..be3845d 100644
--- a/cache.h
+++ b/cache.h
@@ -1091,7 +1091,10 @@ extern void trace_repo_setup(const char *prefix);
 /* returns 1 if *dst was used */
 extern int convert_to_git(const char *path, const char *src, size_t len,
                           struct strbuf *dst, enum safe_crlf checksafe);
+extern int convert_to_git_needed(const char *path, size_t len,
+				 enum safe_crlf checksafe);
 extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst);
+extern int convert_to_working_tree_needed(const char *path, size_t len);
 extern int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst);
 
 /* add */
diff --git a/convert.c b/convert.c
index d5aebed..39545ed 100644
--- a/convert.c
+++ b/convert.c
@@ -188,7 +188,8 @@ static int has_cr_in_index(const char *path)
 }
 
 static int crlf_to_git(const char *path, const char *src, size_t len,
-		       struct strbuf *buf, enum action action, enum safe_crlf checksafe)
+		       struct strbuf *buf, enum action action,
+		       enum safe_crlf checksafe, int dry_run)
 {
 	struct text_stat stats;
 	char *dst;
@@ -197,6 +198,9 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
 	    (action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE) || !len)
 		return 0;
 
+	if (dry_run)
+		return 1;
+
 	gather_stats(src, len, &stats);
 
 	if (action == CRLF_AUTO || action == CRLF_GUESS) {
@@ -257,7 +261,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
 }
 
 static int crlf_to_worktree(const char *path, const char *src, size_t len,
-			    struct strbuf *buf, enum action action)
+			    struct strbuf *buf, enum action action, int dry_run)
 {
 	char *to_free = NULL;
 	struct text_stat stats;
@@ -265,6 +269,9 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len,
 	if (!len || determine_output_conversion(action) != EOL_CRLF)
 		return 0;
 
+	if (dry_run)
+		return 1;
+
 	gather_stats(src, len, &stats);
 
 	/* No LF? Nothing to convert, regardless. */
@@ -374,7 +381,7 @@ static int filter_buffer(int in, int out, void *data)
 }
 
 static int apply_filter(const char *path, const char *src, size_t len,
-                        struct strbuf *dst, const char *cmd)
+			struct strbuf *dst, const char *cmd, int dry_run)
 {
 	/*
 	 * Create a pipeline to have the command filter the buffer's
@@ -390,6 +397,9 @@ static int apply_filter(const char *path, const char *src, size_t len,
 	if (!cmd)
 		return 0;
 
+	if (dry_run)
+		return 1;
+
 	memset(&async, 0, sizeof(async));
 	async.proc = filter_buffer;
 	async.data = &params;
@@ -541,11 +551,17 @@ static int count_ident(const char *cp, unsigned long size)
 }
 
 static int ident_to_git(const char *path, const char *src, size_t len,
-                        struct strbuf *buf, int ident)
+			struct strbuf *buf, int ident, int dry_run)
 {
 	char *dst, *dollar;
 
-	if (!ident || !count_ident(src, len))
+	if (!ident)
+		return 0;
+
+	if (dry_run)
+		return 1;
+
+	if(!count_ident(src, len))
 		return 0;
 
 	/* only grow if not in place */
@@ -582,7 +598,7 @@ static int ident_to_git(const char *path, const char *src, size_t len,
 }
 
 static int ident_to_worktree(const char *path, const char *src, size_t len,
-                             struct strbuf *buf, int ident)
+			     struct strbuf *buf, int ident, int dry_run)
 {
 	unsigned char sha1[20];
 	char *to_free = NULL, *dollar, *spc;
@@ -591,6 +607,9 @@ static int ident_to_worktree(const char *path, const char *src, size_t len,
 	if (!ident)
 		return 0;
 
+	if (dry_run)
+		return 1;
+
 	cnt = count_ident(src, len);
 	if (!cnt)
 		return 0;
@@ -726,8 +745,9 @@ static enum action determine_action(enum action text_attr, enum eol eol_attr)
 	return text_attr;
 }
 
-int convert_to_git(const char *path, const char *src, size_t len,
-                   struct strbuf *dst, enum safe_crlf checksafe)
+static int convert_to_git_1(const char *path, const char *src, size_t len,
+			    struct strbuf *dst, enum safe_crlf checksafe,
+			    int dry_run)
 {
 	struct git_attr_check check[5];
 	enum action action = CRLF_GUESS;
@@ -748,23 +768,39 @@ int convert_to_git(const char *path, const char *src, size_t len,
 			filter = drv->clean;
 	}
 
-	ret |= apply_filter(path, src, len, dst, filter);
+	ret |= apply_filter(path, src, len, dst, filter, dry_run);
 	if (ret) {
+		if (dry_run)
+			return 1;
 		src = dst->buf;
 		len = dst->len;
 	}
 	action = determine_action(action, eol_attr);
-	ret |= crlf_to_git(path, src, len, dst, action, checksafe);
+	ret |= crlf_to_git(path, src, len, dst, action, checksafe, dry_run);
 	if (ret) {
+		if (dry_run)
+			return 1;
 		src = dst->buf;
 		len = dst->len;
 	}
-	return ret | ident_to_git(path, src, len, dst, ident);
+	return ret | ident_to_git(path, src, len, dst, ident, dry_run);
+}
+
+int convert_to_git(const char *path, const char *src, size_t len,
+		   struct strbuf *dst, enum safe_crlf checksafe)
+{
+	return convert_to_git_1(path, src, len, dst, checksafe, 0);
+}
+
+int convert_to_git_needed(const char *path, size_t len,
+			  enum safe_crlf checksafe)
+{
+	return convert_to_git_1(path, NULL, len, NULL, checksafe, 1);
 }
 
 static int convert_to_working_tree_internal(const char *path, const char *src,
 					    size_t len, struct strbuf *dst,
-					    int normalizing)
+					    int normalizing, int dry_run)
 {
 	struct git_attr_check check[5];
 	enum action action = CRLF_GUESS;
@@ -785,8 +821,10 @@ static int convert_to_working_tree_internal(const char *path, const char *src,
 			filter = drv->smudge;
 	}
 
-	ret |= ident_to_worktree(path, src, len, dst, ident);
+	ret |= ident_to_worktree(path, src, len, dst, ident, dry_run);
 	if (ret) {
+		if (dry_run)
+			return 1;
 		src = dst->buf;
 		len = dst->len;
 	}
@@ -796,23 +834,30 @@ static int convert_to_working_tree_internal(const char *path, const char *src,
 	 */
 	if (filter || !normalizing) {
 		action = determine_action(action, eol_attr);
-		ret |= crlf_to_worktree(path, src, len, dst, action);
+		ret |= crlf_to_worktree(path, src, len, dst, action, dry_run);
 		if (ret) {
+			if (dry_run)
+				return 1;
 			src = dst->buf;
 			len = dst->len;
 		}
 	}
-	return ret | apply_filter(path, src, len, dst, filter);
+	return ret | apply_filter(path, src, len, dst, filter, dry_run);
 }
 
 int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst)
 {
-	return convert_to_working_tree_internal(path, src, len, dst, 0);
+	return convert_to_working_tree_internal(path, src, len, dst, 0, 0);
+}
+
+int convert_to_working_tree_needed(const char *path, size_t len)
+{
+	return convert_to_working_tree_internal(path, NULL, len, NULL, 0, 1);
 }
 
 int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst)
 {
-	int ret = convert_to_working_tree_internal(path, src, len, dst, 1);
+	int ret = convert_to_working_tree_internal(path, src, len, dst, 1, 0);
 	if (ret) {
 		src = dst->buf;
 		len = dst->len;
-- 
1.7.4.74.g639db


-- 8< --

-- 
Duy
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]