[RFC PATCH] convert: add functions to check if we can bypass conversion

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Converting a blob to or from the repository requires the entire blob to
be in memory. Conversion is rarely needed in practice, yet that
requirement can put pressure on memory for large blobs.

Add two functions to determine early whether we can bypass conversion
without looking at the content.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 I have patches to checkout loose objects directly to working tree but
 they are ugly and do not support packs. We probably should flag deltified
 objects as "conversion required" too.

 Anyway, I think the intention of this patch is good. Whatever we are
 going to do with large blobs wrt memory usage, we need to either cut
 this part out or support a streaming conversion interface. I doubt the
 latter will come.

 cache.h   |    3 ++
 convert.c |   79 +++++++++++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 65 insertions(+), 17 deletions(-)

diff --git a/cache.h b/cache.h
index 08a9022..be3845d 100644
--- a/cache.h
+++ b/cache.h
@@ -1091,7 +1091,10 @@ extern void trace_repo_setup(const char *prefix);
 /* returns 1 if *dst was used */
 extern int convert_to_git(const char *path, const char *src, size_t len,
                           struct strbuf *dst, enum safe_crlf checksafe);
+extern int convert_to_git_needed(const char *path, size_t len,
+				 enum safe_crlf checksafe);
 extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst);
+extern int convert_to_working_tree_needed(const char *path, size_t len);
 extern int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst);
 
 /* add */
diff --git a/convert.c b/convert.c
index d5aebed..39545ed 100644
--- a/convert.c
+++ b/convert.c
@@ -188,7 +188,8 @@ static int has_cr_in_index(const char *path)
 }
 
 static int crlf_to_git(const char *path, const char *src, size_t len,
-		       struct strbuf *buf, enum action action, enum safe_crlf checksafe)
+		       struct strbuf *buf, enum action action,
+		       enum safe_crlf checksafe, int dry_run)
 {
 	struct text_stat stats;
 	char *dst;
@@ -197,6 +198,9 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
 	    (action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE) || !len)
 		return 0;
 
+	if (dry_run)
+		return 1;
+
 	gather_stats(src, len, &stats);
 
 	if (action == CRLF_AUTO || action == CRLF_GUESS) {
@@ -257,7 +261,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
 }
 
 static int crlf_to_worktree(const char *path, const char *src, size_t len,
-			    struct strbuf *buf, enum action action)
+			    struct strbuf *buf, enum action action, int dry_run)
 {
 	char *to_free = NULL;
 	struct text_stat stats;
@@ -265,6 +269,9 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len,
 	if (!len || determine_output_conversion(action) != EOL_CRLF)
 		return 0;
 
+	if (dry_run)
+		return 1;
+
 	gather_stats(src, len, &stats);
 
 	/* No LF? Nothing to convert, regardless. */
@@ -374,7 +381,7 @@ static int filter_buffer(int in, int out, void *data)
 }
 
 static int apply_filter(const char *path, const char *src, size_t len,
-                        struct strbuf *dst, const char *cmd)
+			struct strbuf *dst, const char *cmd, int dry_run)
 {
 	/*
 	 * Create a pipeline to have the command filter the buffer's
@@ -390,6 +397,9 @@ static int apply_filter(const char *path, const char *src, size_t len,
 	if (!cmd)
 		return 0;
 
+	if (dry_run)
+		return 1;
+
 	memset(&async, 0, sizeof(async));
 	async.proc = filter_buffer;
 	async.data = &params;
@@ -541,11 +551,17 @@ static int count_ident(const char *cp, unsigned long size)
 }
 
 static int ident_to_git(const char *path, const char *src, size_t len,
-                        struct strbuf *buf, int ident)
+			struct strbuf *buf, int ident, int dry_run)
 {
 	char *dst, *dollar;
 
-	if (!ident || !count_ident(src, len))
+	if (!ident)
+		return 0;
+
+	if (dry_run)
+		return 1;
+
+	if(!count_ident(src, len))
 		return 0;
 
 	/* only grow if not in place */
@@ -582,7 +598,7 @@ static int ident_to_git(const char *path, const char *src, size_t len,
 }
 
 static int ident_to_worktree(const char *path, const char *src, size_t len,
-                             struct strbuf *buf, int ident)
+			     struct strbuf *buf, int ident, int dry_run)
 {
 	unsigned char sha1[20];
 	char *to_free = NULL, *dollar, *spc;
@@ -591,6 +607,9 @@ static int ident_to_worktree(const char *path, const char *src, size_t len,
 	if (!ident)
 		return 0;
 
+	if (dry_run)
+		return 1;
+
 	cnt = count_ident(src, len);
 	if (!cnt)
 		return 0;
@@ -726,8 +745,9 @@ static enum action determine_action(enum action text_attr, enum eol eol_attr)
 	return text_attr;
 }
 
-int convert_to_git(const char *path, const char *src, size_t len,
-                   struct strbuf *dst, enum safe_crlf checksafe)
+static int convert_to_git_1(const char *path, const char *src, size_t len,
+			    struct strbuf *dst, enum safe_crlf checksafe,
+			    int dry_run)
 {
 	struct git_attr_check check[5];
 	enum action action = CRLF_GUESS;
@@ -748,23 +768,39 @@ int convert_to_git(const char *path, const char *src, size_t len,
 			filter = drv->clean;
 	}
 
-	ret |= apply_filter(path, src, len, dst, filter);
+	ret |= apply_filter(path, src, len, dst, filter, dry_run);
 	if (ret) {
+		if (dry_run)
+			return 1;
 		src = dst->buf;
 		len = dst->len;
 	}
 	action = determine_action(action, eol_attr);
-	ret |= crlf_to_git(path, src, len, dst, action, checksafe);
+	ret |= crlf_to_git(path, src, len, dst, action, checksafe, dry_run);
 	if (ret) {
+		if (dry_run)
+			return 1;
 		src = dst->buf;
 		len = dst->len;
 	}
-	return ret | ident_to_git(path, src, len, dst, ident);
+	return ret | ident_to_git(path, src, len, dst, ident, dry_run);
+}
+
+int convert_to_git(const char *path, const char *src, size_t len,
+		   struct strbuf *dst, enum safe_crlf checksafe)
+{
+	return convert_to_git_1(path, src, len, dst, checksafe, 0);
+}
+
+int convert_to_git_needed(const char *path, size_t len,
+			  enum safe_crlf checksafe)
+{
+	return convert_to_git_1(path, NULL, len, NULL, checksafe, 1);
 }
 
 static int convert_to_working_tree_internal(const char *path, const char *src,
 					    size_t len, struct strbuf *dst,
-					    int normalizing)
+					    int normalizing, int dry_run)
 {
 	struct git_attr_check check[5];
 	enum action action = CRLF_GUESS;
@@ -785,8 +821,10 @@ static int convert_to_working_tree_internal(const char *path, const char *src,
 			filter = drv->smudge;
 	}
 
-	ret |= ident_to_worktree(path, src, len, dst, ident);
+	ret |= ident_to_worktree(path, src, len, dst, ident, dry_run);
 	if (ret) {
+		if (dry_run)
+			return 1;
 		src = dst->buf;
 		len = dst->len;
 	}
@@ -796,23 +834,30 @@ static int convert_to_working_tree_internal(const char *path, const char *src,
 	 */
 	if (filter || !normalizing) {
 		action = determine_action(action, eol_attr);
-		ret |= crlf_to_worktree(path, src, len, dst, action);
+		ret |= crlf_to_worktree(path, src, len, dst, action, dry_run);
 		if (ret) {
+			if (dry_run)
+				return 1;
 			src = dst->buf;
 			len = dst->len;
 		}
 	}
-	return ret | apply_filter(path, src, len, dst, filter);
+	return ret | apply_filter(path, src, len, dst, filter, dry_run);
 }
 
 int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst)
 {
-	return convert_to_working_tree_internal(path, src, len, dst, 0);
+	return convert_to_working_tree_internal(path, src, len, dst, 0, 0);
+}
+
+int convert_to_working_tree_needed(const char *path, size_t len)
+{
+	return convert_to_working_tree_internal(path, NULL, len, NULL, 0, 1);
 }
 
 int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst)
 {
-	int ret = convert_to_working_tree_internal(path, src, len, dst, 1);
+	int ret = convert_to_working_tree_internal(path, src, len, dst, 1, 0);
 	if (ret) {
 		src = dst->buf;
 		len = dst->len;
-- 
1.7.4.74.g639db

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]