[PATCH WIP 1/4] convert.c: refactor in order to skip conversion early without looking into file content

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



convert_to_{git,working_tree} require the entire blob content to be in
memory. This is impossible for large files (especially files that
cannot be mapped into memory at all). Such files are unlikely to need
conversion anyway.

This patch factors out the condition checks that do not require file
content, so that large-file-related routines can check early whether
conversion can be skipped. If it cannot, they follow the common code path.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 cache.h   |    2 +
 convert.c |   86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 79 insertions(+), 9 deletions(-)

diff --git a/cache.h b/cache.h
index b8503ad..f3fc822 100644
--- a/cache.h
+++ b/cache.h
@@ -933,6 +933,8 @@ extern void trace_argv_printf(const char **argv, const char *format, ...);
 extern int convert_to_git(const char *path, const char *src, size_t len,
                           struct strbuf *dst, enum safe_crlf checksafe);
 extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst);
+extern int convert_to_git_needed(const char *path, size_t len);
+extern int convert_to_working_tree_needed(const char *path, size_t len);
 
 /* add */
 /*
diff --git a/convert.c b/convert.c
index 1816e97..809c3e8 100644
--- a/convert.c
+++ b/convert.c
@@ -120,13 +120,18 @@ static void check_safe_crlf(const char *path, int action,
 	}
 }
 
+static int crlf_to_git_noneed(const char *path, size_t len, int action)
+{
+	return (action == CRLF_BINARY) || !auto_crlf || !len;
+}
+
 static int crlf_to_git(const char *path, const char *src, size_t len,
                        struct strbuf *buf, int action, enum safe_crlf checksafe)
 {
 	struct text_stat stats;
 	char *dst;
 
-	if ((action == CRLF_BINARY) || !auto_crlf || !len)
+	if (crlf_to_git_noneed(path, len, action))
 		return 0;
 
 	gather_stats(src, len, &stats);
@@ -179,17 +184,19 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
 	return 1;
 }
 
+static int crlf_to_worktree_noneed(const char *path, size_t len, int action)
+{
+	return 	(action == CRLF_BINARY) || (action == CRLF_INPUT) ||
+		auto_crlf <= 0 || !len;
+}
+
 static int crlf_to_worktree(const char *path, const char *src, size_t len,
                             struct strbuf *buf, int action)
 {
 	char *to_free = NULL;
 	struct text_stat stats;
 
-	if ((action == CRLF_BINARY) || (action == CRLF_INPUT) ||
-	    auto_crlf <= 0)
-		return 0;
-
-	if (!len)
+	if (crlf_to_worktree_noneed(path, len, action))
 		return 0;
 
 	gather_stats(src, len, &stats);
@@ -271,6 +278,11 @@ static int filter_buffer(int fd, void *data)
 	return (write_err || status);
 }
 
+static int apply_filter_noneed(const char *path, const char *cmd)
+{
+	return cmd == NULL;
+}
+
 static int apply_filter(const char *path, const char *src, size_t len,
                         struct strbuf *dst, const char *cmd)
 {
@@ -285,7 +297,7 @@ static int apply_filter(const char *path, const char *src, size_t len,
 	struct async async;
 	struct filter_params params;
 
-	if (!cmd)
+	if (apply_filter_noneed(path, cmd))
 		return 0;
 
 	memset(&async, 0, sizeof(async));
@@ -428,12 +440,20 @@ static int count_ident(const char *cp, unsigned long size)
 	return cnt;
 }
 
+static int ident_conversion_noneed(const char *path, int ident)
+{
+	return !ident;
+}
+
 static int ident_to_git(const char *path, const char *src, size_t len,
                         struct strbuf *buf, int ident)
 {
 	char *dst, *dollar;
 
-	if (!ident || !count_ident(src, len))
+	if (ident_conversion_noneed(path, ident))
+		return 0;
+
+	if (!count_ident(src, len))
 		return 0;
 
 	/* only grow if not in place */
@@ -471,7 +491,7 @@ static int ident_to_worktree(const char *path, const char *src, size_t len,
 	char *to_free = NULL, *dollar;
 	int cnt;
 
-	if (!ident)
+	if (ident_conversion_noneed(path, ident))
 		return 0;
 
 	cnt = count_ident(src, len);
@@ -597,6 +617,28 @@ int convert_to_git(const char *path, const char *src, size_t len,
 	return ret | ident_to_git(path, src, len, dst, ident);
 }
 
+int convert_to_git_needed(const char *path, size_t len)
+{
+	struct git_attr_check check[3];
+	int crlf = CRLF_GUESS;
+	int ident = 0;
+	const char *filter = NULL;
+
+	setup_convert_check(check);
+	if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
+		struct convert_driver *drv;
+		crlf = git_path_check_crlf(path, check + 0);
+		ident = git_path_check_ident(path, check + 1);
+		drv = git_path_check_convert(path, check + 2);
+		if (drv && drv->clean)
+			filter = drv->clean;
+	}
+
+	return !apply_filter_noneed(path, filter) ||
+		!crlf_to_git_noneed(path, len, crlf) ||
+		!ident_conversion_noneed(path, ident);
+}
+
 int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst)
 {
 	struct git_attr_check check[3];
@@ -626,3 +668,29 @@ int convert_to_working_tree(const char *path, const char *src, size_t len, struc
 	}
 	return ret | apply_filter(path, src, len, dst, filter);
 }
+
+int convert_to_working_tree_needed(const char *path, size_t len)
+{
+	struct git_attr_check check[3];
+	int crlf = CRLF_GUESS;
+	int ident = 0;
+	const char *filter = NULL;
+
+	/*
+	 * any conversion added to convert_to_working_tree()
+	 * must be mirrored here as well
+	 */
+	setup_convert_check(check);
+	if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
+		struct convert_driver *drv;
+		crlf = git_path_check_crlf(path, check + 0);
+		ident = git_path_check_ident(path, check + 1);
+		drv = git_path_check_convert(path, check + 2);
+		if (drv && drv->smudge)
+			filter = drv->smudge;
+	}
+
+	return !ident_conversion_noneed(path, ident) ||
+		!crlf_to_worktree_noneed(path, len, crlf) ||
+		!apply_filter_noneed(path, filter);
+}
-- 
1.6.3.1.257.gbd13

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]