Re: [PATCH] Re: Teach mailinfo to ignore everything before -- >8 -- mark

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Nicolas Sebrecht <nicolas.s.dev@xxxxxx> writes:

> ... But isn't the following mark a bit
> too much permissive?
>
> ->8

Yeah, I agree that we should require a bit longer perforation, and perhaps
we should tighten the rules a bit, while at the same time not limiting the
request to cut to the exact phrase "cut here".  As you pointed out, we do
not want to be too lenient to allow misidentification, but at the same
time it is nicer to be accomodating and treat something like this as a
scissors line:

    - - - >8 - - - remove everything above this line - - - >8 - - -

I think we have bikeshedded long enough, so I won't be touching this code
any further only to change the definition of what a scissors mark looks
like, but here is what I did during lunch break, with another comment
added later to hint what s_hdr_data[] stands for after reading response
from Don Zickus.

-- >8 --
From: Junio C Hamano <gitster@xxxxxxxxx>
Subject: [PATCH] Teach mailinfo to ignore everything before -- >8 -- mark

This teaches mailinfo the scissors -- >8 -- mark; the command ignores
everything before it in the message body.

For lefties among us, we also support -- 8< -- ;-)

Signed-off-by: Junio C Hamano <gitster@xxxxxxxxx>
---
 builtin-mailinfo.c  |   71 ++++++++++++++++++++++++++++++++++++++++-
 t/t5100-mailinfo.sh |    2 +-
 t/t5100/info0014    |    5 +++
 t/t5100/msg0014     |    4 ++
 t/t5100/patch0014   |   64 ++++++++++++++++++++++++++++++++++++
 t/t5100/sample.mbox |   89 +++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 233 insertions(+), 2 deletions(-)
 create mode 100644 t/t5100/info0014
 create mode 100644 t/t5100/msg0014
 create mode 100644 t/t5100/patch0014

diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c
index b0b5d8f..7e09b51 100644
--- a/builtin-mailinfo.c
+++ b/builtin-mailinfo.c
@@ -712,6 +712,56 @@ static inline int patchbreak(const struct strbuf *line)
 	return 0;
 }
 
+static int is_scissors_line(const struct strbuf *line)
+{
+	size_t i, len = line->len;
+	int scissors = 0, gap = 0;
+	int first_nonblank = -1;
+	int last_nonblank = 0, visible, perforation, in_perforation = 0;
+	const char *buf = line->buf;
+
+	for (i = 0; i < len; i++) {
+		if (isspace(buf[i])) {
+			if (in_perforation) {
+				perforation++;
+				gap++;
+			}
+			continue;
+		}
+		last_nonblank = i;
+		if (first_nonblank < 0)
+			first_nonblank = i;
+		if (buf[i] == '-') {
+			in_perforation = 1;
+			perforation++;
+			continue;
+		}
+		if (i + 1 < len &&
+		    (!memcmp(buf + i, ">8", 2) || !memcmp(buf + i, "8<", 2))) {
+			in_perforation = 1;
+			perforation += 2;
+			scissors += 2;
+			i++;
+			continue;
+		}
+		in_perforation = 0;
+	}
+
+	/*
+	 * The mark must be at least 8 bytes long (e.g. "-- >8 --").
+	 * Even though there can be arbitrary cruft on the same line
+	 * (e.g. "cut here"), in order to avoid misidentification, the
+	 * perforation must occupy more than a third of the visible
+	 * width of the line, and dashes and scissors must occupy more
+	 * than half of the perforation.
+	 */
+
+	visible = last_nonblank - first_nonblank + 1;
+	return (scissors && 8 <= visible &&
+		visible < perforation * 3 &&
+		gap * 2 < perforation);
+}
+
 static int handle_commit_msg(struct strbuf *line)
 {
 	static int still_looking = 1;
@@ -723,7 +773,8 @@ static int handle_commit_msg(struct strbuf *line)
 		strbuf_ltrim(line);
 		if (!line->len)
 			return 0;
-		if ((still_looking = check_header(line, s_hdr_data, 0)) != 0)
+		still_looking = check_header(line, s_hdr_data, 0);
+		if (still_looking)
 			return 0;
 	}
 
@@ -731,6 +782,24 @@ static int handle_commit_msg(struct strbuf *line)
 	if (metainfo_charset)
 		convert_to_utf8(line, charset.buf);
 
+	if (is_scissors_line(line)) {
+		int i;
+		rewind(cmitmsg);
+		ftruncate(fileno(cmitmsg), 0);
+		still_looking = 1;
+
+		/*
+		 * We may have already read "secondary headers"; purge
+		 * them to give ourselves a clean restart.
+		 */
+		for (i = 0; header[i]; i++) {
+			if (s_hdr_data[i])
+				strbuf_release(s_hdr_data[i]);
+			s_hdr_data[i] = NULL;
+		}
+		return 0;
+	}
+
 	if (patchbreak(line)) {
 		fclose(cmitmsg);
 		cmitmsg = NULL;
diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh
index e70ea94..e848556 100755
--- a/t/t5100-mailinfo.sh
+++ b/t/t5100-mailinfo.sh
@@ -11,7 +11,7 @@ test_expect_success 'split sample box' \
 	'git mailsplit -o. "$TEST_DIRECTORY"/t5100/sample.mbox >last &&
 	last=`cat last` &&
 	echo total is $last &&
-	test `cat last` = 13'
+	test `cat last` = 14'
 
 for mail in `echo 00*`
 do
diff --git a/t/t5100/info0014 b/t/t5100/info0014
new file mode 100644
index 0000000..ab9c8d0
--- /dev/null
+++ b/t/t5100/info0014
@@ -0,0 +1,5 @@
+Author: Junio C Hamano
+Email: gitster@xxxxxxxxx
+Subject: Teach mailinfo to ignore everything before -- >8 -- mark
+Date: Thu, 20 Aug 2009 17:18:22 -0700
+
diff --git a/t/t5100/msg0014 b/t/t5100/msg0014
new file mode 100644
index 0000000..259c6a4
--- /dev/null
+++ b/t/t5100/msg0014
@@ -0,0 +1,4 @@
+This teaches mailinfo the scissors -- >8 -- mark; the command ignores
+everything before it in the message body.
+
+Signed-off-by: Junio C Hamano <gitster@xxxxxxxxx>
diff --git a/t/t5100/patch0014 b/t/t5100/patch0014
new file mode 100644
index 0000000..124efd2
--- /dev/null
+++ b/t/t5100/patch0014
@@ -0,0 +1,64 @@
+---
+ builtin-mailinfo.c |   37 ++++++++++++++++++++++++++++++++++++-
+ 1 files changed, 36 insertions(+), 1 deletions(-)
+
+diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c
+index b0b5d8f..461c47e 100644
+--- a/builtin-mailinfo.c
++++ b/builtin-mailinfo.c
+@@ -712,6 +712,34 @@ static inline int patchbreak(const struct strbuf *line)
+ 	return 0;
+ }
+ 
++static int scissors(const struct strbuf *line)
++{
++	size_t i, len = line->len;
++	int scissors_dashes_seen = 0;
++	const char *buf = line->buf;
++
++	for (i = 0; i < len; i++) {
++		if (isspace(buf[i]))
++			continue;
++		if (buf[i] == '-') {
++			scissors_dashes_seen |= 02;
++			continue;
++		}
++		if (i + 1 < len && !memcmp(buf + i, ">8", 2)) {
++			scissors_dashes_seen |= 01;
++			i++;
++			continue;
++		}
++		if (i + 7 < len && !memcmp(buf + i, "cut here", 8)) {
++			i += 7;
++			continue;
++		}
++		/* everything else --- not scissors */
++		break;
++	}
++	return scissors_dashes_seen == 03;
++}
++
+ static int handle_commit_msg(struct strbuf *line)
+ {
+ 	static int still_looking = 1;
+@@ -723,10 +751,17 @@ static int handle_commit_msg(struct strbuf *line)
+ 		strbuf_ltrim(line);
+ 		if (!line->len)
+ 			return 0;
+-		if ((still_looking = check_header(line, s_hdr_data, 0)) != 0)
++		still_looking = check_header(line, s_hdr_data, 0);
++		if (still_looking)
+ 			return 0;
+ 	}
+ 
++	if (scissors(line)) {
++		fseek(cmitmsg, 0L, SEEK_SET);
++		still_looking = 1;
++		return 0;
++	}
++
+ 	/* normalize the log message to UTF-8. */
+ 	if (metainfo_charset)
+ 		convert_to_utf8(line, charset.buf);
+-- 
+1.6.4.1
diff --git a/t/t5100/sample.mbox b/t/t5100/sample.mbox
index c3074ac..13fa4ae 100644
--- a/t/t5100/sample.mbox
+++ b/t/t5100/sample.mbox
@@ -561,3 +561,92 @@ From: <a.u.thor@xxxxxxxxxxx> (A U Thor)
 Date: Fri, 9 Jun 2006 00:44:16 -0700
 Subject: [PATCH] a patch
 
+From nobody Mon Sep 17 00:00:00 2001
+From: Junio Hamano <junkio@xxxxxxx>
+Date: Thu, 20 Aug 2009 17:18:22 -0700
+Subject: Why doesn't git-am does not like >8 scissors mark?
+
+Subject: [PATCH] BLAH ONE
+
+In real life, we will see a discussion that inspired this patch
+discussing related and unrelated things around >8 scissors mark
+in this part of the message.
+
+Subject: [PATCH] BLAH TWO
+
+And then we will see the scissors.
+
+ This line is not a scissors mark -- >8 -- but talks about it.
+ - - >8 - - please remove everything above this line - - >8 - -
+
+Subject: [PATCH] Teach mailinfo to ignore everything before -- >8 -- mark
+From: Junio C Hamano <gitster@xxxxxxxxx>
+
+This teaches mailinfo the scissors -- >8 -- mark; the command ignores
+everything before it in the message body.
+
+Signed-off-by: Junio C Hamano <gitster@xxxxxxxxx>
+---
+ builtin-mailinfo.c |   37 ++++++++++++++++++++++++++++++++++++-
+ 1 files changed, 36 insertions(+), 1 deletions(-)
+
+diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c
+index b0b5d8f..461c47e 100644
+--- a/builtin-mailinfo.c
++++ b/builtin-mailinfo.c
+@@ -712,6 +712,34 @@ static inline int patchbreak(const struct strbuf *line)
+ 	return 0;
+ }
+ 
++static int scissors(const struct strbuf *line)
++{
++	size_t i, len = line->len;
++	int scissors_dashes_seen = 0;
++	const char *buf = line->buf;
++
++	for (i = 0; i < len; i++) {
++		if (isspace(buf[i]))
++			continue;
++		if (buf[i] == '-') {
++			scissors_dashes_seen |= 02;
++			continue;
++		}
++		if (i + 1 < len && !memcmp(buf + i, ">8", 2)) {
++			scissors_dashes_seen |= 01;
++			i++;
++			continue;
++		}
++		if (i + 7 < len && !memcmp(buf + i, "cut here", 8)) {
++			i += 7;
++			continue;
++		}
++		/* everything else --- not scissors */
++		break;
++	}
++	return scissors_dashes_seen == 03;
++}
++
+ static int handle_commit_msg(struct strbuf *line)
+ {
+ 	static int still_looking = 1;
+@@ -723,10 +751,17 @@ static int handle_commit_msg(struct strbuf *line)
+ 		strbuf_ltrim(line);
+ 		if (!line->len)
+ 			return 0;
+-		if ((still_looking = check_header(line, s_hdr_data, 0)) != 0)
++		still_looking = check_header(line, s_hdr_data, 0);
++		if (still_looking)
+ 			return 0;
+ 	}
+ 
++	if (scissors(line)) {
++		fseek(cmitmsg, 0L, SEEK_SET);
++		still_looking = 1;
++		return 0;
++	}
++
+ 	/* normalize the log message to UTF-8. */
+ 	if (metainfo_charset)
+ 		convert_to_utf8(line, charset.buf);
+-- 
+1.6.4.1
-- 
1.6.4.1


--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]