[PATCH v4 06/44] builtin-am: auto-detect mbox patches

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Since 15ced75 (git-am foreign patch support: autodetect some patch
formats, 2009-05-27), git-am.sh is able to autodetect mbox, stgit and
mercurial patches through heuristics.

Re-implement support for autodetecting mbox/maildir files in
builtin/am.c.

RFC 2822 requires that lines are terminated by "\r\n". To support this,
implement strbuf_getline_crlf(), which strips a trailing '\n' or "\r\n"
from the end of each line it reads.

Helped-by: Junio C Hamano <gitster@xxxxxxxxx>
Helped-by: Eric Sunshine <sunshine@xxxxxxxxxxxxxx>
Helped-by: Johannes Schindelin <johannes.schindelin@xxxxxx>
Signed-off-by: Paul Tan <pyokagan@xxxxxxxxx>
---

Notes:
    v4
    
    * Using strbuf_trim() to remove the \r from CRLF lines is obviously
      wrong. Instead, implement strbuf_getline_crlf() to do it correctly for
      us.
    
    * Use a regex to check that lines begin with a header field name.
    
    * Instead of re-opening the file again in is_email(), rewind the
      already-opened data stream.

 builtin/am.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)

diff --git a/builtin/am.c b/builtin/am.c
index 136ccc6..31d85eb 100644
--- a/builtin/am.c
+++ b/builtin/am.c
@@ -10,6 +10,21 @@
 #include "dir.h"
 #include "run-command.h"
 
+/**
+ * Reads a line into sb, stripping "\n" or "\r\n". Returns 0, or EOF on failure.
+ */
+static int strbuf_getline_crlf(struct strbuf *sb, FILE *fp)
+{
+	if (strbuf_getwholeline(sb, fp, '\n'))
+		return EOF;
+	if (sb->buf[sb->len - 1] == '\n') {
+		strbuf_setlen(sb, sb->len - 1);
+		if (sb->len > 0 && sb->buf[sb->len - 1] == '\r')
+			strbuf_setlen(sb, sb->len - 1);
+	}
+	return 0;
+}
+
 enum patch_format {
 	PATCH_FORMAT_UNKNOWN = 0,
 	PATCH_FORMAT_MBOX
@@ -131,6 +146,92 @@ static void am_destroy(const struct am_state *state)
 }
 
 /**
+ * Determines if the file looks like a piece of RFC2822 mail: rewinds `fp` and
+ * checks that every non-indented line, up to the first empty line or EOF,
+ * begins with something that looks like a valid header field name.
+ *
+ * Returns 1 if the file looks like a piece of mail, 0 otherwise.
+ */
+static int is_mail(FILE *fp)
+{
+	const char *header_regex = "^[!-9;-~]+:";
+	struct strbuf sb = STRBUF_INIT;
+	regex_t regex;
+	int ret = 1;
+
+	if (fseek(fp, 0L, SEEK_SET))
+		die_errno(_("fseek failed"));
+
+	if (regcomp(&regex, header_regex, REG_NOSUB | REG_EXTENDED))
+		die("invalid pattern: %s", header_regex);
+
+	while (!strbuf_getline_crlf(&sb, fp)) {
+		if (!sb.len)
+			break; /* An empty line ends the header block */
+
+		/* Skip folded continuation lines (leading tab or space) */
+		if (*sb.buf == '\t' || *sb.buf == ' ')
+			continue;
+
+		/* Not mail if any other line fails to match header_regex */
+		if (regexec(&regex, sb.buf, 0, NULL, 0)) {
+			ret = 0;
+			goto done;
+		}
+	}
+
+done:
+	regfree(&regex);
+	strbuf_release(&sb);
+	return ret;
+}
+
+/**
+ * Attempts to detect the patch_format of the patches contained in `paths` by
+ * inspecting only the first path. Returns the PATCH_FORMAT_* enum value, or
+ * PATCH_FORMAT_UNKNOWN if detection fails.
+ */
+static int detect_patch_format(const char **paths)
+{
+	enum patch_format ret = PATCH_FORMAT_UNKNOWN;
+	struct strbuf l1 = STRBUF_INIT;
+	FILE *fp;
+
+	/*
+	 * Default to mbox for stdin input (no path, or "-") and for directories
+	 */
+	if (!*paths || !strcmp(*paths, "-") || is_directory(*paths))
+		return PATCH_FORMAT_MBOX;
+
+	/*
+	 * Otherwise, open the first patch and read up to its first non-blank
+	 * line; that line and the header check in is_mail() decide the format.
+	 */
+
+	fp = xfopen(*paths, "r");
+
+	while (!strbuf_getline_crlf(&l1, fp)) {
+		if (l1.len)
+			break;
+	}
+
+	if (starts_with(l1.buf, "From ") || starts_with(l1.buf, "From: ")) {
+		ret = PATCH_FORMAT_MBOX;
+		goto done;
+	}
+
+	if (l1.len && is_mail(fp)) {
+		ret = PATCH_FORMAT_MBOX;
+		goto done;
+	}
+
+done:
+	fclose(fp);
+	strbuf_release(&l1);
+	return ret;
+}
+
+/**
  * Splits out individual email patches from `paths`, where each path is either
  * a mbox file or a Maildir. Returns 0 on success, -1 on failure.
  */
@@ -188,6 +289,14 @@ static int split_mail(struct am_state *state, enum patch_format patch_format,
 static void am_setup(struct am_state *state, enum patch_format patch_format,
 			const char **paths)
 {
+	if (!patch_format)
+		patch_format = detect_patch_format(paths);
+
+	if (!patch_format) {
+		fprintf_ln(stderr, _("Patch format detection failed."));
+		exit(128);
+	}
+
 	if (mkdir(state->dir, 0777) < 0 && errno != EEXIST)
 		die_errno(_("failed to create directory '%s'"), state->dir);
 
-- 
2.5.0.rc0.76.gb2c6e93

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]