[PATCH 2/2] fast-import: validate entire ident string

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The author, committer, and tagger name and email should not include
any embedded <, >, or newline characters.  The format of the
identification string is

  ('author'|'committer'|'tagger') sp name sp < email > sp date

If an ident has no name attached, git still expects both separating
spaces, so the keyword is followed by two spaces in a row (for
example, "author  <email> date").

Helped-by: Mark Lodato <lodatom@xxxxxxxxx>
Signed-off-by: Jonathan Nieder <jrnieder@xxxxxxxxx>
---
For malformed input, the parser in pretty.c and 'git commit --amend'
tend to end up with different ideas of who the author is.  A lot of
the time, 'git commit --amend' gives up with "fatal: invalid commit".

 Documentation/git-fast-import.txt |    9 ++--
 fast-import.c                     |   54 ++++++++++++++++----------
 t/t9300-fast-import.sh            |   75 +++++++++++++++++++++++++++++++++++++
 3 files changed, 113 insertions(+), 25 deletions(-)

diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt
index 19082b0..ee725c6 100644
--- a/Documentation/git-fast-import.txt
+++ b/Documentation/git-fast-import.txt
@@ -337,8 +337,8 @@ change to the project.
 ....
 	'commit' SP <ref> LF
 	mark?
-	('author' (SP <name>)? SP LT <email> GT SP <when> LF)?
-	'committer' (SP <name>)? SP LT <email> GT SP <when> LF
+	('author' SP <name>? SP LT <email> GT SP <when> LF)?
+	'committer' SP <name>? SP LT <email> GT SP <when> LF
 	data
 	('from' SP <committish> LF)?
 	('merge' SP <committish> LF)?
@@ -393,8 +393,9 @@ Here `<name>` is the person's display name (for example
 (``cm@xxxxxxxxxxx'').  `LT` and `GT` are the literal less-than (\x3c)
 and greater-than (\x3e) symbols.  These are required to delimit
 the email address from the other fields in the line.  Note that
-`<name>` is free-form and may contain any sequence of bytes, except
-`LT` and `LF`.  It is typically UTF-8 encoded.
+`<name>` and `<email>` are free-form and may contain any sequence
+of bytes that are not `LT`, `GT`, or `LF`.  Both are typically UTF-8
+encoded.
 
 The time of the change is specified by `<when>` using the date format
 that was selected by the \--date-format=<fmt> command line option.
diff --git a/fast-import.c b/fast-import.c
index 1701cf1..d919168 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -19,8 +19,8 @@ Format of STDIN stream:
 
   new_commit ::= 'commit' sp ref_str lf
     mark?
-    ('author' (sp name)? sp '<' email '>' sp when lf)?
-    'committer' (sp name)? sp '<' email '>' sp when lf
+    ('author' sp name? sp '<' email '>' sp when lf)?
+    'committer' sp name? sp '<' email '>' sp when lf
     commit_msg
     ('from' sp committish lf)?
     ('merge' sp committish lf)*
@@ -47,7 +47,7 @@ Format of STDIN stream:
 
   new_tag ::= 'tag' sp tag_str lf
     'from' sp committish lf
-    ('tagger' (sp name)? sp '<' email '>' sp when lf)?
+    ('tagger' sp name? sp '<' email '>' sp when lf)?
     tag_msg;
   tag_msg ::= data;
 
@@ -123,9 +123,8 @@ Format of STDIN stream:
   sha1exp ::= # Any valid GIT SHA1 expression;
   hexsha1 ::= # SHA1 in hexadecimal format;
 
-     # note: name and email are UTF8 strings, however name must not
-     # contain '<' or lf and email must not contain any of the
-     # following: '<', '>', lf.
+     # note: name and email are UTF8 strings, however name and email
+     # must not contain any of the following: '<', '>', lf.
      #
   name  ::= # valid GIT author/committer name;
   email ::= # valid GIT author/committer email;
@@ -1929,34 +1928,47 @@ static int validate_raw_date(const char *src, char *result, int maxlen)
 	return 0;
 }
 
-static char *parse_ident(const char *buf)
+static size_t parse_name_and_email(const char *src, char **result, size_t extra)
 {
-	const char *gt;
+	const char *lt, *gt;
 	size_t name_len;
-	char *ident;
 
-	gt = strrchr(buf, '>');
-	if (!gt)
-		die("Missing > in ident string: %s", buf);
+	lt = src + strcspn(src, "<>\n");
+	if (lt == src || lt[-1] != ' ' || *lt != '<')
+		die("Invalid name in ident string: %s", src);
+	gt = lt + 1 + strcspn(lt + 1, "<>\n");
+	if (*gt != '>')
+		die("Invalid email in ident string: %s", src);
 	gt++;
 	if (*gt != ' ')
-		die("Missing space after > in ident string: %s", buf);
+		die("Missing space after > in ident string: %s", src);
 	gt++;
-	name_len = gt - buf;
-	ident = xmalloc(name_len + 24);
-	strncpy(ident, buf, name_len);
+	name_len = gt - src;
+	*result = xmalloc(name_len + extra);
+	memcpy(*result, src, name_len);
+	return name_len;
+}
+
+static char *parse_ident(const char *buf)
+{
+	const char *date;
+	size_t name_len;
+	char *ident;
+
+	name_len = parse_name_and_email(buf, &ident, 24);
+	date = buf + name_len;
 
 	switch (whenspec) {
 	case WHENSPEC_RAW:
-		if (validate_raw_date(gt, ident + name_len, 24) < 0)
-			die("Invalid raw date \"%s\" in ident: %s", gt, buf);
+		if (validate_raw_date(date, ident + name_len, 24) < 0)
+			die("Invalid raw date \"%s\" in ident: %s", date, buf);
 		break;
 	case WHENSPEC_RFC2822:
-		if (parse_date(gt, ident + name_len, 24) < 0)
-			die("Invalid rfc2822 date \"%s\" in ident: %s", gt, buf);
+		if (parse_date(date, ident + name_len, 24) < 0)
+			die("Invalid rfc2822 date \"%s\" in ident: %s", date, buf);
 		break;
 	case WHENSPEC_NOW:
-		if (strcmp("now", gt))
+		if (strcmp("now", date))
 			die("Date in ident must be 'now': %s", buf);
 		datestamp(ident + name_len, 24);
 		break;
diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh
index ed653a7..a7e379f 100755
--- a/t/t9300-fast-import.sh
+++ b/t/t9300-fast-import.sh
@@ -348,6 +348,81 @@ test_expect_success \
 
 cat >input <<INPUT_END
 commit refs/heads/branch
+author <$GIT_AUTHOR_EMAIL> Sat, 24 Apr 2010 14:49:52 -0500
+committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> Tue Feb 6 12:35:01 2007 -0500
+data <<COMMIT
+Nameless author, first attempt
+COMMIT
+
+from refs/heads/branch^0
+
+INPUT_END
+test_expect_success 'E: require space after author name' '
+    test_must_fail git fast-import --date-format=rfc2822 <input
+'
+
+cat >input <<INPUT_END
+commit refs/heads/branch
+author  <$GIT_AUTHOR_EMAIL> Sat, 24 Apr 2010 14:49:52 -0500
+committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> Tue Feb 6 12:35:01 2007 -0500
+data <<COMMIT
+Nameless author
+COMMIT
+
+from refs/heads/branch^0
+
+INPUT_END
+test_expect_success 'E: do not require author name, though' '
+    git fast-import --date-format=rfc2822 <input
+'
+
+cat >input <<INPUT_END
+commit refs/heads/branch
+author $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL> Sat, 24 Apr 2010 14:49:52 -0500
+committer C O >Mitter <$GIT_COMMITTER_EMAIL> Tue Feb 6 12:35:01 2007 -0500
+data <<COMMIT
+Odd committer
+COMMIT
+
+from refs/heads/branch^0
+
+INPUT_END
+test_expect_success 'E: unparsable committer' '
+    test_must_fail git fast-import --date-format=rfc2822 <input
+'
+
+cat >input <<INPUT_END
+commit refs/heads/branch
+author $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL> Sat, 24 Apr 2010 15:05:27 -0500
+committer $GIT_COMMITTER_NAME <aggh@<example.com> Tue Feb 6 12:35:01 2007 -0500
+data <<COMMIT
+Odd email
+COMMIT
+
+from refs/heads/branch^0
+
+INPUT_END
+test_expect_success 'E: unparsable email' '
+    test_must_fail git fast-import --date-format=rfc2822 <input
+'
+
+cat >input <<INPUT_END
+commit refs/heads/branch
+author $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL> Sat, 24 Apr 2010 15:05:27 -0500
+committer $GIT_COMMITTER_NAME <äggh!some!other!machine!example> Tue Feb 6 12:35:01 2007 -0500
+data <<COMMIT
+Bang path
+COMMIT
+
+from refs/heads/branch^0
+
+INPUT_END
+test_expect_success 'E: okay email' '
+    git fast-import --date-format=rfc2822 <input
+'
+
+cat >input <<INPUT_END
+commit refs/heads/branch
 author $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL> 1170783301 -  0500
 committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> 117078330 -0500
 data <<COMMIT
-- 
1.7.1.rc1

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]