Hi Paul, On 2015-06-18 13:25, Paul Tan wrote: > diff --git a/builtin/am.c b/builtin/am.c > index e9a3687..7b97ea8 100644 > --- a/builtin/am.c > +++ b/builtin/am.c > @@ -121,6 +121,96 @@ static void am_destroy(const struct am_state *state) > strbuf_release(&sb); > } > > +/* > + * Returns 1 if the file looks like a piece of email a-la RFC2822, 0 otherwise. > + * We check this by grabbing all the non-indented lines and seeing if they look > + * like they begin with valid header field names. > + */ > +static int is_email(const char *filename) > +{ > + struct strbuf sb = STRBUF_INIT; > + FILE *fp = xfopen(filename, "r"); > + int ret = 1; > + > + while (!strbuf_getline(&sb, fp, '\n')) { > + const char *x; > + > + strbuf_rtrim(&sb); > + > + if (!sb.len) > + break; /* End of header */ > + > + /* Ignore indented folded lines */ > + if (*sb.buf == '\t' || *sb.buf == ' ') > + continue; > + > + /* It's a header if it matches the regexp "^[!-9;-~]+:" */ Why not just compile a regex and use it here? We use regexes elsewhere anyway... > +/** > + * Attempts to detect the patch_format of the patches contained in `paths`, > + * returning the PATCH_FORMAT_* enum value. Returns PATCH_FORMAT_UNKNOWN if > + * detection fails. > + */ > +static int detect_patch_format(struct string_list *paths) > +{ > + enum patch_format ret = PATCH_FORMAT_UNKNOWN; > + struct strbuf l1 = STRBUF_INIT; > + struct strbuf l2 = STRBUF_INIT; > + struct strbuf l3 = STRBUF_INIT; > + FILE *fp; > + > + /* > + * We default to mbox format if input is from stdin and for directories > + */ > + if (!paths->nr || !strcmp(paths->items->string, "-") || > + is_directory(paths->items->string)) { > + ret = PATCH_FORMAT_MBOX; > + goto done; > + } > + > + /* > + * Otherwise, check the first 3 lines of the first patch, starting > + * from the first non-blank line, to try to detect its format. > + */ > + fp = xfopen(paths->items->string, "r"); > + while (!strbuf_getline(&l1, fp, '\n')) { > + strbuf_trim(&l1); > + if (l1.len) > + break; > + } > + strbuf_getline(&l2, fp, '\n'); We should test the return value of `strbuf_getline()`; if EOF was reached already, `strbuf_getwholeline()` does not touch the strbuf. I know, the strbuf is still initialized empty here, but it is too easy to forget when changing this code. > + strbuf_trim(&l2); > + strbuf_getline(&l3, fp, '\n'); > + strbuf_trim(&l3); > + fclose(fp); > + > + if (starts_with(l1.buf, "From ") || starts_with(l1.buf, "From: ")) > + ret = PATCH_FORMAT_MBOX; Hmm. We can test that earlier and return without reading from the file any further, I think. > + else if (l1.len && l2.len && l3.len && is_email(paths->items->string)) > + ret = PATCH_FORMAT_MBOX; Maybe we can do better than this by folding the `is_email() function into this here function, reusing the same strbuf to read the lines and keeping track of the email header lines we saw... I would really like to avoid opening the same file twice just to figure out whether it is in email format. The rest looks very nice! Dscho -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html