The newly added heuristic to parse less precision ISO-8601 conflicts with other heuristics to parse datetime-strings. E.g.: Thu, 7 Apr 2005 15:14:13 -0700 Let's limit the new heuristic to only datetime string with a 'T' followed immediately by some digits, and if we failed to parse the upcoming string, rollback the change. Signed-off-by: Đoàn Trần Công Danh <congdanhqx@xxxxxxxxx> --- Here is a better thought out change, which tried to minimize the impact of new heuristics. While I think it's a fixup, but I still needs explaination, I think I may reword it's as a full patch instead. Range-diff: 1: 4036e5a944 ! 1: b703425a57 fixup! date.c: allow ISO 8601 reduced precision times @@ Metadata Author: Đoàn Trần Công Danh <congdanhqx@xxxxxxxxx> ## Commit message ## - fixup! date.c: allow ISO 8601 reduced precision times + date.c: limit less precision ISO-8601 with its marker + + The newly added heuristic to parse less precision ISO-8601 conflicts + with other heuristics to parse datetime-strings. E.g.: + + Thu, 7 Apr 2005 15:14:13 -0700 + + Let's limit the new heuristic to only datetime string with a 'T' + followed immediately by some digits, and if we failed to parse the + upcoming string, rollback the change. Signed-off-by: Đoàn Trần Công Danh <congdanhqx@xxxxxxxxx> @@ date.c: static int match_alpha(const char *date, struct tm *tm, int *offset) } + /* ISO-8601 allows yyyymmDD'T'HHMMSS, with less precision */ -+ if (*date == 'T' && isdigit(date[1])) { -+ tm->tm_hour = tm->tm_min = tm->tm_sec = 0; -+ return strlen("T"); ++ if (*date == 'T' && isdigit(date[1]) && tm->tm_hour == -1) { ++ tm->tm_min = tm->tm_sec = 0; ++ return 1; + } + /* BAD CRAP */ @@ date.c: static inline int nodate(struct tm *tm) - * We just do a binary 'and' to see if the sign bit - * is set in all the values. + * Have we seen an ISO-8601-alike date, i.e. 20220101T0, -+ * In those special case, those fields have been set to 0 ++ * In which, hour is still unset, ++ * and minutes and second has been set to 0. */ -static inline int notime(struct tm *tm) +static inline int maybeiso8601(struct tm *tm) @@ date.c: static inline int nodate(struct tm *tm) - return (tm->tm_hour & - tm->tm_min & - tm->tm_sec) < 0; -+ return tm->tm_hour == 0 && ++ return tm->tm_hour == -1 && + tm->tm_min == 0 && + tm->tm_sec == 0; } /* @@ date.c: static int match_digit(const char *date, struct tm *tm, int *offset, int *tm_gmt - /* 4 digits, compact style of ISO-8601's time: HHMM */ - /* 2 digits, compact style of ISO-8601's time: HH */ - if (n == 8 || n == 6 || + + /* 8 digits, compact style of ISO-8601's date: YYYYmmDD */ + /* 6 digits, compact style of ISO-8601's time: HHMMSS */ +- /* 4 digits, compact style of ISO-8601's time: HHMM */ +- /* 2 digits, compact style of ISO-8601's time: HH */ +- if (n == 8 || n == 6 || - (!nodate(tm) && notime(tm) && -+ (!nodate(tm) && maybeiso8601(tm) && - (n == 4 || n == 2))) { +- (n == 4 || n == 2))) { ++ if (n == 8 || n == 6) { unsigned int num1 = num / 10000; unsigned int num2 = (num % 10000) / 100; + unsigned int num3 = num % 100; +@@ date.c: static int match_digit(const char *date, struct tm *tm, int *offset, int *tm_gmt + else if (n == 6 && set_time(num1, num2, num3, tm) == 0 && + *end == '.' && isdigit(end[1])) + strtoul(end + 1, &end, 10); +- else if (n == 4) +- set_time(num2, num3, 0, tm); +- else if (n == 2) +- set_time(num3, 0, 0, tm); + return end - date; + } + ++ /* reduced precision of ISO-8601's time: HHMM or HH */ ++ if (maybeiso8601(tm)) { ++ unsigned int num1 = num; ++ unsigned int num2 = 0; ++ if (n == 4) { ++ num1 = num / 100; ++ num2 = num % 100; ++ } ++ if ((n == 4 || n == 2) && !nodate(tm) && ++ set_time(num1, num2, 0, tm) == 0) ++ return n; ++ /* ++ * We thought this is an ISO-8601 time string, ++ * we set minutes and seconds to 0, ++ * turn out it isn't, rollback the change. ++ */ ++ tm->tm_min = tm->tm_sec = -1; ++ } ++ + /* Four-digit year or a timezone? */ + if (n == 4) { + if (num <= 1400 && *offset == -1) { ## t/t0006-date.sh ## @@ t/t0006-date.sh: check_parse '20080214T20:30' '2008-02-14 20:30:00 +0000' date.c | 49 +++++++++++++++++++++++++++++++++---------------- t/t0006-date.sh | 3 ++- 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/date.c b/date.c index b011b9d6b3..6f45eeb356 100644 --- a/date.c +++ b/date.c @@ -493,6 +493,12 @@ static int match_alpha(const char *date, struct tm *tm, int *offset) return 2; } + /* ISO-8601 allows yyyymmDD'T'HHMMSS, with less precision */ + if (*date == 'T' && isdigit(date[1]) && tm->tm_hour == -1) { + tm->tm_min = tm->tm_sec = 0; + return 1; + } + /* BAD CRAP */ return skip_alpha(date); } @@ -639,15 +645,15 @@ static inline int nodate(struct tm *tm) } /* - * Have we filled in any part of the time yet? - * We just do a binary 'and' to see if the sign bit - * is set in all the values. + * Have we seen an ISO-8601-alike date, i.e. 20220101T0, + * In which, hour is still unset, + * and minutes and second has been set to 0. */ -static inline int notime(struct tm *tm) +static inline int maybeiso8601(struct tm *tm) { - return (tm->tm_hour & - tm->tm_min & - tm->tm_sec) < 0; + return tm->tm_hour == -1 && + tm->tm_min == 0 && + tm->tm_sec == 0; } /* @@ -701,11 +707,7 @@ static int match_digit(const char *date, struct tm *tm, int *offset, int *tm_gmt /* 8 digits, compact style of ISO-8601's date: YYYYmmDD */ /* 6 digits, compact style of ISO-8601's time: HHMMSS */ - /* 4 digits, compact style of ISO-8601's time: HHMM */ - /* 2 digits, compact style of ISO-8601's time: HH */ - if (n == 8 || n == 6 || - (!nodate(tm) && notime(tm) && - (n == 4 || n == 2))) { + if (n == 8 || n == 6) { unsigned int num1 = num / 10000; unsigned int num2 = (num % 10000) / 100; unsigned int num3 = num % 100; @@ -714,13 +716,28 @@ static int match_digit(const char *date, struct tm *tm, int *offset, int *tm_gmt else if (n == 6 && set_time(num1, num2, num3, tm) == 0 && *end == '.' && isdigit(end[1])) strtoul(end + 1, &end, 10); - else if (n == 4) - set_time(num2, num3, 0, tm); - else if (n == 2) - set_time(num3, 0, 0, tm); return end - date; } + /* reduced precision of ISO-8601's time: HHMM or HH */ + if (maybeiso8601(tm)) { + unsigned int num1 = num; + unsigned int num2 = 0; + if (n == 4) { + num1 = num / 100; + num2 = num % 100; + } + if ((n == 4 || n == 2) && !nodate(tm) && + set_time(num1, num2, 0, tm) == 0) + return n; + /* + * We thought this is an ISO-8601 time string, + * we set minutes and seconds to 0, + * turn out it isn't, rollback the change. + */ + tm->tm_min = tm->tm_sec = -1; + } + /* Four-digit year or a timezone? */ if (n == 4) { if (num <= 1400 && *offset == -1) { diff --git a/t/t0006-date.sh b/t/t0006-date.sh index 16fb0bf4bd..130207fc04 100755 --- a/t/t0006-date.sh +++ b/t/t0006-date.sh @@ -93,7 +93,8 @@ check_parse '20080214T20:30' '2008-02-14 20:30:00 +0000' check_parse '20080214T20' '2008-02-14 20:00:00 +0000' check_parse '20080214T203045' '2008-02-14 20:30:45 +0000' check_parse '20080214T2030' '2008-02-14 20:30:00 +0000' -check_parse '20080214T20' '2008-02-14 20:00:00 +0000' +check_parse '20080214T000000.20' '2008-02-14 00:00:00 +0000' +check_parse '20080214T00:00:00.20' '2008-02-14 00:00:00 +0000' check_parse '20080214T203045-04:00' '2008-02-14 20:30:45 -0400' check_parse '20080214T203045 -04:00' '2008-02-14 20:30:45 -0400' check_parse '20080214T203045.019-04:00' '2008-02-14 20:30:45 -0400' -- 2.39.0.287.g690a66fa66