Re: [PATCH] date.c: limit less precision ISO-8601 with its marker

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Jan 9, 2023 at 4:29 AM Đoàn Trần Công Danh <congdanhqx@xxxxxxxxx> wrote:
>
> The newly added heuristic for parsing reduced-precision ISO-8601
> conflicts with other heuristics for parsing datetime strings. E.g.:
>
>         Thu, 7 Apr 2005 15:14:13 -0700
>
> Let's limit the new heuristic to datetime strings with a 'T'
> followed immediately by some digits, and if we fail to parse the
> upcoming string, roll back the change.
>
> Signed-off-by: Đoàn Trần Công Danh <congdanhqx@xxxxxxxxx>
> ---
>
> Here is a better-thought-out change, which tries to minimize the impact of
> the new heuristics.
>
> While I think it's a fixup, it still needs an explanation, so I think I may
> reword it as a full patch instead.
> Range-diff:
> 1:  4036e5a944 ! 1:  b703425a57 fixup! date.c: allow ISO 8601 reduced precision times
>     @@ Metadata
>      Author: Đoàn Trần Công Danh <congdanhqx@xxxxxxxxx>
>
>       ## Commit message ##
>     -    fixup! date.c: allow ISO 8601 reduced precision times
>     +    date.c: limit less precision ISO-8601 with its marker
>     +
>     +    The newly added heuristic to parse less precision ISO-8601 conflicts
>     +    with other heuristics to parse datetime-strings. E.g.:
>     +
>     +            Thu, 7 Apr 2005 15:14:13 -0700
>     +
>     +    Let's limit the new heuristic to only datetime string with a 'T'
>     +    followed immediately by some digits, and if we failed to parse the
>     +    upcoming string, rollback the change.
>
>          Signed-off-by: Đoàn Trần Công Danh <congdanhqx@xxxxxxxxx>
>
>     @@ date.c: static int match_alpha(const char *date, struct tm *tm, int *offset)
>         }
>
>      +  /* ISO-8601 allows yyyymmDD'T'HHMMSS, with less precision */
>     -+  if (*date == 'T' && isdigit(date[1])) {
>     -+          tm->tm_hour = tm->tm_min = tm->tm_sec = 0;
>     -+          return strlen("T");
>     ++  if (*date == 'T' && isdigit(date[1]) && tm->tm_hour == -1) {
>     ++          tm->tm_min = tm->tm_sec = 0;
>     ++          return 1;
>      +  }
>      +
>         /* BAD CRAP */
>     @@ date.c: static inline int nodate(struct tm *tm)
>      - * We just do a binary 'and' to see if the sign bit
>      - * is set in all the values.
>      + * Have we seen an ISO-8601-alike date, i.e. 20220101T0,
>     -+ * In those special case, those fields have been set to 0
>     ++ * In which, hour is still unset,
>     ++ * and minutes and second has been set to 0.
>        */
>      -static inline int notime(struct tm *tm)
>      +static inline int maybeiso8601(struct tm *tm)
>     @@ date.c: static inline int nodate(struct tm *tm)
>      -  return (tm->tm_hour &
>      -          tm->tm_min &
>      -          tm->tm_sec) < 0;
>     -+  return tm->tm_hour == 0 &&
>     ++  return tm->tm_hour == -1 &&
>      +          tm->tm_min == 0 &&
>      +          tm->tm_sec == 0;
>       }
>
>       /*
>      @@ date.c: static int match_digit(const char *date, struct tm *tm, int *offset, int *tm_gmt
>     -   /* 4 digits, compact style of ISO-8601's time: HHMM */
>     -   /* 2 digits, compact style of ISO-8601's time: HH */
>     -   if (n == 8 || n == 6 ||
>     +
>     +   /* 8 digits, compact style of ISO-8601's date: YYYYmmDD */
>     +   /* 6 digits, compact style of ISO-8601's time: HHMMSS */
>     +-  /* 4 digits, compact style of ISO-8601's time: HHMM */
>     +-  /* 2 digits, compact style of ISO-8601's time: HH */
>     +-  if (n == 8 || n == 6 ||
>      -          (!nodate(tm) && notime(tm) &&
>     -+          (!nodate(tm) && maybeiso8601(tm) &&
>     -           (n == 4 || n == 2))) {
>     +-          (n == 4 || n == 2))) {
>     ++  if (n == 8 || n == 6) {
>                 unsigned int num1 = num / 10000;
>                 unsigned int num2 = (num % 10000) / 100;
>     +           unsigned int num3 = num % 100;
>     +@@ date.c: static int match_digit(const char *date, struct tm *tm, int *offset, int *tm_gmt
>     +           else if (n == 6 && set_time(num1, num2, num3, tm) == 0 &&
>     +                    *end == '.' && isdigit(end[1]))
>     +                   strtoul(end + 1, &end, 10);
>     +-          else if (n == 4)
>     +-                  set_time(num2, num3, 0, tm);
>     +-          else if (n == 2)
>     +-                  set_time(num3, 0, 0, tm);
>     +           return end - date;
>     +   }
>     +
>     ++  /* reduced precision of ISO-8601's time: HHMM or HH */
>     ++  if (maybeiso8601(tm)) {
>     ++          unsigned int num1 = num;
>     ++          unsigned int num2 = 0;
>     ++          if (n == 4) {
>     ++                  num1 = num / 100;
>     ++                  num2 = num % 100;
>     ++          }
>     ++          if ((n == 4 || n == 2) && !nodate(tm) &&
>     ++              set_time(num1, num2, 0, tm) == 0)
>     ++                  return n;
>     ++          /*
>     ++           * We thought this is an ISO-8601 time string,
>     ++           * we set minutes and seconds to 0,
>     ++           * turn out it isn't, rollback the change.
>     ++           */
>     ++          tm->tm_min = tm->tm_sec = -1;
>     ++  }
>     ++
>     +   /* Four-digit year or a timezone? */
>     +   if (n == 4) {
>     +           if (num <= 1400 && *offset == -1) {
>
>       ## t/t0006-date.sh ##
>      @@ t/t0006-date.sh: check_parse '20080214T20:30' '2008-02-14 20:30:00 +0000'
>
>  date.c          | 49 +++++++++++++++++++++++++++++++++----------------
>  t/t0006-date.sh |  3 ++-
>  2 files changed, 35 insertions(+), 17 deletions(-)
>
> diff --git a/date.c b/date.c
> index b011b9d6b3..6f45eeb356 100644
> --- a/date.c
> +++ b/date.c
> @@ -493,6 +493,12 @@ static int match_alpha(const char *date, struct tm *tm, int *offset)
>                 return 2;
>         }
>
> +       /* ISO-8601 allows yyyymmDD'T'HHMMSS, with less precision */
> +       if (*date == 'T' && isdigit(date[1]) && tm->tm_hour == -1) {
> +               tm->tm_min = tm->tm_sec = 0;
> +               return 1;
> +       }
> +
>         /* BAD CRAP */
>         return skip_alpha(date);
>  }
> @@ -639,15 +645,15 @@ static inline int nodate(struct tm *tm)
>  }
>
>  /*
> - * Have we filled in any part of the time yet?
> - * We just do a binary 'and' to see if the sign bit
> - * is set in all the values.
> + * Have we seen an ISO-8601-alike date, i.e. 20220101T0,
> + * In which, hour is still unset,
> + * and minutes and second has been set to 0.
>   */
> -static inline int notime(struct tm *tm)
> +static inline int maybeiso8601(struct tm *tm)
>  {
> -       return (tm->tm_hour &
> -               tm->tm_min &
> -               tm->tm_sec) < 0;
> +       return tm->tm_hour == -1 &&
> +               tm->tm_min == 0 &&
> +               tm->tm_sec == 0;
>  }
>
>  /*
> @@ -701,11 +707,7 @@ static int match_digit(const char *date, struct tm *tm, int *offset, int *tm_gmt
>
>         /* 8 digits, compact style of ISO-8601's date: YYYYmmDD */
>         /* 6 digits, compact style of ISO-8601's time: HHMMSS */
> -       /* 4 digits, compact style of ISO-8601's time: HHMM */
> -       /* 2 digits, compact style of ISO-8601's time: HH */
> -       if (n == 8 || n == 6 ||
> -               (!nodate(tm) && notime(tm) &&
> -               (n == 4 || n == 2))) {
> +       if (n == 8 || n == 6) {
>                 unsigned int num1 = num / 10000;
>                 unsigned int num2 = (num % 10000) / 100;
>                 unsigned int num3 = num % 100;
> @@ -714,13 +716,28 @@ static int match_digit(const char *date, struct tm *tm, int *offset, int *tm_gmt
>                 else if (n == 6 && set_time(num1, num2, num3, tm) == 0 &&
>                          *end == '.' && isdigit(end[1]))
>                         strtoul(end + 1, &end, 10);
> -               else if (n == 4)
> -                       set_time(num2, num3, 0, tm);
> -               else if (n == 2)
> -                       set_time(num3, 0, 0, tm);
>                 return end - date;
>         }
>
> +       /* reduced precision of ISO-8601's time: HHMM or HH */
> +       if (maybeiso8601(tm)) {
> +               unsigned int num1 = num;
> +               unsigned int num2 = 0;
> +               if (n == 4) {
> +                       num1 = num / 100;
> +                       num2 = num % 100;
> +               }
> +               if ((n == 4 || n == 2) && !nodate(tm) &&
> +                   set_time(num1, num2, 0, tm) == 0)
> +                       return n;
> +               /*
> +                * We thought this is an ISO-8601 time string,
> +                * we set minutes and seconds to 0,
> +                * turn out it isn't, rollback the change.
> +                */
> +               tm->tm_min = tm->tm_sec = -1;
> +       }
> +
>         /* Four-digit year or a timezone? */
>         if (n == 4) {
>                 if (num <= 1400 && *offset == -1) {
> diff --git a/t/t0006-date.sh b/t/t0006-date.sh
> index 16fb0bf4bd..130207fc04 100755
> --- a/t/t0006-date.sh
> +++ b/t/t0006-date.sh
> @@ -93,7 +93,8 @@ check_parse '20080214T20:30' '2008-02-14 20:30:00 +0000'
>  check_parse '20080214T20' '2008-02-14 20:00:00 +0000'
>  check_parse '20080214T203045' '2008-02-14 20:30:45 +0000'
>  check_parse '20080214T2030' '2008-02-14 20:30:00 +0000'
> -check_parse '20080214T20' '2008-02-14 20:00:00 +0000'
> +check_parse '20080214T000000.20' '2008-02-14 00:00:00 +0000'
> +check_parse '20080214T00:00:00.20' '2008-02-14 00:00:00 +0000'
>  check_parse '20080214T203045-04:00' '2008-02-14 20:30:45 -0400'
>  check_parse '20080214T203045 -04:00' '2008-02-14 20:30:45 -0400'
>  check_parse '20080214T203045.019-04:00' '2008-02-14 20:30:45 -0400'
> --
> 2.39.0.287.g690a66fa66
>

Thanks, Đoàn.  LGTM, and much safer.




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux