Re: [PATCH v12 04/10] utf8: teach same_encoding() alternative UTF encoding names

Eric Sunshine <sunshine@xxxxxxxxxxxxx> · Thu, 15 Mar 2018 19:25:16 -0400

On Thu, Mar 15, 2018 at 6:57 PM,  <lars.schneider@xxxxxxxxxxxx> wrote:
> The function same_encoding() checked only for alternative UTF-8 encoding
> names. Teach it to check for all kinds of alternative UTF encoding
> names.
>
> Signed-off-by: Lars Schneider <larsxschneider@xxxxxxxxx>
> ---
> diff --git a/utf8.c b/utf8.c
> @@ -401,11 +401,27 @@ void strbuf_utf8_replace(struct strbuf *sb_src, int pos, int width,
> +static int same_utf_encoding(const char *src, const char *dst)
> +{
> +       if (istarts_with(src, "utf") && istarts_with(dst, "utf")) {
> +               /* src[3] or dst[3] might be '\0' */
> +               int i = (src[3] == '-' ? 4 : 3);
> +               int j = (dst[3] == '-' ? 4 : 3);
> +               return !strcasecmp(src+i, dst+j);
> +       }
> +       return 0;
> +}

Alternate implementation without magic numbers:

    if (iskip_prefix(src, "utf", &src) &&
        iskip_prefix(dst, "utf", &dst)) {
        if (*src == '-')
            src++;
        if (*dst == '-')
            dst++;
        return !strcasecmp(src, dst);
    }
    return 0;

... assuming you add an iskip_prefix() function (patterned after skip_prefix()).

>  int is_encoding_utf8(const char *name)
>  {
>         if (!name)
>                 return 1;
> -       if (!strcasecmp(name, "utf-8") || !strcasecmp(name, "utf8"))
> +       if (same_utf_encoding("utf-8", name))
>                 return 1;
>         return 0;
>  }
> @@ -414,6 +430,8 @@ int same_encoding(const char *src, const char *dst)
>  {
>         if (is_encoding_utf8(src) && is_encoding_utf8(dst))
>                 return 1;
> +       if (same_utf_encoding(src, dst))
> +               return 1;
>         return !strcasecmp(src, dst);
>  }

This seems odd. I would have expected the newly-added generalized
conditional to replace the original UTF-8-specific conditional, not
supplement it. That is, shouldn't the entire function body be:

    if (same_utf_encoding(src, dst))
        return 1;
    return !strcasecmp(src, dst);

?