On Fri, Feb 10 2023, Masahiro Yamada wrote: > dowild() casts (char *) and (uchar *) back-and-forth, which is > ugly. > > This file was imported from rsync, which started to use (unsigned char) > since the following commit: > > | commit e11c42511903adc6d27cf1671cc76fa711ea37e5 > | Author: Wayne Davison <wayned@xxxxxxxxx> > | Date: Sun Jul 6 04:33:54 2003 +0000 > | > | - Added [:class:] handling to the character-class code. > | - Use explicit unsigned characters for proper set checks. > | - Made the character-class code honor backslash escapes. > | - Accept '^' as a class-negation character in addition to '!'. > > Perhaps, it was needed because rsync relies on is*() from <ctypes.h>. > > GIT has its own implementations, so the behavior is clear. > > In fact, commit 4546738b58a0 ("Unlocalized isspace and friends") > says one of the motivations is "we want the right signed behaviour". > > sane_istest() casts the given character to (unsigned char) anyway > before sane_ctype[] table lookup, so dowild() can use 'char'. > > Signed-off-by: Masahiro Yamada <masahiroy@xxxxxxxxxx> > --- > > wildmatch.c | 24 +++++++++++------------- > 1 file changed, 11 insertions(+), 13 deletions(-) > > diff --git a/wildmatch.c b/wildmatch.c > index 93800b8eac..7dffd783cb 100644 > --- a/wildmatch.c > +++ b/wildmatch.c > @@ -12,21 +12,19 @@ > #include "cache.h" > #include "wildmatch.h" > > -typedef unsigned char uchar; > - > #define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \ > && *(class) == *(litmatch) \ > - && strncmp((char*)class, litmatch, len) == 0) > + && strncmp(class, litmatch, len) == 0) > > /* Match pattern "p" against "text" */ > -static int dowild(const uchar *p, const uchar *text, unsigned int flags) > +static int dowild(const char *p, const char *text, unsigned int flags) > { > - uchar p_ch; > - const uchar *pattern = p; > + char p_ch; > + const char *pattern = p; > > for ( ; (p_ch = *p) != '\0'; text++, p++) { > int matched, match_slash, negated; > - uchar t_ch, prev_ch; > + char t_ch, prev_ch; > if ((t_ch = *text) == '\0' && p_ch != '*') > return WM_ABORT_ALL; > if ((flags & WM_CASEFOLD) && isupper(t_ch)) > @@ -50,7 +48,7 @@ static int dowild(const uchar *p, const uchar *text, unsigned int flags) > continue; > case '*': > if (*++p == '*') { > - const uchar *prev_p = p - 2; > + const char *prev_p = p - 2; > while (*++p == '*') {} > if (!(flags & WM_PATHNAME)) > /* without WM_PATHNAME, '*' == '**' */ > @@ -90,10 +88,10 @@ static int dowild(const uchar *p, const uchar *text, unsigned int flags) > * with WM_PATHNAME matches the next > * directory > */ > - const char *slash = strchr((char*)text, '/'); > + const char *slash = strchr(text, '/'); > if (!slash) > return WM_NOMATCH; > - text = (const uchar*)slash; > + text = slash; > /* the slash is consumed by the top-level for loop */ > break; > } > @@ -160,13 +158,13 @@ static int dowild(const uchar *p, const uchar *text, unsigned int flags) > if (t_ch <= p_ch && t_ch >= prev_ch) > matched = 1; > else if ((flags & WM_CASEFOLD) && islower(t_ch)) { > - uchar t_ch_upper = toupper(t_ch); > + char t_ch_upper = toupper(t_ch); > if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch) > matched = 1; > } > p_ch = 0; /* This makes "prev_ch" get set to 0. */ > } else if (p_ch == '[' && p[1] == ':') { > - const uchar *s; > + const char *s; > int i; > for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/ > if (!p_ch) > @@ -237,5 +235,5 @@ static int dowild(const uchar *p, const uchar *text, unsigned int flags) > /* Match the "pattern" against the "text" string. */ > int wildmatch(const char *pattern, const char *text, unsigned int flags) > { > - return dowild((const uchar*)pattern, (const uchar*)text, flags); > + return dowild(pattern, text, flags); > } This looks good to me. I independently wrote much the same a while ago for another reason, in: https://github.com/avar/git/commit/079f555375a I.e. this happens to be the only bit in-tree that's stopping us from running the xlc compiler in the c99 mode. My solution was different, but I like yours better. I had not done your analysis to discover that we didn't need this to be unsigned in the first place, I merly converted the "uchar" to an "unsigned char".