Stefan Beller <sbeller@xxxxxxxxxx> writes: > +static int next_byte(const char **cp, const char **endp, > + const struct diff_options *diffopt) > +{ > + int retval; > + > + if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE_AT_EOL)) { > + while (*endp > *cp && isspace(**endp)) > + (*endp)--; > + } This should be done by the callers (both moved_entry_cmp() and get_string_hash()) before starting to iterate over the bytes from the beginning, no? > + if (*cp > *endp) > + return -1; > + > + if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE_CHANGE)) { > + while (*cp < *endp && isspace(**cp)) > + (*cp)++; > + /* > + * After skipping a couple of whitespaces, we still have to > + * account for one space. > + */ > + return (int)' '; > + } > + > + if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE)) { > + while (*cp < *endp && isspace(**cp)) > + (*cp)++; > + /* return the first non-ws character via the usual below */ > + } > + > + retval = **cp; The char could be signed, and byte 0xff may become indistinguishable from the EOF (i.e. -1) you returned earlier. > + (*cp)++; /* advance */ > + return retval; > +} > + > +static int moved_entry_cmp(const struct moved_entry *a, > + const struct moved_entry *b, > + const void *keydata, > + const void *data) > +{ > + const struct diff_options *diffopt = data; > + const char *ap = a->es->line, *ae = a->es->line + a->es->len; > + const char *bp = b->es->line, *be = b->es->line + b->es->len; > + > + if (!(diffopt->xdl_opts & XDF_WHITESPACE_FLAGS)) > + return a->es->len != b->es->len || memcmp(ap, bp, a->es->len); > + > + while (1) { > + int ca, cb; > + ca = next_byte(&ap, &ae, diffopt); > + cb = next_byte(&bp, &be, diffopt); > + if (ca != cb) > + return 1; /* differs */ > + if (!ca) Shouldn't this check for "ca == -1", as we are not dealing with NUL terminated string but a <ptr, len> thing? > + return 0; > + }; > +} > + > +static unsigned get_string_hash(struct emitted_diff_symbol *es, struct diff_options *o) > +{ > + if (o->xdl_opts & XDF_WHITESPACE_FLAGS) { > + static struct strbuf sb = STRBUF_INIT; > + const char *ap = es->line, *ae = es->line + es->len; > + int c; > + > + strbuf_reset(&sb); > + while ((c = next_byte(&ap, &ae, o)) > 0) > + strbuf_addch(&sb, c); > + > + return memhash(sb.buf, sb.len); > + } else { > + return memhash(es->line, es->len); > + } > +}