Johannes Schindelin <Johannes.Schindelin@xxxxxx> writes: > I am rather interested in the semantics, i.e. if you can punch holes into > this 3-class approach. This is not the 3-class thing, but was done as a lunchtime hack. It removes more lines than it adds, with real comments ;-). * It removes the custom allocator used for minus/plus buffers and replaces it with the bog-standard strbuf; * The tokenization is done when diff_words_append() is called, i.e. when we read the original "added or deleted _lines_"; * The tokenization function is separated out, and gets the emit_callback, so anybody can enhance it with customization using gitattributes and other heuristics. More importantly, it is not byte oriented and would be easier to extend it to UTF-8 contents; * It does not have to play "suppressed_newline" games anymore. A LF is just a token. I haven't tested this at all (this is a lunchtime hack) and have a mild suspicion that it may have corner case miscounting (e.g. I blindly subtracts 3 from len when dealing with a line that represents a single token from the internal diff output --- do I always have 3 there even when the original file ends with an incomplete line? I didn't check), but other than that I think this is a lot easier to read and follow. --- diff.c | 216 +++++++++++++++++++++++++++++++-------------------------------- 1 files changed, 106 insertions(+), 110 deletions(-) diff --git a/diff.c b/diff.c index e35384b..344aaa6 100644 --- a/diff.c +++ b/diff.c @@ -351,87 +351,119 @@ static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one) return 0; } -struct diff_words_buffer { - mmfile_t text; - long alloc; - long current; /* output pointer */ - int suppressed_newline; +typedef unsigned long (*sane_truncate_fn)(char *line, unsigned long len); + +struct emit_callback { + struct xdiff_emit_state xm; + int nparents, color_diff; + unsigned ws_rule; + sane_truncate_fn truncate; + const char **label_path; + struct diff_words_data *diff_words; + int *found_changesp; + FILE *file; }; -static void diff_words_append(char *line, unsigned long len, - struct diff_words_buffer *buffer) +static size_t diff_words_tokenize(struct emit_callback *ecbdata, + char *line, unsigned long len) { - if (buffer->text.size + len > buffer->alloc) { - buffer->alloc = (buffer->text.size + len) * 3 / 2; - buffer->text.ptr = xrealloc(buffer->text.ptr, buffer->alloc); + /* + * This function currently is deliberately done very stupid, + * but passing ecbdata here means that you can potentially + * implement different tokenization rules depending on + * the content (e.g. "gitattributes(5)"). + */ + int is_space; + char *line0 = line; + + if (!len) + return 0; + + is_space = isspace(*line); + while (len && (isspace(*line) == is_space)) { + line++; + len--; } + return line - line0; +} + +static void diff_words_append(struct emit_callback *ecbdata, + char *line, unsigned long len, + struct strbuf *text) +{ + /* Skip leading +/- first. */ line++; len--; - memcpy(buffer->text.ptr + buffer->text.size, line, len); - buffer->text.size += len; + + /* + * Tokenize and stuff the words in. + */ + while (len) { + size_t token_len = diff_words_tokenize(ecbdata, line, len); + + if (line[0] != '\n') { + /* + * A nonempty token has ' ' stuffed in front, + * so that we can recover the original + * end-of-line easily. Stupid, but works. + */ + strbuf_add(text, " ", 1); + strbuf_add(text, line, token_len); + strbuf_add(text, "\n", 1); + len -= token_len; + line += token_len; + } else { + /* A real LF */ + strbuf_add(text, "\n", 1); + break; + } + } } struct diff_words_data { struct xdiff_emit_state xm; - struct diff_words_buffer minus, plus; + struct strbuf minus; + struct strbuf plus; FILE *file; }; -static void print_word(FILE *file, struct diff_words_buffer *buffer, int len, int color, - int suppress_newline) +static void emit_line(FILE *file, const char *set, const char *reset, const char *line, int len) { - const char *ptr; - int eol = 0; - - if (len == 0) - return; - - ptr = buffer->text.ptr + buffer->current; - buffer->current += len; - - if (ptr[len - 1] == '\n') { - eol = 1; - len--; - } - - fputs(diff_get_color(1, color), file); - fwrite(ptr, len, 1, file); - fputs(diff_get_color(1, DIFF_RESET), file); - - if (eol) { - if (suppress_newline) - buffer->suppressed_newline = 1; - else - putc('\n', file); - } + fputs(set, file); + fwrite(line, len, 1, file); + fputs(reset, file); } static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len) { struct diff_words_data *diff_words = priv; + const char *set; + const char *reset = diff_colors[DIFF_RESET]; - if (diff_words->minus.suppressed_newline) { - if (line[0] != '+') - putc('\n', diff_words->file); - diff_words->minus.suppressed_newline = 0; + switch (line[0]) { + case '-': + set = diff_colors[DIFF_FILE_OLD]; + break; + case '+': + set = diff_colors[DIFF_FILE_NEW]; + break; + case ' ': + set = diff_colors[DIFF_PLAIN]; + break; + default: + return; /* omit @@ -j,k +l,m @@ header */ } - len--; - switch (line[0]) { - case '-': - print_word(diff_words->file, - &diff_words->minus, len, DIFF_FILE_OLD, 1); - break; - case '+': - print_word(diff_words->file, - &diff_words->plus, len, DIFF_FILE_NEW, 0); - break; - case ' ': - print_word(diff_words->file, - &diff_words->plus, len, DIFF_PLAIN, 0); - diff_words->minus.current += len; - break; + if (line[1] == ' ') { + /* A token */ + line += 2; + len -= 3; /* drop the trailing LF */ + } else { + /* A real LF */ + line++; + len--; } + emit_line(diff_words->file, set, reset, line, len); } /* this executes the word diff on the accumulated buffers */ @@ -441,27 +473,18 @@ static void diff_words_show(struct diff_words_data *diff_words) xdemitconf_t xecfg; xdemitcb_t ecb; mmfile_t minus, plus; - int i; + unsigned long sz; memset(&xecfg, 0, sizeof(xecfg)); - minus.size = diff_words->minus.text.size; - minus.ptr = xmalloc(minus.size); - memcpy(minus.ptr, diff_words->minus.text.ptr, minus.size); - for (i = 0; i < minus.size; i++) - if (isspace(minus.ptr[i])) - minus.ptr[i] = '\n'; - diff_words->minus.current = 0; - - plus.size = diff_words->plus.text.size; - plus.ptr = xmalloc(plus.size); - memcpy(plus.ptr, diff_words->plus.text.ptr, plus.size); - for (i = 0; i < plus.size; i++) - if (isspace(plus.ptr[i])) - plus.ptr[i] = '\n'; - diff_words->plus.current = 0; + + minus.ptr = strbuf_detach(&diff_words->minus, &sz); + minus.size = sz; + plus.ptr = strbuf_detach(&diff_words->plus, &sz); + plus.size = sz; xpp.flags = XDF_NEED_MINIMAL; - xecfg.ctxlen = diff_words->minus.alloc + diff_words->plus.alloc; + /* hack to make it a single hunk to show all */ + xecfg.ctxlen = minus.size + plus.size; ecb.outf = xdiff_outf; ecb.priv = diff_words; diff_words->xm.consume = fn_out_diff_words_aux; @@ -469,37 +492,15 @@ static void diff_words_show(struct diff_words_data *diff_words) free(minus.ptr); free(plus.ptr); - diff_words->minus.text.size = diff_words->plus.text.size = 0; - - if (diff_words->minus.suppressed_newline) { - putc('\n', diff_words->file); - diff_words->minus.suppressed_newline = 0; - } } -typedef unsigned long (*sane_truncate_fn)(char *line, unsigned long len); - -struct emit_callback { - struct xdiff_emit_state xm; - int nparents, color_diff; - unsigned ws_rule; - sane_truncate_fn truncate; - const char **label_path; - struct diff_words_data *diff_words; - int *found_changesp; - FILE *file; -}; - static void free_diff_words_data(struct emit_callback *ecbdata) { if (ecbdata->diff_words) { /* flush buffers */ - if (ecbdata->diff_words->minus.text.size || - ecbdata->diff_words->plus.text.size) + if (ecbdata->diff_words->minus.len || + ecbdata->diff_words->plus.len) diff_words_show(ecbdata->diff_words); - - free (ecbdata->diff_words->minus.text.ptr); - free (ecbdata->diff_words->plus.text.ptr); free(ecbdata->diff_words); ecbdata->diff_words = NULL; } @@ -512,13 +513,6 @@ const char *diff_get_color(int diff_use_color, enum color_diff ix) return ""; } -static void emit_line(FILE *file, const char *set, const char *reset, const char *line, int len) -{ - fputs(set, file); - fwrite(line, len, 1, file); - fputs(reset, file); -} - static void emit_add_line(const char *reset, struct emit_callback *ecbdata, const char *line, int len) { const char *ws = diff_get_color(ecbdata->color_diff, DIFF_WHITESPACE); @@ -604,16 +598,16 @@ static void fn_out_consume(void *priv, char *line, unsigned long len) free_diff_words_data(ecbdata); if (ecbdata->diff_words) { if (line[0] == '-') { - diff_words_append(line, len, + diff_words_append(ecbdata, line, len, &ecbdata->diff_words->minus); return; } else if (line[0] == '+') { - diff_words_append(line, len, + diff_words_append(ecbdata, line, len, &ecbdata->diff_words->plus); return; } - if (ecbdata->diff_words->minus.text.size || - ecbdata->diff_words->plus.text.size) + if (ecbdata->diff_words->minus.len || + ecbdata->diff_words->plus.len) diff_words_show(ecbdata->diff_words); line++; len--; @@ -1470,6 +1464,8 @@ static void builtin_diff(const char *name_a, if (DIFF_OPT_TST(o, COLOR_DIFF_WORDS)) { ecbdata.diff_words = xcalloc(1, sizeof(struct diff_words_data)); + strbuf_init(&ecbdata.diff_words->minus, 0); + strbuf_init(&ecbdata.diff_words->plus, 0); ecbdata.diff_words->file = o->file; } xdi_diff(&mf1, &mf2, &xpp, &xecfg, &ecb); -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html