This is just half-a-review (bottom half of the file). > +static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env, > + int line1, int count1, int line2, int count2) > +{ What does this function return? > + struct histindex index; > + struct region lcs; > + int sz; > + int result = -1; > + > + if (count1 <= 0 && count2 <= 0) > + return 0; > + > + if (LINE_END(1) >= MAX_PTR) > + return -1; > + > + if (!count1) { > + while(count2--) > + env->xdf2.rchg[line2++ - 1] = 1; > + return 0; > + } else if (!count2) { > + while(count1--) > + env->xdf1.rchg[line1++ - 1] = 1; > + return 0; > + } > + > + memset(&index, 0, sizeof(index)); > + > + index.env = env; > + index.xpp = xpp; > + > + index.records = NULL; > + index.line_map = NULL; > + /* in case of early xdl_cha_free() */ > + index.rcha.head = NULL; > + > + index.table_bits = xdl_hashbits(count1); > + sz = index.records_size = 1 << index.table_bits; > + sz *= sizeof(struct record *); > + if (!(index.records = (struct record **) xdl_malloc(sz))) > + goto cleanup; > + memset(index.records, 0, sz); > + > + sz = index.line_map_size = count1; > + sz *= sizeof(struct record *); > + if (!(index.line_map = (struct record **) xdl_malloc(sz))) > + goto cleanup; > + memset(index.line_map, 0, sz); > + > + sz = index.line_map_size; > + sz *= sizeof(unsigned int); > + if (!(index.next_ptrs = (unsigned int *) xdl_malloc(sz))) > + goto cleanup; > + memset(index.next_ptrs, 0, sz); > + > + /* lines / 4 + 1 comes from xprepare.c:xdl_prepare_ctx() */ > + if (xdl_cha_init(&index.rcha, sizeof(struct record), count1 / 4 + 1) < 0) > + goto cleanup; > + > + index.ptr_shift = line1; > + index.max_chain_length = 64; > + > + memset(&lcs, 0, sizeof(lcs)); > + if (find_lcs(&index, &lcs, line1, count1, line2, count2)) > + result = fall_back_to_classic_diff(&index, line1, count1, line2, count2); > + else { > + result = 0; > + if (lcs.begin1 == 0 && lcs.begin2 == 0) { > + int ptr; > + for (ptr = 0; ptr < count1; ptr++) > + env->xdf1.rchg[line1 + ptr - 1] = 1; > + for (ptr = 0; ptr < count2; ptr++) > + env->xdf2.rchg[line2 + ptr - 1] = 1; > + } else { > + result = histogram_diff(xpp, env, > + line1, lcs.begin1 - line1, > + line2, lcs.begin2 - line2); > + result = histogram_diff(xpp, env, > + lcs.end1 + 1, LINE_END(1) - lcs.end1, > + lcs.end2 + 1, LINE_END(2) - lcs.end2); The result from the first half before lcs is discarded? > + result *= -1; Again, what does this function (called recursively) return, and what does flipping the sign of it do? > + } > + } > + > +cleanup: > + xdl_free(index.records); > + xdl_free(index.line_map); > + xdl_free(index.next_ptrs); > + xdl_cha_free(&index.rcha); > + > + return result; > +} > + > +int xdl_do_histogram_diff(mmfile_t *file1, mmfile_t *file2, > + xpparam_t const *xpp, xdfenv_t *env) > +{ > + int line1, line2, count1, count2; > + > + if (xdl_prepare_env(file1, file2, xpp, env) < 0) > + return -1; > + > + line1 = line2 = 1; > + count1 = env->xdf1.nrec; > + count2 = env->xdf2.nrec; > + > + reduce_common_start_end(xpp, env, &line1, &count1, &line2, &count2); What this does is logically not specific to histogram algorithm but can be applied to other backends, no? And I vaguely recall that Linus did try something like this once, found some issues with it when context is set to non zero, and stopped doing it (sorry, I do not have any more details). I am not suggesting you to remove this call or hoist the call to one level up to xdl_do_diff(), but I do have to wonder how much of the performance improvement you reported is due to this common head/tail reduction. > + return histogram_diff(xpp, env, line1, count1, line2, count2); > +} -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html