When going from a commit to its parents, we map the "interesting" range of lines according to the change made. For non-merge commit, we just run map_range on the ranges, which works as follows: 1. Run diffcore_std to find out the pre/postimage for each file. 2. Run xdi_diff_hunks on each interesting set of pre/postimages. 3. The map_range_cb callback is invoked for each hunk by the diff engine, and we use it to calculate the pre-image range from the post-image range in the function map_lines. For merge commits, we run map_range once for every parent. Simultaneously we use a take_range pass to eliminate all ranges that are identical. If any ranges remain after that, then the merge is considered non-trivial. The algorithm that maps lines from post-image to pre-image is in the function map_lines. Generally, we use simple line number calculation method to do the map. Signed-off-by: Bo Yang <struggleyb.nku@xxxxxxxxx> --- line.c | 502 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ revision.h | 5 +- 2 files changed, 506 insertions(+), 1 deletions(-) diff --git a/line.c b/line.c index 1b77172..ae52832 100644 --- a/line.c +++ b/line.c @@ -504,3 +504,505 @@ void setup_line(struct rev_info *rev, struct diff_line_range *r) diff_tree_release_paths(opt); } +struct take_range_cb_data { + struct diff_line_range *interesting; /* currently interesting ranges */ + struct diff_line_range *range; + /* the ranges corresponds to the interesting ranges of parent commit */ + long plno, tlno; + /* the last line number of diff hunk */ + int diff; + /* whether there is some line changes between the current + * commit and its parent */ +}; + +#define SCALE_FACTOR 4 +/* + * [p_start, p_end] represents the pre-image of current diff hunk, + * [t_start, t_end] represents the post-image of the current diff hunk, + * [start, end] represents the currently interesting line range in + * post-image, + * [o_start, o_end] represents the original line range that coresponds + * to current line range. + */ +void map_lines(long p_start, long p_end, long t_start, long t_end, + long start, long end, long *o_start, long *o_end) +{ + /* + * Normally, p_start should be less than p_end, so does the + * t_start and t_end. But when the line range is added from + * scratch, p_start will be greater than p_end. When the line + * range is deleted, t_start will be greater than t_end. + */ + if (p_start > p_end) { + *o_start = *o_end = 0; + return; + } + /* A deletion */ + if (t_start > t_end) { + *o_start = p_start; + *o_end = p_end; + return; + } + + if (start == t_start && end == t_end) { + *o_start = p_start; + *o_end = p_end; + return; + } + + /* + * A heuristic for lines mapping: + * + * When the pre-image is no more than 1/SCALE_FACTOR of the post-image, + * there is no effective way to find out which part of pre-image + * corresponds to the currently interesting range of post-image. + * And we are in the danger of tracking totally useless lines. + * So, we just treat all the post-image lines as added from scratch. + */ + if (SCALE_FACTOR * (p_end - p_start + 1) < (t_end - t_start + 1)) { + *o_start = *o_end = 0; + return; + } + + *o_start = p_start + start - t_start; + *o_end = p_end - (t_end - end); + + if (*o_start > *o_end) { + int temp = *o_start; + *o_start = *o_end; + *o_end = temp; + } + + if (*o_start < p_start) + *o_start = p_start; + if (*o_end > p_end) + *o_end = p_end; +} + +/* + * When same == 1: + * [p_start, p_end] represents the diff hunk line range of pre-image, + * [t_start, t_end] represents the diff hunk line range of post-image. + * When same == 0, they represent a range of identical lines between + * two images. + * + * This function find out the corresponding line ranges of currently + * interesting ranges which this diff hunk touches. + */ +static void map_range(struct take_range_cb_data *data, int same, + long p_start, long p_end, long t_start, long t_end) +{ + struct line_range *ranges = data->interesting->ranges; + long takens, takene, start, end; + int i = 0, out = 0, added = 0; + long op_start = p_start, op_end = p_end, ot_start = t_start, ot_end = t_end; + + for (; i < data->interesting->nr; i++) { + added = 0; + if (t_start > ranges[i].end) + continue; + if (t_end < ranges[i].start) + break; + + if (t_start > ranges[i].start) { + start = t_start; + takens = p_start; + if (t_end >= ranges[i].end) { + end = ranges[i].end; + takene = p_start + end - t_start; + } else { + end = t_end; + takene = p_end; + out = 1; + } + } else { + start = ranges[i].start; + takens = p_start + start - t_start; + if (t_end >= ranges[i].end) { + end = ranges[i].end; + takene = p_start + end - t_start; + } else { + end = t_end; + takene = p_end; + out = 1; + } + } + + if (!same) { + struct print_pair *pair = &ranges[i].pair; + struct print_range *rr = NULL; + PRINT_PAIR_GROW(pair); + rr = pair->ranges + pair->nr - 1; + PRINT_RANGE_INIT(rr); + rr->start = start; + rr->end = end; + map_lines(op_start, op_end, ot_start, ot_end, start, end, + &takens, &takene); + if (takens == 0 && takene == 0) { + added = 1; + rr->line_added = 1; + } + rr->pstart = takens; + rr->pend = takene; + data->diff = 1; + data->interesting->diff = 1; + ranges[i].diff = 1; + } + if (added) { + /* Code movement/copy detect here, now place two dummy statements here */ + int dummy = 0; + dummy = 1; + } else { + struct line_range *added_range = diff_line_range_insert(data->range, + NULL, takens, takene); + assert(added_range); + ranges[i].pstart = added_range->start; + ranges[i].pend = added_range->end; + } + + t_start = end + 1; + p_start = takene + 1; + + if (out) + break; + } +} + +/* + * [p_start, p_end] represents the line range of pre-image, + * [t_start, t_end] represents the line range of post-image, + * and they are identical lines. + * + * This function substracts out the identical lines between current + * commit and its parent, from currently interesting ranges. + */ +static void take_range(struct take_range_cb_data *data, + long p_start, long p_end, long t_start, long t_end) +{ + struct line_range *ranges = data->interesting->ranges; + long takens, takene, start, end; + int i = 0, out = 0, added = 0; + + for (; i < data->interesting->nr; i++) { + added = 0; + if (t_start > ranges[i].end) + continue; + if (t_end < ranges[i].start) + break; + + if (t_start > ranges[i].start) { + long tmp = ranges[i].end; + ranges[i].end = t_start - 1; + start = t_start; + takens = p_start; + if (t_end >= tmp) { + end = tmp; + takene = p_start + end - t_start; + p_start = takene + 1; + t_start = end + 1; + } else { + end = t_end; + takene = p_end; + diff_line_range_insert(data->interesting, NULL, + t_end + 1, tmp); + out = 1; + } + } else { + start = ranges[i].start; + takens = p_start + start - t_start; + if (t_end >= ranges[i].end) { + int num = data->interesting->nr - 1; + end = ranges[i].end; + takene = p_start + end - t_start; + t_start = end + 1; + p_start = takene + 1; + memmove(ranges + i, ranges + i + 1, (num - i) * sizeof(*ranges)); + data->interesting->nr = num; + i--; + } else { + end = t_end; + takene = p_end; + ranges[i].start = t_end + 1; + out = 1; + } + } + + diff_line_range_insert(data->range, NULL, takens, takene); + + if (out) + break; + } +} + +static void take_range_cb(void *data, long same, long p_next, long t_next) +{ + struct take_range_cb_data *d = data; + long p_start = d->plno + 1, t_start = d->tlno + 1; + long p_end = p_start + same - t_start, t_end = same; + + /* If one file is added from scratch, we should not bother to call + * take_range, since there is nothing to take + */ + if (t_end >= t_start) + take_range(d, p_start, p_end, t_start, t_end); + d->plno = p_next; + d->tlno = t_next; +} + +static void map_range_cb(void *data, long same, long p_next, long t_next) +{ + struct take_range_cb_data *d = data; + + long p_start = d->plno + 1; + long t_start = d->tlno + 1; + long p_end = same - t_start + p_start; + long t_end = same; + + /* Firstly, take the unchanged lines from child */ + if (t_end >= t_start) + map_range(d, 1, p_start, p_end, t_start, t_end); + + /* find out which lines to print */ + t_start = same + 1; + p_start = d->plno + t_start - d->tlno; + map_range(d, 0, p_start, p_next, t_start, t_next); + + d->plno = p_next; + d->tlno = t_next; +} + +/* + * We support two kinds of operation in this function: + * 1. map == 0, take the same lines from the current commit and assign it + * to parent; + * 2. map == 1, in addition to the same lines, we also map the changed lines + * from the current commit to the parent according to the + * diff output. + * take_range_cb and take_range are used to take same lines from current commit + * to parents. + * map_range_cb and map_range are used to map line ranges to the parent. + */ +static void assign_range_to_parent(struct rev_info *rev, struct commit *c, + struct commit *p, struct diff_line_range *r, + struct diff_options *opt, int map) +{ + struct diff_line_range *rr = xmalloc(sizeof(*rr)); + struct diff_line_range *cr = rr, *prev_r = rr; + struct diff_line_range *rg = NULL; + struct tree_desc desc1, desc2; + void *tree1 = NULL, *tree2 = NULL; + unsigned long size1, size2; + struct diff_queue_struct *queue; + struct take_range_cb_data cb = {NULL, cr, 0, 0}; + xpparam_t xpp; + xdemitconf_t xecfg; + int i, diff = 0; + xdiff_emit_hunk_consume_fn fn = map ? map_range_cb : take_range_cb; + + DIFF_LINE_RANGE_INIT(cr); + memset(&xpp, 0, sizeof(xpp)); + memset(&xecfg, 0, sizeof(xecfg)); + xecfg.ctxlen = xecfg.interhunkctxlen = 0; + + /* + * Compose up two trees, for root commit, we make up a empty tree. + */ + assert(c); + tree2 = read_object_with_reference(c->tree->object.sha1, "tree", + &size2, NULL); + if (tree2 == NULL) + die("Unable to read tree (%s)", sha1_to_hex(c->tree->object.sha1)); + init_tree_desc(&desc2, tree2, size2); + if (p) { + tree1 = read_object_with_reference(p->tree->object.sha1, + "tree", &size1, NULL); + if (tree1 == NULL) + die("Unable to read tree (%s)", + sha1_to_hex(p->tree->object.sha1)); + init_tree_desc(&desc1, tree1, size1); + } else { + init_tree_desc(&desc1, "", 0); + } + + DIFF_QUEUE_CLEAR(&diff_queued_diff); + diff_tree(&desc1, &desc2, "", opt); + diffcore_std(opt); + + queue = &diff_queued_diff; + for (i = 0; i < queue->nr; i++) { + struct diff_filepair *pair = queue->queue[i]; + struct diff_line_range *rg = r; + mmfile_t file_p, file_t; + assert(pair->two->path); + while (rg) { + assert(rg->spec->path); + if (!strcmp(rg->spec->path, pair->two->path)) + break; + rg = rg->next; + } + + if (rg == NULL) + continue; + rg->touch = 1; + if (rg->nr == 0) + continue; + + rg->status = pair->status; + assert(pair->two->sha1_valid); + diff_populate_filespec(pair->two, 0); + file_t.ptr = pair->two->data; + file_t.size = pair->two->size; + + if (rg->prev) + free_filespec(rg->prev); + rg->prev = pair->one; + rg->prev->count++; + if (pair->one->sha1_valid) { + diff_populate_filespec(pair->one, 0); + file_p.ptr = pair->one->data; + file_p.size = pair->one->size; + } else { + file_p.ptr = ""; + file_p.size = 0; + } + + if (cr->nr != 0) { + struct diff_line_range *tmp = xmalloc(sizeof(*tmp)); + cr->next = tmp; + prev_r = cr; + cr = tmp; + } else if (cr->spec) + DIFF_LINE_RANGE_CLEAR(cr); + + DIFF_LINE_RANGE_INIT(cr); + if (pair->one->sha1_valid) { + cr->spec = pair->one; + cr->spec->count++; + } + + cb.interesting = rg; + cb.range = cr; + cb.diff = 0; + cb.plno = cb.tlno = 0; + xdi_diff_hunks(&file_p, &file_t, fn, &cb, &xpp, &xecfg); + if (cb.diff) + diff = 1; + /* + * The remain part is the same part. + * Instead of calculating the true line number of the two files, + * use the biggest integer. + */ + if (map) + map_range(&cb, 1, cb.plno + 1, INT_MAX, cb.tlno + 1, 0x7FFFFFFF); + else + take_range(&cb, cb.plno + 1, INT_MAX, cb.tlno + 1, 0x7FFFFFFF); + } + opt->output_format = DIFF_FORMAT_NO_OUTPUT; + diff_flush(opt); + + /* + * Collect the untouch ranges, this comes from the files not changed + * between two commit. + */ + rg = r; + while (rg) { + /* clear the touch one to make it usable in next round */ + if (rg->touch) { + rg->touch = 0; + } else { + struct diff_line_range *untouch = diff_line_range_clone(rg); + if (prev_r == rr && rr->nr == 0) { + rr = prev_r = untouch; + } else { + prev_r->next = untouch; + prev_r = untouch; + } + } + rg = rg->next; + } + + if (cr->nr == 0) { + DIFF_LINE_RANGE_CLEAR(cr); + free(cr); + if (prev_r == cr) + rr = NULL; + else + prev_r->next = NULL; + } + + if (rr) { + assert(p); + add_line_range(rev, p, rr); + } + + /* and the ranges of current commit c is updated */ + c->object.flags &= ~RANGE_UPDATE; + if (diff) + c->object.flags |= NEED_PRINT; + + if (tree1) + free(tree1); + if (tree2) + free(tree2); +} + +static void diff_update_parent_range(struct rev_info *rev, + struct commit *commit) +{ + struct diff_line_range *r = lookup_line_range(rev, commit); + struct commit_list *parents = commit->parents; + struct commit *c = NULL; + if (parents) { + assert(parents->next == NULL); + c = parents->item; + } + + assign_range_to_parent(rev, commit, c, r, &rev->diffopt, 1); +} + +static void assign_parents_range(struct rev_info *rev, struct commit *commit) +{ + struct commit_list *parents = commit->parents; + struct diff_line_range *r = lookup_line_range(rev, commit); + struct diff_line_range *evil = NULL, *range = NULL; + int nontrivial = 0; + + /* + * If we are in linear history, update range and flush the patch if + * necessary + */ + if (parents == NULL || parents->next == NULL) + return diff_update_parent_range(rev, commit); + + /* + * Loop on the parents and assign the ranges to different + * parents, if there is any range left, this commit must + * be an evil merge. + */ + evil = diff_line_range_clone_deeply(r); + parents = commit->parents; + while (parents) { + struct commit *p = parents->item; + assign_range_to_parent(rev, commit, p, r, &rev->diffopt, 1); + assign_range_to_parent(rev, commit, p, evil, &rev->diffopt, 0); + parents = parents->next; + } + + /* + * yes, this must be an evil merge. + */ + range = evil; + while (range) { + if (range->nr) { + commit->object.flags |= NEED_PRINT | EVIL_MERGE; + nontrivial = 1; + } + range = range->next; + } + + if (nontrivial) + add_decoration(&rev->nontrivial_merge, &commit->object, evil); + else + cleanup(evil); +} + diff --git a/revision.h b/revision.h index c0d5065..2627ec4 100644 --- a/revision.h +++ b/revision.h @@ -15,7 +15,9 @@ #define ADDED (1u<<7) /* Parents already parsed and added? */ #define SYMMETRIC_LEFT (1u<<8) #define RANGE_UPDATE (1u<<9) /* for line level traverse */ -#define ALL_REV_FLAGS ((1u<<10)-1) +#define NEED_PRINT (1u<<10) +#define EVIL_MERGE (1u<<11) +#define ALL_REV_FLAGS ((1u<<12)-1) #define DECORATE_SHORT_REFS 1 #define DECORATE_FULL_REFS 2 @@ -141,6 +143,7 @@ struct rev_info { int count_right; /* line level range that we are chasing */ struct decoration line_range; + struct decoration nontrivial_merge; }; #define REV_TREE_SAME 0 -- 1.7.2.19.g79e5d -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html