Re: [PATCH v3 03/13] add the basic data structure for line level history

Junio C Hamano <gitster@xxxxxxxxx> · Mon, 12 Jul 2010 09:50:18 -0700

Bo Yang <struggleyb.nku@xxxxxxxxx> writes:

> Subject: [PATCH v3 revised 03/13] add the basic data structure for line level history
>
> 'struct diff_line_range' is the main data structure to store
> the user interesting line range. There is one 'diff_line_range'

-ECANTPARSE. Perhaps "to keep track of the line ranges the user is
interested in"?

Is it the end user, or the code, that is interested in the range recorded
in this structure?  If you are adjusting the line range while traversing
the history, then it is more of the latter; i.e. "the user originally
started digging from this range, but after examining the diff that affect
that range by this commit, we found that the range corresponds to this
widened/narrowed range in the history leading to the commit, so we will
update the range and use it from now on until we hit another commit that
affects this range".  In that case "to keep track of the line ranges we
are currently interested in" would be more appropriate than "the ranges
the user is interested in".

> for each file, and there are multiple 'struct range' in each
> 'diff_line_range'. In this way, we support multiple ranges.
>
> Within 'struct range', there are multiple 'struct print_range'
> which represent a diff chunk.

We call them "hunks" not "chunks", I think.

> diff --git a/diffcore.h b/diffcore.h
> index 491bea0..13d8e93 100644
> --- a/diffcore.h
> +++ b/diffcore.h
> @@ -23,6 +23,7 @@
>  #define MINIMUM_BREAK_SIZE     400 /* do not break a file smaller than this */
>
>  struct userdiff_driver;
> +struct diff_options;

Why???

>  struct diff_filespec {
>  	unsigned char sha1[20];

> diff --git a/line.c b/line.c
> new file mode 100644
> index 0000000..2f82fd8
> --- /dev/null
> +++ b/line.c
> @@ -0,0 +1,456 @@
> ...
> +
> +static struct object *verify_commit(struct rev_info *revs)
> +{
> +	struct object *commit = NULL;
> +	const char *name = NULL;

Why not use "int found = -1" instead and point at the commit in the
pending.objects[] array you found with that variable?  Or use a variable
of the type of a pointer to an element in the pending.objects[] array,
initialized to NULL.

That way, you do not have to tell stupid compilers that name is set once
commit is set to avoid warnings, which is the only reason you initialize
name to NULL.

> +	int i;
> +
> +	for (i = 0; i < revs->pending.nr; i++) {
> +		struct object *obj = revs->pending.objects[i].item;
> +		if (obj->flags & UNINTERESTING)
> +			continue;
> +		while (obj->type == OBJ_TAG)
> +			obj = deref_tag(obj, NULL, 0);
> +		if (obj->type != OBJ_COMMIT)
> +			die("Non commit %s?", revs->pending.objects[i].name);
> +		if (commit)
> +			die("More than one commit to dig from: %s and %s?",
> +			    revs->pending.objects[i].name, name);
> +		commit = obj;
> +		name = revs->pending.objects[i].name;
> +	}
> +
> +	if (commit == NULL)
> +		die("No commit specified?");
> +
> +	return commit;
> +}
> +
> +static void fill_blob_sha1(struct commit *commit, struct diff_line_range *r)
> +{
> +	unsigned mode;
> +	unsigned char sha1[20];
> +
> +	while (r) {
> +		if (get_tree_entry(commit->object.sha1, r->spec->path,
> +			sha1, &mode))
> +			goto error;
> +		fill_filespec(r->spec, sha1, mode);
> +		r = r->next;
> +	}
> +
> +	return;
> +error:
> +	die("There is no path %s in the commit", r->spec->path);
> +}

These two look vaguely familiar...  Cut and paste without refactoring?

> +static void fill_line_ends(struct diff_filespec *spec, long *lines,
> +	unsigned long **line_ends)
> +{
> +	int num = 0, size = 50;
> +	long cur = 0;
> +	unsigned long *ends = NULL;
> +	char *data = NULL;
> +
> +	if (diff_populate_filespec(spec, 0))
> +		die("Cannot read blob %s", sha1_to_hex(spec->sha1));
> +
> +	ends = xmalloc(size * sizeof(*ends));
> +	ends[cur++] = -1;

Wasn't this an array of unsigned longs?

> +static const char *nth_line(struct diff_filespec *spec, int line,
> +		long lines, unsigned long *line_ends)
> +{
> +	assert(line < lines);
> +	assert(spec && spec->data);

Why aren't "line" and "lines" of the same type?

> +static void parse_range(long lines, unsigned long *line_ends,
> +		struct range *r, struct diff_filespec *spec)
> +{
> +	const char *term;
> +
> +	term = parse_loc(r->arg, spec, lines, line_ends, 1, &r->start);
> +	if (*term == ',') {
> +		term = parse_loc(term + 1, spec, lines, line_ends,
> +			r->start + 1, &r->end);
> +		if (*term) {
> +			die("-L parameter's argument should be <start>,<end>");
> +		}

Excess {} around a single statement.

> +struct range *diff_line_range_insert(struct diff_line_range *r, const
> char *arg,
> +				int start, int end)
> +{
> +	int i = 0;
> +	struct range *rs = r->ranges;
> +	int left_extend = 0, right_extend = 0;
> +
> +	assert(r != NULL);
> +	assert(start <= end);
> +
> +	if (r->nr == 0 || rs[r->nr - 1].end < start - 1) {
> +		DIFF_LINE_RANGE_GROW(r);
> +		rs = r->ranges;
> +		int num = r->nr - 1;

decl-after-statement

> ...
> +out:
> +	assert(r->nr != i);
> +	if (left_extend) {

Sounds more like a "merge" than "extend" to me...

> diff --git a/line.h b/line.h
> new file mode 100644
> index 0000000..a2f8545
> --- /dev/null
> +++ b/line.h
> @@ -0,0 +1,122 @@
> ...
> +#define PRINT_RANGE_INIT(r) \
> +	do { \
> +		(r)->line_added = 0; \
> +	} while(0);

	} while (0)

- No semicolon after a macro (the user will give it to us).
- A SP after "while".

Does this even have to be "do { ... } while (0)"???

> +
> +#define PRINT_PAIR_INIT(p) \
> +	do { \
> +		(p)->alloc = (p)->nr = 0; \
> +		(p)->ranges = NULL; \
> +	} while(0);
> +
> +#define PRINT_PAIR_GROW(p) \
> +	do { \
> +		(p)->nr ++; \

	(p)->nr++;

> +		ALLOC_GROW((p)->ranges, (p)->nr, (p)->alloc); \
> +	} while (0);
> +
> +#define PRINT_PAIR_CLEAR(p) \
> +	do { \
> +		(p)->alloc = (p)->nr = 0; \
> +		if ((p)->ranges) \
> +			free((p)->ranges); \
> +		(p)->ranges = NULL; \
> +	} while(0);
> +
> +struct range {
> +	const char *arg;	//The argument to specify this line range

No C++/C99 comments.

Do you need to keep track of this string representation once you have
parsed the user input and your main computation have started?

Isn't "range" too generic a term?   Unless you make this as a static
declaration only visible to functions where "range" can only mean "line
ranges" in their context, that is.

> +#define RANGE_INIT(r) \

Same comment for the name possibly being too generic.

> +extern struct range *diff_line_range_insert(struct diff_line_range
> *r, const char *arg,

Linewrapped?

> diff --git a/revision.c b/revision.c
> index 7e82efd..63f37ea 100644
> --- a/revision.c
> +++ b/revision.c
> @@ -1637,6 +1637,12 @@ int setup_revisions(int argc, const char
> **argv, struct rev_info *revs, struct s
>  	if (revs->combine_merges)
>  		revs->ignore_merges = 0;
>  	revs->diffopt.abbrev = revs->abbrev;
> +
> +	if (revs->line) {

"line" what?

> diff --git a/revision.h b/revision.h
> index 36fdf22..55836e0 100644
> --- a/revision.h
> +++ b/revision.h
> @@ -14,7 +14,8 @@
>  #define CHILD_SHOWN	(1u<<6)
>  #define ADDED		(1u<<7)	/* Parents already parsed and added? */
>  #define SYMMETRIC_LEFT	(1u<<8)
> -#define ALL_REV_FLAGS	((1u<<9)-1)
> +#define RANGE_UPDATE	(1u<<9) /* for line level traverse */
> +#define ALL_REV_FLAGS	((1u<<10)-1)
>
>  #define DECORATE_SHORT_REFS	1
>  #define DECORATE_FULL_REFS	2
> @@ -68,7 +69,8 @@ struct rev_info {
>  			cherry_pick:1,
>  			bisect:1,
>  			ancestry_path:1,
> -			first_parent_only:1;
> +			first_parent_only:1,
> +			line:1;

Is this really a traversal flag that affects how the history is walked?

>  	/* Diff flags */
>  	unsigned int	diff:1,
> @@ -137,6 +139,8 @@ struct rev_info {
>  	/* commit counts */
>  	int count_left;
>  	int count_right;
> +	/* line-level intersting range */
> +	struct decoration line_range;

s/intersting//; or perhaps "line level range that we are chasing"

>  };
>
>  #define REV_TREE_SAME		0
> -- 
> 1.7.0.2.273.gc2413.dirty
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html