Re: [PATCH 03/12] add the basic data structure for line level history

Junio C Hamano <gitster@xxxxxxxxx> · Sun, 27 Jun 2010 11:23:15 -0700

Bo Yang <struggleyb.nku@xxxxxxxxx> writes:

> 'struct diff_line_range' is the main data structure to store
> the user interesting line range. There is one 'diff_line_range'
> for each file, and there are multiple 'struct range' in each
> 'diff_line_range'. In this way, we support multiple ranges.
>
> Within 'struct range', there are multiple 'struct print_range'
> which represent a diff chunk.
>
> Signed-off-by: Bo Yang <struggleyb.nku@xxxxxxxxx>
> ---
> ...
> diff --git a/diffcore.h b/diffcore.h
> index 491bea0..06a6934 100644
> --- a/diffcore.h
> +++ b/diffcore.h
> @@ -51,6 +52,8 @@ struct diff_filespec {
>  	int is_binary;
>  };
>  
> +#define FILESPEC_VALID(f) (f)->sha1_valid

The filespec itself is valid even when it refers to an entity in the
working tree. Drop this, as it is misleading, and you don't seem to use
this anywhere.

> +static void cleanup(struct diff_line_range *r)
> +{
> +	while (r) {
> +		struct diff_line_range *tmp = r->next;
> +		DIFF_LINE_RANGE_CLEAR(r);
> +		free(r);
> +		r = tmp;

Nit. s/tmp/next/;

> +static struct object *find_commit(struct rev_info *revs)
> +{
> +	struct object *commit = NULL;
> +	const char *name = NULL;
> +	int i;
> +
> +	for (i = 0; i < revs->pending.nr; i++) {
> +		struct object *obj = revs->pending.objects[i].item;
> +		if (obj->flags & UNINTERESTING)
> +			continue;
> +		while (obj->type == OBJ_TAG)
> +			obj = deref_tag(obj, NULL, 0);
> +		if (obj->type != OBJ_COMMIT)
> +			die("Non commit %s?", revs->pending.objects[i].name);
> +		if (commit)
> +			die("More than one commit to dig from: %s and %s?",
> +			    revs->pending.objects[i].name, name);
> +		commit = obj;
> +		name = revs->pending.objects[i].name;
> +	}
> +
> +	return commit;
> +}
> +

This function is misnamed, as it does a lot more than "find commit".  It
also enforces that you have at most one positive commit-ish on the command
line (or whatever populated revs->pending[] array), while making the
caller responsible for diagnosing (and dying if necessary) if there is no
commit-ish.

The sole caller of this doesn't seem to check if the returned value is
NULL.  If you make this die upon "more than one commit", probably you
should also make it die upon "no commit".

> +static void fill_blob_sha1(struct commit *commit, struct diff_line_range *r)
> +{
> +	unsigned mode;
> +	unsigned char sha1[20];
> +
> +	while (r) {
> +		if (get_tree_entry(commit->object.sha1, r->spec->path,
> +			sha1, &mode))
> +			goto error;
> +		fill_filespec(r->spec, sha1, mode);
> +		r = r->next;
> +	}
> +
> +	return ;

There is a stray SP before semicolon.  Drop it.

> +error:
> +	die("There is no path %s in the commit", r->spec->path);
> +}
> +
> +static void make_line_log_file(struct diff_filespec *spec, int *lines, int **line_ends)
> +{

I don't see anything like "log-file" in this function.

When we have the whole contents of a blob in a chunk of memory, we always
use "unsigned long" to keep the offsets into it, I think.  The type of
"line-ends" should be "unsigned long **" for consistency.  I don't offhand
recall how we count number of lines (i.e. type of "lines"), but I suspect
we count them in long.

> +	int num = 0, size = 50, cur = 0;
> +	int *ends = NULL;
> +	char *data = NULL;
> +
> +	if (diff_populate_filespec(spec, 0))
> +		die("Cannot read blob %s", sha1_to_hex(spec->sha1));
> +
> +	ends = xmalloc(size * sizeof(int));
> +	ends[cur++] = -1;
> +	data = spec->data;
> +	while (num < spec->size) {
> +		if (data[num] == '\n' || num == spec->size - 1) {
> +			if (cur >= size) {
> +				size = size * 2;
> +				ends = xrealloc(ends, size * sizeof(int));
> +			}
> +			ends[cur++] = num;
> +		}

ARRAY_GROW()???

> +		num ++;

There is a stray SP between the operand and unary operator.  Drop it.

> +/*
> + * copied from blame.c, indeed, we can even to use this to test

A bit of refactoring would certainly help, before this series graduates
the WIP/RFC stage.

> +static void parse_lines(struct commit *commit, struct diff_line_range *r)
> +{
> +	int i;
> +	struct range *rg = NULL;
> +	int lines = 0;
> +	int *ends = NULL;
> +
> +	while (r) {
> +		struct diff_filespec *spec = r->spec;
> +		int num = r->nr;
> +		assert(spec);
> +		fill_blob_sha1(commit, r);
> +		rg = r->ranges;
> +		r->ranges = NULL;
> +		r->nr = r->alloc = 0;
> +		make_line_log_file(spec, &lines, &ends);
> +		for (i = 0; i < num; i++) {
> +			parse_range(lines, ends, rg + i, spec);
> +			diff_line_range_insert(r, rg[i].arg, rg[i].start, rg[i].end);
> +		}
> +
> +		if (ends != NULL) {
> +			free(ends);
> +			ends = NULL;
> +		}

Under what condition can "ends" be NULL here?  You would have died inside
nth_line() when you called parse_range().

Besides, free(NULL) is perfectly Ok.

> +
> +		r = r->next;
> +		/* release the memory of old array */
> +		free(rg);

If you gave "rg" a more descriptive name, the readers would understand
without this comment.

> +/*
> + * Insert a new line range into a diff_line_range struct, and keep the
> + * r->ranges sorted by their starting line number.
> + */
> +struct range *diff_line_range_insert(struct diff_line_range *r, const char *arg,
> +				int start, int end)
> +{
> +	assert(r != NULL);
> +	assert(start <= end);
> +
> +	int i = 0;
> +	struct range *rs = r->ranges;
> +	int left_extend = 0, right_extend = 0;

We avoid decl-after-statement.

> diff --git a/revision.c b/revision.c
> index 7847921..3186e0e 100644
> --- a/revision.c
> +++ b/revision.c
> @@ -1397,18 +1397,19 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg
>  	return 1;
>  }
>  
> -void parse_revision_opt(struct rev_info *revs, struct parse_opt_ctx_t *ctx,
> +int parse_revision_opt(struct rev_info *revs, struct parse_opt_ctx_t *ctx,
>  			const struct option *options,
>  			const char * const usagestr[])
>  {
>  	int n = handle_revision_opt(revs, ctx->argc, ctx->argv,
>  				    &ctx->cpidx, ctx->out);
>  	if (n <= 0) {
> -		error("unknown option `%s'", ctx->argv[0]);
> -		usage_with_options(usagestr, options);
> +		return -1;
>  	}
>  	ctx->argv += n;
>  	ctx->argc -= n;
> +
> +	return 0;
>  }

The function has existing callers and they expect it to fail with
usage_with_options(), never to return.  Doesn't this change break them?

This change is not described nor justified in the commit log message.

>  static int for_each_bad_bisect_ref(each_ref_fn fn, void *cb_data)
> @@ -1631,6 +1632,12 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
>  	if (revs->combine_merges)
>  		revs->ignore_merges = 0;
>  	revs->diffopt.abbrev = revs->abbrev;
> +
> +	if (revs->line) {
> +		revs->limited = 1;
> +		revs->topo_order = 1;
> +	}
> +
>  	if (diff_setup_done(&revs->diffopt) < 0)
>  		die("diff_setup_done failed");
>  
> diff --git a/revision.h b/revision.h
> index 855464f..26c2ff5 100644
> --- a/revision.h
> +++ b/revision.h
> @@ -14,6 +14,7 @@
>  #define CHILD_SHOWN	(1u<<6)
>  #define ADDED		(1u<<7)	/* Parents already parsed and added? */
>  #define SYMMETRIC_LEFT	(1u<<8)
> +#define RANGE_UPDATE	(1u<<9) /* for line level traverse */
>  #define ALL_REV_FLAGS	((1u<<9)-1)

It doesn't make sense to add a global flag here and keep ALL_REV_FLAGS the
same value.  Have you audited the existing code and made sure that they
either do use 1<<9 for their own or their uses of such a custom flag are
compatible with this?
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html