Re: [PATCH 05/38] pack v4: add commit object parsing

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,


On Thu, Sep 05, 2013 at 02:19:28AM -0400, Nicolas Pitre wrote:
> Let's create another dictionary table to hold the author and committer
> entries.  We use the same table format used for tree entries where the
> 16 bit data prefix is conveniently used to store the timezone value.
> 
> In order to copy straight from a commit object buffer, dict_add_entry()
> is modified to get the string length as the provided string pointer is
> not always be null terminated.
> 
> Signed-off-by: Nicolas Pitre <nico@xxxxxxxxxxx>
> ---
>  packv4-create.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 89 insertions(+), 9 deletions(-)
> 
> diff --git a/packv4-create.c b/packv4-create.c
> index eccd9fc..5c08871 100644
> --- a/packv4-create.c
> +++ b/packv4-create.c
> @@ -1,5 +1,5 @@
>  /*
> - * packv4-create.c: management of dictionary tables used in pack v4
> + * packv4-create.c: creation of dictionary tables and objects used in pack v4
>   *
>   * (C) Nicolas Pitre <nico@xxxxxxxxxxx>
>   *
> @@ -80,9 +80,9 @@ static void rehash_entries(struct dict_table *t)
>  	}
>  }
>  
> -int dict_add_entry(struct dict_table *t, int val, const char *str)
> +int dict_add_entry(struct dict_table *t, int val, const char *str, int str_len)
>  {
> -	int i, val_len = 2, str_len = strlen(str) + 1;
> +	int i, val_len = 2;
>  
>  	if (t->ptr + val_len + str_len > t->size) {

We need a +1 here on the left side, i.e.

        if (t->ptr + val_len + str_len + 1 > t->size) {

The str_len variable accounted for the terminating null character
before, but this patch removes str_len = strlen(str) + 1; above, and
callers specify the length of str without the terminating null in
str_len.  Thus it can lead to memory corruption, when the new entry
happens to end at 't->ptr + val_len + str_len' and the line added in
the next hunk writes the terminating null beyond the end of the
buffer.  I couldn't create a v4 pack from a current linux repo because
of this; either glibc detected something or 'git packv4-create'
crashed.

Sidenote: couldn't we call the 'ptr' field something else, like
end_offset or end_idx?  It took me some headscratching to figure out
why is it OK to compare a pointer to an integer above, or use a
pointer without dereferencing as an index into an array below (because
ptr is, well, not a pointer after all).

>  		t->size = (t->size + val_len + str_len + 1024) * 3 / 2;
> @@ -92,6 +92,7 @@ int dict_add_entry(struct dict_table *t, int val, const char *str)
>  	t->data[t->ptr] = val >> 8;
>  	t->data[t->ptr + 1] = val;
>  	memcpy(t->data + t->ptr + val_len, str, str_len);
> +	t->data[t->ptr + val_len + str_len] = 0;
>  
>  	i = (t->nb_entries) ?
>  		locate_entry(t, t->data + t->ptr, val_len + str_len) : -1;
> @@ -107,7 +108,7 @@ int dict_add_entry(struct dict_table *t, int val, const char *str)
>  	t->entry[t->nb_entries].offset = t->ptr;
>  	t->entry[t->nb_entries].size = val_len + str_len;
>  	t->entry[t->nb_entries].hits = 1;
> -	t->ptr += val_len + str_len;
> +	t->ptr += val_len + str_len + 1;

Good.


Best,
Gábor


>  	t->nb_entries++;
>  
>  	if (t->hash_size * 3 <= t->nb_entries * 4)
> @@ -135,8 +136,73 @@ static void sort_dict_entries_by_hits(struct dict_table *t)
>  	rehash_entries(t);
>  }
>  
> +static struct dict_table *commit_name_table;
>  static struct dict_table *tree_path_table;
>  
> +/*
> + * Parse the author/committer line from a canonical commit object.
> + * The 'from' argument points right after the "author " or "committer "
> + * string.  The time zone is parsed and stored in *tz_val.  The returned
> + * pointer is right after the end of the email address which is also just
> + * before the time value, or NULL if a parsing error is encountered.
> + */
> +static char *get_nameend_and_tz(char *from, int *tz_val)
> +{
> +	char *end, *tz;
> +
> +	tz = strchr(from, '\n');
> +	/* let's assume the smallest possible string to be "x <x> 0 +0000\n" */
> +	if (!tz || tz - from < 13)
> +		return NULL;
> +	tz -= 4;
> +	end = tz - 4;
> +	while (end - from > 5 && *end != ' ')
> +		end--;
> +	if (end[-1] != '>' || end[0] != ' ' || tz[-2] != ' ')
> +		return NULL;
> +	*tz_val = (tz[0] - '0') * 1000 +
> +		  (tz[1] - '0') * 100 +
> +		  (tz[2] - '0') * 10 +
> +		  (tz[3] - '0');
> +	switch (tz[-1]) {
> +	default:	return NULL;
> +	case '+':	break;
> +	case '-':	*tz_val = -*tz_val;
> +	}
> +	return end;
> +}
> +
> +static int add_commit_dict_entries(void *buf, unsigned long size)
> +{
> +	char *name, *end = NULL;
> +	int tz_val;
> +
> +	if (!commit_name_table)
> +		commit_name_table = create_dict_table();
> +
> +	/* parse and add author info */
> +	name = strstr(buf, "\nauthor ");
> +	if (name) {
> +		name += 8;
> +		end = get_nameend_and_tz(name, &tz_val);
> +	}
> +	if (!name || !end)
> +		return -1;
> +	dict_add_entry(commit_name_table, tz_val, name, end - name);
> +
> +	/* parse and add committer info */
> +	name = strstr(end, "\ncommitter ");
> +	if (name) {
> +	       name += 11;
> +	       end = get_nameend_and_tz(name, &tz_val);
> +	}
> +	if (!name || !end)
> +		return -1;
> +	dict_add_entry(commit_name_table, tz_val, name, end - name);
> +
> +	return 0;
> +}
> +
>  static int add_tree_dict_entries(void *buf, unsigned long size)
>  {
>  	struct tree_desc desc;
> @@ -146,13 +212,16 @@ static int add_tree_dict_entries(void *buf, unsigned long size)
>  		tree_path_table = create_dict_table();
>  
>  	init_tree_desc(&desc, buf, size);
> -	while (tree_entry(&desc, &name_entry))
> +	while (tree_entry(&desc, &name_entry)) {
> +		int pathlen = tree_entry_len(&name_entry);
>  		dict_add_entry(tree_path_table, name_entry.mode,
> -			       name_entry.path);
> +				name_entry.path, pathlen);
> +	}
> +
>  	return 0;
>  }
>  
> -void dict_dump(struct dict_table *t)
> +void dump_dict_table(struct dict_table *t)
>  {
>  	int i;
>  
> @@ -169,6 +238,12 @@ void dict_dump(struct dict_table *t)
>  	}
>  }
>  
> +static void dict_dump(void)
> +{
> +	dump_dict_table(commit_name_table);
> +	dump_dict_table(tree_path_table);
> +}
> +
>  struct idx_entry
>  {
>  	off_t                offset;
> @@ -205,6 +280,7 @@ static int create_pack_dictionaries(struct packed_git *p)
>  		enum object_type type;
>  		unsigned long size;
>  		struct object_info oi = {};
> +		int (*add_dict_entries)(void *, unsigned long);
>  
>  		oi.typep = &type;
>  		oi.sizep = &size;
> @@ -213,7 +289,11 @@ static int create_pack_dictionaries(struct packed_git *p)
>  			    sha1_to_hex(objects[i].sha1), p->pack_name);
>  
>  		switch (type) {
> +		case OBJ_COMMIT:
> +			add_dict_entries = add_commit_dict_entries;
> +			break;
>  		case OBJ_TREE:
> +			add_dict_entries = add_tree_dict_entries;
>  			break;
>  		default:
>  			continue;
> @@ -225,7 +305,7 @@ static int create_pack_dictionaries(struct packed_git *p)
>  		if (check_sha1_signature(objects[i].sha1, data, size, typename(type)))
>  			die("packed %s from %s is corrupt",
>  			    sha1_to_hex(objects[i].sha1), p->pack_name);
> -		if (add_tree_dict_entries(data, size) < 0)
> +		if (add_dict_entries(data, size) < 0)
>  			die("can't process %s object %s",
>  				typename(type), sha1_to_hex(objects[i].sha1));
>  		free(data);
> @@ -285,6 +365,6 @@ int main(int argc, char *argv[])
>  		exit(1);
>  	}
>  	process_one_pack(argv[1]);
> -	dict_dump(tree_path_table);
> +	dict_dump();
>  	return 0;
>  }
> -- 
> 1.8.4.38.g317e65b
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]