Re: [PATCH 12/23] pack v4: creation code

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Nicolas Pitre <nico@xxxxxxxxxxx> writes:

> Let's actually open the destination pack file and write the header and
> the tables.
>
> The header isn't much different from pack v3, except for the pack version
> number of course.
>
> The first table is the sorted SHA1 table normally found in the pack index
> file.  With pack v4 we write this table in the main pack file instead as
> it is referenced by index from subsequent objects in the pack.  Doing so has
> many advantages:
>
> - The SHA1 references used to be duplicated on disk: once in the pack
>   index file, and then at least once or more within commit and tree
>   objects referencing them.  The only SHA1 which is not being listed more
>   than once this way is the one for a branch tip commit object and those
>   are normally very few.  Now all that SHA1 data is represented only once.
>

This tickles my curiosity. Why isn't this SHA-1 table sorted by
reference count, the same way as the tree path and people name
tables, to keep the average length of varint references short?

> - The SHA1 references found in commit and tree objects can be obtained
>   on disk directly without having to deflate those objects first.
>
> The SHA1 table size is obtained by multiplying the number of objects by 20.
>
> And then the commit and path dictionary tables are written right after
> the SHA1 table.

> Signed-off-by: Nicolas Pitre <nico@xxxxxxxxxxx>
> ---
>  packv4-create.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 55 insertions(+), 5 deletions(-)
>
> diff --git a/packv4-create.c b/packv4-create.c
> index 2956fda..5211f9c 100644
> --- a/packv4-create.c
> +++ b/packv4-create.c
> @@ -605,6 +605,48 @@ static unsigned long write_dict_table(struct sha1file *f, struct dict_table *t)
>  	return hdrlen + datalen;
>  }
>  
> +static struct sha1file * packv4_open(char *path)
> +{
> +	int fd;
> +
> +	fd = open(path, O_CREAT|O_EXCL|O_WRONLY, 0600);
> +	if (fd < 0)
> +		die_errno("unable to create '%s'", path);
> +	return sha1fd(fd, path);
> +}
> +
> +static unsigned int packv4_write_header(struct sha1file *f, unsigned nr_objects)
> +{
> +	struct pack_header hdr;
> +
> +	hdr.hdr_signature = htonl(PACK_SIGNATURE);
> +	hdr.hdr_version = htonl(4);
> +	hdr.hdr_entries = htonl(nr_objects);
> +	sha1write(f, &hdr, sizeof(hdr));
> +
> +	return sizeof(hdr);
> +}
> +
> +static unsigned long packv4_write_tables(struct sha1file *f, unsigned nr_objects,
> +					 struct pack_idx_entry *objs)
> +{
> +	unsigned i;
> +	unsigned long written = 0;
> +
> +	/* The sorted list of object SHA1's is always first */
> +	for (i = 0; i < nr_objects; i++)
> +		sha1write(f, objs[i].sha1, 20);
> +	written = 20 * nr_objects;
> +
> +	/* Then the commit dictionary table */
> +	written += write_dict_table(f, commit_name_table);
> +
> +	/* Followed by the path component dictionary table */
> +	written += write_dict_table(f, tree_path_table);
> +
> +	return written;
> +}
> +
>  static struct packed_git *open_pack(const char *path)
>  {
>  	char arg[PATH_MAX];
> @@ -658,9 +700,10 @@ static struct packed_git *open_pack(const char *path)
>  	return p;
>  }
>  
> -static void process_one_pack(char *src_pack)
> +static void process_one_pack(char *src_pack, char *dst_pack)
>  {
>  	struct packed_git *p;
> +	struct sha1file *f;
>  	struct pack_idx_entry *objs, **p_objs;
>  	unsigned nr_objects;
>  
> @@ -673,15 +716,22 @@ static void process_one_pack(char *src_pack)
>  	p_objs = sort_objs_by_offset(objs, nr_objects);
>  
>  	create_pack_dictionaries(p, p_objs);
> +
> +	f = packv4_open(dst_pack);
> +	if (!f)
> +		die("unable to open destination pack");
> +	packv4_write_header(f, nr_objects);
> +	packv4_write_tables(f, nr_objects, objs);
>  }
>  
>  int main(int argc, char *argv[])
>  {
> -	if (argc != 2) {
> -		fprintf(stderr, "Usage: %s <packfile>\n", argv[0]);
> +	if (argc != 3) {
> +		fprintf(stderr, "Usage: %s <src_packfile> <dst_packfile>\n", argv[0]);
>  		exit(1);
>  	}
> -	process_one_pack(argv[1]);
> -	dict_dump();
> +	process_one_pack(argv[1], argv[2]);
> +	if (0)
> +		dict_dump();
>  	return 0;
>  }
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]