git-packv4-create now becomes test-packv4. Code that will not be used by pack-objects.c is moved to test-packv4.c. It may be removed when the code transition to pack-objects completes. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx> --- Makefile | 4 +- packv4-create.c | 491 +--------------------------------------------------- packv4-create.h | 39 +++++ test-packv4.c (new) | 476 ++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 525 insertions(+), 485 deletions(-) create mode 100644 test-packv4.c diff --git a/Makefile b/Makefile index 22fc276..af2e3e3 100644 --- a/Makefile +++ b/Makefile @@ -550,7 +550,6 @@ PROGRAM_OBJS += shell.o PROGRAM_OBJS += show-index.o PROGRAM_OBJS += upload-pack.o PROGRAM_OBJS += remote-testsvn.o -PROGRAM_OBJS += packv4-create.o # Binary suffix, set to .exe for Windows builds X = @@ -568,6 +567,7 @@ TEST_PROGRAMS_NEED_X += test-line-buffer TEST_PROGRAMS_NEED_X += test-match-trees TEST_PROGRAMS_NEED_X += test-mergesort TEST_PROGRAMS_NEED_X += test-mktemp +TEST_PROGRAMS_NEED_X += test-packv4 TEST_PROGRAMS_NEED_X += test-parse-options TEST_PROGRAMS_NEED_X += test-path-utils TEST_PROGRAMS_NEED_X += test-prio-queue @@ -702,6 +702,7 @@ LIB_H += notes.h LIB_H += object.h LIB_H += pack-revindex.h LIB_H += pack.h +LIB_H += packv4-create.h LIB_H += packv4-parse.h LIB_H += parse-options.h LIB_H += patch-ids.h @@ -839,6 +840,7 @@ LIB_OBJS += object.o LIB_OBJS += pack-check.o LIB_OBJS += pack-revindex.o LIB_OBJS += pack-write.o +LIB_OBJS += packv4-create.o LIB_OBJS += packv4-parse.o LIB_OBJS += pager.o LIB_OBJS += parse-options.o diff --git a/packv4-create.c b/packv4-create.c index 920a0b4..cdf82c0 100644 --- a/packv4-create.c +++ b/packv4-create.c @@ -18,9 +18,9 @@ #include "packv4-create.h" -static int pack_compression_seen; -static int pack_compression_level = Z_DEFAULT_COMPRESSION; -static int min_tree_copy = 1; +int pack_compression_seen; +int pack_compression_level = Z_DEFAULT_COMPRESSION; +int min_tree_copy = 1; struct data_entry { unsigned offset; @@ -28,17 +28,6 @@ struct data_entry { unsigned hits; }; -struct dict_table { - unsigned char *data; - unsigned cur_offset; - unsigned size; - struct data_entry *entry; - unsigned nb_entries; - unsigned max_entries; - unsigned *hash; - unsigned hash_size; -}; - struct dict_table *create_dict_table(void) { return xcalloc(sizeof(struct dict_table), 1); @@ -139,7 +128,7 @@ static int cmp_dict_entries(const void *a_, const void *b_) return diff; } -static void sort_dict_entries_by_hits(struct dict_table *t) +void sort_dict_entries_by_hits(struct dict_table *t) { qsort(t->entry, t->nb_entries, sizeof(*t->entry), cmp_dict_entries); t->hash_size = (t->nb_entries * 4 / 3) / 2; @@ -208,7 +197,7 @@ int add_commit_dict_entries(struct dict_table *commit_ident_table, return 0; } -static int add_tree_dict_entries(struct dict_table *tree_path_table, +int add_tree_dict_entries(struct dict_table *tree_path_table, void *buf, unsigned long size) { struct tree_desc desc; @@ -224,7 +213,7 @@ static int add_tree_dict_entries(struct dict_table *tree_path_table, return 0; } -void dump_dict_table(struct dict_table *t) +static void dump_dict_table(struct dict_table *t) { int i; @@ -241,7 +230,7 @@ void dump_dict_table(struct dict_table *t) } } -static void dict_dump(struct packv4_tables *v4) +void dict_dump(struct packv4_tables *v4) { dump_dict_table(v4->commit_ident_table); dump_dict_table(v4->tree_path_table); @@ -611,103 +600,6 @@ void *pv4_encode_tree(const struct packv4_tables *v4, return buffer; } -static struct pack_idx_entry *get_packed_object_list(struct packed_git *p) -{ - unsigned i, nr_objects = p->num_objects; - struct pack_idx_entry *objects; - - objects = xmalloc((nr_objects + 1) * sizeof(*objects)); - objects[nr_objects].offset = p->pack_size - 20; - for (i = 0; i < nr_objects; i++) { - hashcpy(objects[i].sha1, nth_packed_object_sha1(p, i)); - objects[i].offset = nth_packed_object_offset(p, i); - } - - return objects; -} - -static int sort_by_offset(const void *e1, const void *e2) -{ - const struct pack_idx_entry * const *entry1 = e1; - const struct pack_idx_entry * const *entry2 = e2; - if ((*entry1)->offset < (*entry2)->offset) - return -1; - if ((*entry1)->offset > (*entry2)->offset) - return 1; - return 0; -} - -static struct pack_idx_entry **sort_objs_by_offset(struct pack_idx_entry *list, - unsigned nr_objects) -{ - unsigned i; - struct pack_idx_entry **sorted; - - sorted = xmalloc((nr_objects + 1) * sizeof(*sorted)); - for (i = 0; i < nr_objects + 1; i++) - sorted[i] = &list[i]; - qsort(sorted, nr_objects + 1, sizeof(*sorted), sort_by_offset); - - return sorted; -} - -static int create_pack_dictionaries(struct packv4_tables *v4, - struct packed_git *p, - struct pack_idx_entry **obj_list) -{ - struct progress *progress_state; - unsigned int i; - - v4->commit_ident_table = create_dict_table(); - v4->tree_path_table = create_dict_table(); - - progress_state = start_progress("Scanning objects", p->num_objects); - for (i = 0; i < p->num_objects; i++) { - struct pack_idx_entry *obj = obj_list[i]; - void *data; - enum object_type type; - unsigned long size; - struct object_info oi = {}; - int (*add_dict_entries)(struct dict_table *, void *, unsigned long); - struct dict_table *dict; - - display_progress(progress_state, i+1); - - oi.typep = &type; - oi.sizep = &size; - if (packed_object_info(p, obj->offset, &oi) < 0) - die("cannot get type of %s from %s", - sha1_to_hex(obj->sha1), p->pack_name); - - switch (type) { - case OBJ_COMMIT: - add_dict_entries = add_commit_dict_entries; - dict = v4->commit_ident_table; - break; - case OBJ_TREE: - add_dict_entries = add_tree_dict_entries; - dict = v4->tree_path_table; - break; - default: - continue; - } - data = unpack_entry(p, obj->offset, &type, &size); - if (!data) - die("cannot unpack %s from %s", - sha1_to_hex(obj->sha1), p->pack_name); - if (check_sha1_signature(obj->sha1, data, size, typename(type))) - die("packed %s from %s is corrupt", - sha1_to_hex(obj->sha1), p->pack_name); - if (add_dict_entries(dict, data, size) < 0) - die("can't process %s object %s", - typename(type), sha1_to_hex(obj->sha1)); - free(data); - } - - stop_progress(&progress_state); - return 0; -} - static unsigned long write_dict_table(struct sha1file *f, struct dict_table *t) { unsigned char buffer[1024]; @@ -757,28 +649,6 @@ static unsigned long write_dict_table(struct sha1file *f, struct dict_table *t) return hdrlen + datalen; } -static struct sha1file * packv4_open(char *path) -{ - int fd; - - fd = open(path, O_CREAT|O_EXCL|O_WRONLY, 0600); - if (fd < 0) - die_errno("unable to create '%s'", path); - return sha1fd(fd, path); -} - -static unsigned int packv4_write_header(struct sha1file *f, unsigned nr_objects) -{ - struct pack_header hdr; - - hdr.hdr_signature = htonl(PACK_SIGNATURE); - hdr.hdr_version = htonl(4); - hdr.hdr_entries = htonl(nr_objects); - sha1write(f, &hdr, sizeof(hdr)); - - return sizeof(hdr); -} - unsigned long packv4_write_tables(struct sha1file *f, const struct packv4_tables *v4) { @@ -802,350 +672,3 @@ unsigned long packv4_write_tables(struct sha1file *f, return written; } - -static int write_object_header(struct sha1file *f, enum object_type type, unsigned long size) -{ - unsigned char buf[16]; - uint64_t val; - int len; - - /* - * We really have only one kind of delta object. - */ - if (type == OBJ_OFS_DELTA) - type = OBJ_REF_DELTA; - - /* - * We allocate 4 bits in the LSB for the object type which should - * be good for quite a while, given that we effectively encodes - * only 5 object types: commit, tree, blob, delta, tag. - */ - val = size; - if (MSB(val, 4)) - die("fixme: the code doesn't currently cope with big sizes"); - val <<= 4; - val |= type; - len = encode_varint(val, buf); - sha1write(f, buf, len); - return len; -} - -static unsigned long copy_object_data(struct packv4_tables *v4, - struct sha1file *f, struct packed_git *p, - off_t offset) -{ - struct pack_window *w_curs = NULL; - struct revindex_entry *revidx; - enum object_type type; - unsigned long avail, size, datalen, written; - int hdrlen, reflen, idx_nr; - unsigned char *src, buf[24]; - - revidx = find_pack_revindex(p, offset); - idx_nr = revidx->nr; - datalen = revidx[1].offset - offset; - - src = use_pack(p, &w_curs, offset, &avail); - hdrlen = unpack_object_header_buffer(src, avail, &type, &size); - - written = write_object_header(f, type, size); - - if (type == OBJ_OFS_DELTA) { - const unsigned char *cp = src + hdrlen; - off_t base_offset = decode_varint(&cp); - hdrlen = cp - src; - base_offset = offset - base_offset; - if (base_offset <= 0 || base_offset >= offset) - die("delta offset out of bound"); - revidx = find_pack_revindex(p, base_offset); - reflen = encode_sha1ref(v4, - nth_packed_object_sha1(p, revidx->nr), - buf); - sha1write(f, buf, reflen); - written += reflen; - } else if (type == OBJ_REF_DELTA) { - reflen = encode_sha1ref(v4, src + hdrlen, buf); - hdrlen += 20; - sha1write(f, buf, reflen); - written += reflen; - } - - if (p->index_version > 1 && - check_pack_crc(p, &w_curs, offset, datalen, idx_nr)) - die("bad CRC for object at offset %"PRIuMAX" in %s", - (uintmax_t)offset, p->pack_name); - - offset += hdrlen; - datalen -= hdrlen; - - while (datalen) { - src = use_pack(p, &w_curs, offset, &avail); - if (avail > datalen) - avail = datalen; - sha1write(f, src, avail); - written += avail; - offset += avail; - datalen -= avail; - } - unuse_pack(&w_curs); - - return written; -} - -static unsigned char *get_delta_base(struct packed_git *p, off_t offset, - unsigned char *sha1_buf) -{ - struct pack_window *w_curs = NULL; - enum object_type type; - unsigned long avail, size; - int hdrlen; - unsigned char *src; - const unsigned char *base_sha1 = NULL; ; - - src = use_pack(p, &w_curs, offset, &avail); - hdrlen = unpack_object_header_buffer(src, avail, &type, &size); - - if (type == OBJ_OFS_DELTA) { - const unsigned char *cp = src + hdrlen; - off_t base_offset = decode_varint(&cp); - base_offset = offset - base_offset; - if (base_offset <= 0 || base_offset >= offset) { - error("delta offset out of bound"); - } else { - struct revindex_entry *revidx; - revidx = find_pack_revindex(p, base_offset); - base_sha1 = nth_packed_object_sha1(p, revidx->nr); - } - } else if (type == OBJ_REF_DELTA) { - base_sha1 = src + hdrlen; - } else - error("expected to get a delta but got a %s", typename(type)); - - unuse_pack(&w_curs); - - if (!base_sha1) - return NULL; - hashcpy(sha1_buf, base_sha1); - return sha1_buf; -} - -static off_t packv4_write_object(struct packv4_tables *v4, - struct sha1file *f, struct packed_git *p, - struct pack_idx_entry *obj) -{ - void *src, *result; - struct object_info oi = {}; - enum object_type type, packed_type; - unsigned long obj_size, buf_size; - unsigned int hdrlen; - - oi.typep = &type; - oi.sizep = &obj_size; - packed_type = packed_object_info(p, obj->offset, &oi); - if (packed_type < 0) - die("cannot get type of %s from %s", - sha1_to_hex(obj->sha1), p->pack_name); - - /* Some objects are copied without decompression */ - switch (type) { - case OBJ_COMMIT: - case OBJ_TREE: - break; - default: - return copy_object_data(v4, f, p, obj->offset); - } - - /* The rest is converted into their new format */ - src = unpack_entry(p, obj->offset, &type, &buf_size); - if (!src || obj_size != buf_size) - die("cannot unpack %s from %s", - sha1_to_hex(obj->sha1), p->pack_name); - if (check_sha1_signature(obj->sha1, src, buf_size, typename(type))) - die("packed %s from %s is corrupt", - sha1_to_hex(obj->sha1), p->pack_name); - - switch (type) { - case OBJ_COMMIT: - result = pv4_encode_commit(v4, src, &buf_size); - break; - case OBJ_TREE: - if (packed_type != OBJ_TREE) { - unsigned char sha1_buf[20], *ref_sha1; - void *ref; - enum object_type ref_type; - unsigned long ref_size; - - ref_sha1 = get_delta_base(p, obj->offset, sha1_buf); - if (!ref_sha1) - die("unable to get delta base sha1 for %s", - sha1_to_hex(obj->sha1)); - ref = read_sha1_file(ref_sha1, &ref_type, &ref_size); - if (!ref || ref_type != OBJ_TREE) - die("cannot obtain delta base for %s", - sha1_to_hex(obj->sha1)); - result = pv4_encode_tree(v4, src, &buf_size, - ref, ref_size, ref_sha1); - free(ref); - } else { - result = pv4_encode_tree(v4, src, &buf_size, - NULL, 0, NULL); - } - break; - default: - die("unexpected object type %d", type); - } - free(src); - if (!result) { - warning("can't convert %s object %s", - typename(type), sha1_to_hex(obj->sha1)); - /* fall back to copy the object in its original form */ - return copy_object_data(v4, f, p, obj->offset); - } - - /* Use bit 3 to indicate a special type encoding */ - type += 8; - hdrlen = write_object_header(f, type, obj_size); - sha1write(f, result, buf_size); - free(result); - return hdrlen + buf_size; -} - -static char *normalize_pack_name(const char *path) -{ - char buf[PATH_MAX]; - int len; - - len = strlcpy(buf, path, PATH_MAX); - if (len >= PATH_MAX - 6) - die("name too long: %s", path); - - /* - * In addition to "foo.idx" we accept "foo.pack" and "foo"; - * normalize these forms to "foo.pack". - */ - if (has_extension(buf, ".idx")) { - strcpy(buf + len - 4, ".pack"); - len++; - } else if (!has_extension(buf, ".pack")) { - strcpy(buf + len, ".pack"); - len += 5; - } - - return xstrdup(buf); -} - -static struct packed_git *open_pack(const char *path) -{ - char *packname = normalize_pack_name(path); - int len = strlen(packname); - struct packed_git *p; - - strcpy(packname + len - 5, ".idx"); - p = add_packed_git(packname, len - 1, 1); - if (!p) - die("packfile %s not found.", packname); - - install_packed_git(p); - if (open_pack_index(p)) - die("packfile %s index not opened", p->pack_name); - - free(packname); - return p; -} - -static void process_one_pack(struct packv4_tables *v4, char *src_pack, char *dst_pack) -{ - struct packed_git *p; - struct sha1file *f; - struct pack_idx_entry *objs, **p_objs; - struct pack_idx_option idx_opts; - unsigned i, nr_objects; - off_t written = 0; - char *packname; - unsigned char pack_sha1[20]; - struct progress *progress_state; - - p = open_pack(src_pack); - if (!p) - die("unable to open source pack"); - - nr_objects = p->num_objects; - objs = get_packed_object_list(p); - p_objs = sort_objs_by_offset(objs, nr_objects); - - create_pack_dictionaries(v4, p, p_objs); - sort_dict_entries_by_hits(v4->commit_ident_table); - sort_dict_entries_by_hits(v4->tree_path_table); - - packname = normalize_pack_name(dst_pack); - f = packv4_open(packname); - if (!f) - die("unable to open destination pack"); - written += packv4_write_header(f, nr_objects); - written += packv4_write_tables(f, v4); - - /* Let's write objects out, updating the object index list in place */ - progress_state = start_progress("Writing objects", nr_objects); - v4->all_objs = objs; - v4->all_objs_nr = nr_objects; - for (i = 0; i < nr_objects; i++) { - off_t obj_pos = written; - struct pack_idx_entry *obj = p_objs[i]; - crc32_begin(f); - written += packv4_write_object(v4, f, p, obj); - obj->offset = obj_pos; - obj->crc32 = crc32_end(f); - display_progress(progress_state, i+1); - } - stop_progress(&progress_state); - - sha1close(f, pack_sha1, CSUM_CLOSE | CSUM_FSYNC); - - reset_pack_idx_option(&idx_opts); - idx_opts.version = 3; - strcpy(packname + strlen(packname) - 5, ".idx"); - write_idx_file(packname, p_objs, nr_objects, &idx_opts, pack_sha1); - - free(packname); -} - -static int git_pack_config(const char *k, const char *v, void *cb) -{ - if (!strcmp(k, "pack.compression")) { - int level = git_config_int(k, v); - if (level == -1) - level = Z_DEFAULT_COMPRESSION; - else if (level < 0 || level > Z_BEST_COMPRESSION) - die("bad pack compression level %d", level); - pack_compression_level = level; - pack_compression_seen = 1; - return 0; - } - return git_default_config(k, v, cb); -} - -int main(int argc, char *argv[]) -{ - struct packv4_tables v4; - char *src_pack, *dst_pack; - - if (argc == 3) { - src_pack = argv[1]; - dst_pack = argv[2]; - } else if (argc == 4 && !prefixcmp(argv[1], "--min-tree-copy=")) { - min_tree_copy = atoi(argv[1] + strlen("--min-tree-copy=")); - src_pack = argv[2]; - dst_pack = argv[3]; - } else { - fprintf(stderr, "Usage: %s [--min-tree-copy=<n>] <src_packfile> <dst_packfile>\n", argv[0]); - exit(1); - } - - git_config(git_pack_config, NULL); - if (!pack_compression_seen && core_compression_seen) - pack_compression_level = core_compression_level; - process_one_pack(&v4, src_pack, dst_pack); - if (0) - dict_dump(&v4); - return 0; -} diff --git a/packv4-create.h b/packv4-create.h index 0c8c77b..c1f32fd 100644 --- a/packv4-create.h +++ b/packv4-create.h @@ -8,4 +8,43 @@ struct packv4_tables { struct dict_table *tree_path_table; }; +struct dict_table { + unsigned char *data; + unsigned cur_offset; + unsigned size; + struct data_entry *entry; + unsigned nb_entries; + unsigned max_entries; + unsigned *hash; + unsigned hash_size; +}; + + +struct sha1file; + +struct dict_table *create_dict_table(void); +int dict_add_entry(struct dict_table *t, int val, const char *str, int str_len); +void destroy_dict_table(struct dict_table *t); +void dict_dump(struct packv4_tables *v4); + +int add_commit_dict_entries(struct dict_table *commit_ident_table, + void *buf, unsigned long size); +int add_tree_dict_entries(struct dict_table *tree_path_table, + void *buf, unsigned long size); +void sort_dict_entries_by_hits(struct dict_table *t); + +int encode_sha1ref(const struct packv4_tables *v4, + const unsigned char *sha1, unsigned char *buf); +unsigned long packv4_write_tables(struct sha1file *f, + const struct packv4_tables *v4); +void *pv4_encode_commit(const struct packv4_tables *v4, + void *buffer, unsigned long *sizep); +void *pv4_encode_tree(const struct packv4_tables *v4, + void *_buffer, unsigned long *sizep, + void *delta, unsigned long delta_size, + const unsigned char *delta_sha1); + +void process_one_pack(struct packv4_tables *v4, + char *src_pack, char *dst_pack); + #endif diff --git a/test-packv4.c b/test-packv4.c new file mode 100644 index 0000000..3b0d7a2 --- /dev/null +++ b/test-packv4.c @@ -0,0 +1,476 @@ +#include "cache.h" +#include "pack.h" +#include "pack-revindex.h" +#include "progress.h" +#include "varint.h" +#include "packv4-create.h" + +extern int pack_compression_seen; +extern int pack_compression_level; +extern int min_tree_copy; + +static struct pack_idx_entry *get_packed_object_list(struct packed_git *p) +{ + unsigned i, nr_objects = p->num_objects; + struct pack_idx_entry *objects; + + objects = xmalloc((nr_objects + 1) * sizeof(*objects)); + objects[nr_objects].offset = p->pack_size - 20; + for (i = 0; i < nr_objects; i++) { + hashcpy(objects[i].sha1, nth_packed_object_sha1(p, i)); + objects[i].offset = nth_packed_object_offset(p, i); + } + + return objects; +} + +static int sort_by_offset(const void *e1, const void *e2) +{ + const struct pack_idx_entry * const *entry1 = e1; + const struct pack_idx_entry * const *entry2 = e2; + if ((*entry1)->offset < (*entry2)->offset) + return -1; + if ((*entry1)->offset > (*entry2)->offset) + return 1; + return 0; +} + +static struct pack_idx_entry **sort_objs_by_offset(struct pack_idx_entry *list, + unsigned nr_objects) +{ + unsigned i; + struct pack_idx_entry **sorted; + + sorted = xmalloc((nr_objects + 1) * sizeof(*sorted)); + for (i = 0; i < nr_objects + 1; i++) + sorted[i] = &list[i]; + qsort(sorted, nr_objects + 1, sizeof(*sorted), sort_by_offset); + + return sorted; +} + +static int create_pack_dictionaries(struct packv4_tables *v4, + struct packed_git *p, + struct pack_idx_entry **obj_list) +{ + struct progress *progress_state; + unsigned int i; + + v4->commit_ident_table = create_dict_table(); + v4->tree_path_table = create_dict_table(); + + progress_state = start_progress("Scanning objects", p->num_objects); + for (i = 0; i < p->num_objects; i++) { + struct pack_idx_entry *obj = obj_list[i]; + void *data; + enum object_type type; + unsigned long size; + struct object_info oi = {}; + int (*add_dict_entries)(struct dict_table *, void *, unsigned long); + struct dict_table *dict; + + display_progress(progress_state, i+1); + + oi.typep = &type; + oi.sizep = &size; + if (packed_object_info(p, obj->offset, &oi) < 0) + die("cannot get type of %s from %s", + sha1_to_hex(obj->sha1), p->pack_name); + + switch (type) { + case OBJ_COMMIT: + add_dict_entries = add_commit_dict_entries; + dict = v4->commit_ident_table; + break; + case OBJ_TREE: + add_dict_entries = add_tree_dict_entries; + dict = v4->tree_path_table; + break; + default: + continue; + } + data = unpack_entry(p, obj->offset, &type, &size); + if (!data) + die("cannot unpack %s from %s", + sha1_to_hex(obj->sha1), p->pack_name); + if (check_sha1_signature(obj->sha1, data, size, typename(type))) + die("packed %s from %s is corrupt", + sha1_to_hex(obj->sha1), p->pack_name); + if (add_dict_entries(dict, data, size) < 0) + die("can't process %s object %s", + typename(type), sha1_to_hex(obj->sha1)); + free(data); + } + + stop_progress(&progress_state); + return 0; +} + +static struct sha1file * packv4_open(char *path) +{ + int fd; + + fd = open(path, O_CREAT|O_EXCL|O_WRONLY, 0600); + if (fd < 0) + die_errno("unable to create '%s'", path); + return sha1fd(fd, path); +} + +static unsigned int packv4_write_header(struct sha1file *f, unsigned nr_objects) +{ + struct pack_header hdr; + + hdr.hdr_signature = htonl(PACK_SIGNATURE); + hdr.hdr_version = htonl(4); + hdr.hdr_entries = htonl(nr_objects); + sha1write(f, &hdr, sizeof(hdr)); + + return sizeof(hdr); +} + +static int write_object_header(struct sha1file *f, enum object_type type, unsigned long size) +{ + unsigned char buf[16]; + uint64_t val; + int len; + + /* + * We really have only one kind of delta object. + */ + if (type == OBJ_OFS_DELTA) + type = OBJ_REF_DELTA; + + /* + * We allocate 4 bits in the LSB for the object type which should + * be good for quite a while, given that we effectively encodes + * only 5 object types: commit, tree, blob, delta, tag. + */ + val = size; + if (MSB(val, 4)) + die("fixme: the code doesn't currently cope with big sizes"); + val <<= 4; + val |= type; + len = encode_varint(val, buf); + sha1write(f, buf, len); + return len; +} + +static unsigned long copy_object_data(struct packv4_tables *v4, + struct sha1file *f, struct packed_git *p, + off_t offset) +{ + struct pack_window *w_curs = NULL; + struct revindex_entry *revidx; + enum object_type type; + unsigned long avail, size, datalen, written; + int hdrlen, reflen, idx_nr; + unsigned char *src, buf[24]; + + revidx = find_pack_revindex(p, offset); + idx_nr = revidx->nr; + datalen = revidx[1].offset - offset; + + src = use_pack(p, &w_curs, offset, &avail); + hdrlen = unpack_object_header_buffer(src, avail, &type, &size); + + written = write_object_header(f, type, size); + + if (type == OBJ_OFS_DELTA) { + const unsigned char *cp = src + hdrlen; + off_t base_offset = decode_varint(&cp); + hdrlen = cp - src; + base_offset = offset - base_offset; + if (base_offset <= 0 || base_offset >= offset) + die("delta offset out of bound"); + revidx = find_pack_revindex(p, base_offset); + reflen = encode_sha1ref(v4, + nth_packed_object_sha1(p, revidx->nr), + buf); + sha1write(f, buf, reflen); + written += reflen; + } else if (type == OBJ_REF_DELTA) { + reflen = encode_sha1ref(v4, src + hdrlen, buf); + hdrlen += 20; + sha1write(f, buf, reflen); + written += reflen; + } + + if (p->index_version > 1 && + check_pack_crc(p, &w_curs, offset, datalen, idx_nr)) + die("bad CRC for object at offset %"PRIuMAX" in %s", + (uintmax_t)offset, p->pack_name); + + offset += hdrlen; + datalen -= hdrlen; + + while (datalen) { + src = use_pack(p, &w_curs, offset, &avail); + if (avail > datalen) + avail = datalen; + sha1write(f, src, avail); + written += avail; + offset += avail; + datalen -= avail; + } + unuse_pack(&w_curs); + + return written; +} + +static unsigned char *get_delta_base(struct packed_git *p, off_t offset, + unsigned char *sha1_buf) +{ + struct pack_window *w_curs = NULL; + enum object_type type; + unsigned long avail, size; + int hdrlen; + unsigned char *src; + const unsigned char *base_sha1 = NULL; ; + + src = use_pack(p, &w_curs, offset, &avail); + hdrlen = unpack_object_header_buffer(src, avail, &type, &size); + + if (type == OBJ_OFS_DELTA) { + const unsigned char *cp = src + hdrlen; + off_t base_offset = decode_varint(&cp); + base_offset = offset - base_offset; + if (base_offset <= 0 || base_offset >= offset) { + error("delta offset out of bound"); + } else { + struct revindex_entry *revidx; + revidx = find_pack_revindex(p, base_offset); + base_sha1 = nth_packed_object_sha1(p, revidx->nr); + } + } else if (type == OBJ_REF_DELTA) { + base_sha1 = src + hdrlen; + } else + error("expected to get a delta but got a %s", typename(type)); + + unuse_pack(&w_curs); + + if (!base_sha1) + return NULL; + hashcpy(sha1_buf, base_sha1); + return sha1_buf; +} + +static off_t packv4_write_object(struct packv4_tables *v4, + struct sha1file *f, struct packed_git *p, + struct pack_idx_entry *obj) +{ + void *src, *result; + struct object_info oi = {}; + enum object_type type, packed_type; + unsigned long obj_size, buf_size; + unsigned int hdrlen; + + oi.typep = &type; + oi.sizep = &obj_size; + packed_type = packed_object_info(p, obj->offset, &oi); + if (packed_type < 0) + die("cannot get type of %s from %s", + sha1_to_hex(obj->sha1), p->pack_name); + + /* Some objects are copied without decompression */ + switch (type) { + case OBJ_COMMIT: + case OBJ_TREE: + break; + default: + return copy_object_data(v4, f, p, obj->offset); + } + + /* The rest is converted into their new format */ + src = unpack_entry(p, obj->offset, &type, &buf_size); + if (!src || obj_size != buf_size) + die("cannot unpack %s from %s", + sha1_to_hex(obj->sha1), p->pack_name); + if (check_sha1_signature(obj->sha1, src, buf_size, typename(type))) + die("packed %s from %s is corrupt", + sha1_to_hex(obj->sha1), p->pack_name); + + switch (type) { + case OBJ_COMMIT: + result = pv4_encode_commit(v4, src, &buf_size); + break; + case OBJ_TREE: + if (packed_type != OBJ_TREE) { + unsigned char sha1_buf[20], *ref_sha1; + void *ref; + enum object_type ref_type; + unsigned long ref_size; + + ref_sha1 = get_delta_base(p, obj->offset, sha1_buf); + if (!ref_sha1) + die("unable to get delta base sha1 for %s", + sha1_to_hex(obj->sha1)); + ref = read_sha1_file(ref_sha1, &ref_type, &ref_size); + if (!ref || ref_type != OBJ_TREE) + die("cannot obtain delta base for %s", + sha1_to_hex(obj->sha1)); + result = pv4_encode_tree(v4, src, &buf_size, + ref, ref_size, ref_sha1); + free(ref); + } else { + result = pv4_encode_tree(v4, src, &buf_size, + NULL, 0, NULL); + } + break; + default: + die("unexpected object type %d", type); + } + free(src); + if (!result) { + warning("can't convert %s object %s", + typename(type), sha1_to_hex(obj->sha1)); + /* fall back to copy the object in its original form */ + return copy_object_data(v4, f, p, obj->offset); + } + + /* Use bit 3 to indicate a special type encoding */ + type += 8; + hdrlen = write_object_header(f, type, obj_size); + sha1write(f, result, buf_size); + free(result); + return hdrlen + buf_size; +} + +static char *normalize_pack_name(const char *path) +{ + char buf[PATH_MAX]; + int len; + + len = strlcpy(buf, path, PATH_MAX); + if (len >= PATH_MAX - 6) + die("name too long: %s", path); + + /* + * In addition to "foo.idx" we accept "foo.pack" and "foo"; + * normalize these forms to "foo.pack". + */ + if (has_extension(buf, ".idx")) { + strcpy(buf + len - 4, ".pack"); + len++; + } else if (!has_extension(buf, ".pack")) { + strcpy(buf + len, ".pack"); + len += 5; + } + + return xstrdup(buf); +} + +static struct packed_git *open_pack(const char *path) +{ + char *packname = normalize_pack_name(path); + int len = strlen(packname); + struct packed_git *p; + + strcpy(packname + len - 5, ".idx"); + p = add_packed_git(packname, len - 1, 1); + if (!p) + die("packfile %s not found.", packname); + + install_packed_git(p); + if (open_pack_index(p)) + die("packfile %s index not opened", p->pack_name); + + free(packname); + return p; +} + +void process_one_pack(struct packv4_tables *v4, char *src_pack, char *dst_pack) +{ + struct packed_git *p; + struct sha1file *f; + struct pack_idx_entry *objs, **p_objs; + struct pack_idx_option idx_opts; + unsigned i, nr_objects; + off_t written = 0; + char *packname; + unsigned char pack_sha1[20]; + struct progress *progress_state; + + p = open_pack(src_pack); + if (!p) + die("unable to open source pack"); + + nr_objects = p->num_objects; + objs = get_packed_object_list(p); + p_objs = sort_objs_by_offset(objs, nr_objects); + + create_pack_dictionaries(v4, p, p_objs); + sort_dict_entries_by_hits(v4->commit_ident_table); + sort_dict_entries_by_hits(v4->tree_path_table); + + packname = normalize_pack_name(dst_pack); + f = packv4_open(packname); + if (!f) + die("unable to open destination pack"); + written += packv4_write_header(f, nr_objects); + written += packv4_write_tables(f, v4); + + /* Let's write objects out, updating the object index list in place */ + progress_state = start_progress("Writing objects", nr_objects); + v4->all_objs = objs; + v4->all_objs_nr = nr_objects; + for (i = 0; i < nr_objects; i++) { + off_t obj_pos = written; + struct pack_idx_entry *obj = p_objs[i]; + crc32_begin(f); + written += packv4_write_object(v4, f, p, obj); + obj->offset = obj_pos; + obj->crc32 = crc32_end(f); + display_progress(progress_state, i+1); + } + stop_progress(&progress_state); + + sha1close(f, pack_sha1, CSUM_CLOSE | CSUM_FSYNC); + + reset_pack_idx_option(&idx_opts); + idx_opts.version = 3; + strcpy(packname + strlen(packname) - 5, ".idx"); + write_idx_file(packname, p_objs, nr_objects, &idx_opts, pack_sha1); + + free(packname); +} + +static int git_pack_config(const char *k, const char *v, void *cb) +{ + if (!strcmp(k, "pack.compression")) { + int level = git_config_int(k, v); + if (level == -1) + level = Z_DEFAULT_COMPRESSION; + else if (level < 0 || level > Z_BEST_COMPRESSION) + die("bad pack compression level %d", level); + pack_compression_level = level; + pack_compression_seen = 1; + return 0; + } + return git_default_config(k, v, cb); +} + +int main(int argc, char *argv[]) +{ + struct packv4_tables v4; + char *src_pack, *dst_pack; + + if (argc == 3) { + src_pack = argv[1]; + dst_pack = argv[2]; + } else if (argc == 4 && !prefixcmp(argv[1], "--min-tree-copy=")) { + min_tree_copy = atoi(argv[1] + strlen("--min-tree-copy=")); + src_pack = argv[2]; + dst_pack = argv[3]; + } else { + fprintf(stderr, "Usage: %s [--min-tree-copy=<n>] <src_packfile> <dst_packfile>\n", argv[0]); + exit(1); + } + + git_config(git_pack_config, NULL); + if (!pack_compression_seen && core_compression_seen) + pack_compression_level = core_compression_level; + process_one_pack(&v4, src_pack, dst_pack); + if (0) + dict_dump(&v4); + return 0; +} -- 1.8.2.83.gc99314b -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html