The utilities for creating and querying pack files, for the fast-import of files, and for pruning a git repository were modified to support message digests (either in individual files or in 'mds' files that parallel pack index files). Signed-off-by: Bill Zaumen <bill.zaumen+git@xxxxxxxxx> --- builtin/count-objects.c | 92 +++++++++++++++++++++++++++++- builtin/index-pack.c | 139 +++++++++++++++++++++++++++++++++++++++++---- builtin/pack-objects.c | 81 ++++++++++++++++++++++++++- builtin/pack-redundant.c | 14 ++++- builtin/prune-packed.c | 21 ++++++- builtin/prune.c | 1 + builtin/verify-pack.c | 14 ++++- fast-import.c | 77 ++++++++++++++++++++++++- git-repack.sh | 12 +++- t/t5300-pack-object.sh | 17 +++++- t/t5301-sliding-window.sh | 14 ++++- t/t5302-pack-index.sh | 6 +- t/t5304-prune.sh | 13 +++-- t/t9300-fast-import.sh | 8 ++- 14 files changed, 467 insertions(+), 42 deletions(-) diff --git a/builtin/count-objects.c b/builtin/count-objects.c index c37cb98..47135e1 100644 --- a/builtin/count-objects.c +++ b/builtin/count-objects.c @@ -8,6 +8,12 @@ #include "dir.h" #include "builtin.h" #include "parse-options.h" +#include "mdsdb.h" + +int mdsmode = 0; +unsigned long has_loose_mds = 0; +unsigned long loose_mds_missing = 0; + static void count_objects(DIR *d, char *path, int len, int verbose, unsigned long *loose, @@ -53,20 +59,37 @@ static void count_objects(DIR *d, char *path, int len, int verbose, continue; } (*loose)++; - if (!verbose) + if (!verbose) { + if (mdsmode) { + if (get_sha1_hex(hex, sha1)) { + die("internal error"); + } else if (mdsdb_lookup(NULL, sha1, NULL) > 0) { + has_loose_mds++; + } else { + loose_mds_missing++; + } + } continue; + } memcpy(hex, path+len, 2); memcpy(hex+2, ent->d_name, 38); hex[40] = 0; if (get_sha1_hex(hex, sha1)) die("internal error"); + if (mdsmode) { + if (mdsdb_lookup(NULL, sha1, NULL) > 0) { + has_loose_mds++; + } else { + loose_mds_missing++; + } + } if (has_sha1_pack(sha1)) (*packed_loose)++; } } static char const * const 
count_objects_usage[] = { - "git count-objects [-v]", + "git count-objects [-v] [-M]", NULL }; @@ -80,6 +103,8 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix) off_t loose_size = 0; struct option opts[] = { OPT__VERBOSE(&verbose, "be verbose"), + OPT_BOOLEAN('M', "count-md", &mdsmode, + "count MDs (Message Digests)"), OPT_END(), }; @@ -90,6 +115,7 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix) memcpy(path, objdir, len); if (len && objdir[len-1] != '/') path[len++] = '/'; + mdsdb_open(NULL); for (i = 0; i < 256; i++) { DIR *d; sprintf(path + len, "%02x", i); @@ -100,10 +126,16 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix) &loose, &loose_size, &packed_loose, &garbage); closedir(d); } + mdsdb_close(NULL); if (verbose) { struct packed_git *p; unsigned long num_pack = 0; off_t size_pack = 0; + unsigned long mds_mismatched = 0; + unsigned long missing_mdsfile_count = 0; + unsigned long mds_count = 0; + int wsize = 0; + mdigest_t digest; if (!packed_git) prepare_packed_git(); for (p = packed_git; p; p = p->next) { @@ -114,6 +146,40 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix) packed += p->num_objects; size_pack += p->pack_size + p->index_size; num_pack++; + if (!mdsmode) + continue; + if (open_pack_mds(p)) { + missing_mdsfile_count++; + continue; + } + /* + * Assume mds version 1 for now. We check that + * the mds file has the right size and record if it + * doesn't. If it is the right size, we go through + * all the entries and count the number of sha1 hashes + * for which there is a recorded CRC. We do not + * check if the CRC is the right one for the + * corresponding object: run git verify-pack to do + * that. 
+ */ + if (p->mds_size > 7) { + wsize = ((unsigned char *)(p->mds_data))[7] * 4; + } + if (p->mds_size == (size_t)8 + + (((size_t) + ((p->num_objects)/4 + (p->num_objects % 4 != 0)) + * (size_t)4 * (size_t)(1 + wsize)) + + (size_t)(20 * 2))) { + for (i = 0; i < p->num_objects; i++) { + mds_count += + (nth_packed_object_mdigest(p, + i, + &digest) + == 1); + } + } else { + mds_mismatched++; + } } printf("count: %lu\n", loose); printf("size: %lu\n", (unsigned long) (loose_size / 1024)); @@ -122,9 +188,31 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix) printf("size-pack: %lu\n", (unsigned long) (size_pack / 1024)); printf("prune-packable: %lu\n", packed_loose); printf("garbage: %lu\n", garbage); + if (mdsmode) { + if (missing_mdsfile_count) { + printf("missing MD (Message Digest) " + "files: %lu\n", + missing_mdsfile_count); + } + if (mds_mismatched) + printf("MD (Message Digest) files with" + " wrong size: %lu " + "(file extension = .mds)\n", + mds_mismatched); + if (packed != mds_count) { + printf("missing MD (Message Digest)" + " count: %lu\n", + packed - mds_count); + } + } } else printf("%lu objects, %lu kilobytes\n", loose, (unsigned long) (loose_size / 1024)); + if (mdsmode && loose_mds_missing) { + assert(loose == (loose_mds_missing + has_loose_mds)); + printf("%lu loose objects with no MD (Message Digest)\n", + loose_mds_missing); + } return 0; } diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 98025da..127f879 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1,3 +1,4 @@ +#include <unistd.h> #include "builtin.h" #include "delta.h" #include "pack.h" @@ -23,6 +24,14 @@ struct object_entry { int base_object_no; }; +static int sha1_compare(const void *_a, const void *_b) +{ + struct object_entry *a = (struct object_entry *)_a; + struct object_entry *b = (struct object_entry *)_b; + return hashcmp(a->idx.sha1, b->idx.sha1); +} + + union delta_base { unsigned char sha1[20]; off_t offset; @@ -447,9 +456,10 @@ 
static void find_delta_children(const union delta_base *base, } static void sha1_object(const void *data, unsigned long size, - enum object_type type, unsigned char *sha1) + enum object_type type, unsigned char *sha1, + mdigest_t *digestp) { - hash_sha1_file(data, size, typename(type), sha1); + hash_sha1_file_extended(data, size, typename(type), sha1, digestp); if (has_sha1_file(sha1)) { void *has_data; enum object_type has_type; @@ -549,7 +559,8 @@ static void resolve_delta(struct object_entry *delta_obj, if (!result->data) bad_object(delta_obj->idx.offset, "failed to apply delta"); sha1_object(result->data, result->size, delta_obj->real_type, - delta_obj->idx.sha1); + delta_obj->idx.sha1, &(delta_obj->idx.digest)); + delta_obj->idx.has_digest = 1; nr_resolved_deltas++; } @@ -643,8 +654,12 @@ static void parse_pack_objects(unsigned char *sha1) nr_deltas++; delta->obj_no = i; delta++; - } else - sha1_object(data, obj->size, obj->type, obj->idx.sha1); + } else { + sha1_object(data, obj->size, obj->type, obj->idx.sha1, + &(obj->idx.digest)); + obj->idx.has_digest = 1; + } + free(data); display_progress(progress, i+1); } @@ -804,6 +819,7 @@ static void fix_unresolved_deltas(struct sha1file *f, int nr_unresolved) static void final(const char *final_pack_name, const char *curr_pack_name, const char *final_index_name, const char *curr_index_name, + const char *final_mds_name, const char *curr_mds_name, const char *keep_name, const char *keep_msg, unsigned char *sha1) { @@ -866,6 +882,18 @@ static void final(const char *final_pack_name, const char *curr_pack_name, } else chmod(final_index_name, 0444); + if (final_mds_name != curr_mds_name) { + if (!final_mds_name) { + snprintf(name, sizeof(name), "%s/pack/pack-%s.mds", + get_object_directory(), sha1_to_hex(sha1)); + final_mds_name = name; + } + if (move_temp_to_file(curr_mds_name, final_mds_name)) + die("cannot store mds file"); + } else + chmod(final_mds_name, 0444); + + if (!from_stdin) { printf("%s\n", 
sha1_to_hex(sha1)); } else { @@ -972,18 +1000,46 @@ static void read_idx_option(struct pack_idx_option *opts, const char *pack_name) free(p); } -static void show_pack_info(int stat_only) +static void show_pack_info(int stat, int stat_only, int show_mds, + int mds_file_exists, const char *path) { int i, baseobjects = nr_objects - nr_deltas; unsigned long *chain_histogram = NULL; + void *data = NULL; + size_t mds_size = 0; + struct packed_git pg; + + if (mds_file_exists) { + int fd = git_open_noatime(path); + size_t required_size = 0; + struct stat st; + if (fd >= 0) { + if (fstat(fd, &st)) { + close(fd); + } else { + mds_size = xsize_t(st.st_size); + data = xmmap(NULL, mds_size, + PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + required_size = required_git_packed_mds_size + (path, data, nr_objects, mds_size); + if (required_size == 0) { + munmap(data, mds_size); + data = NULL; + } + } + } + if (data == NULL) mds_file_exists = 0; + pg.mds_data = data; + } - if (deepest_delta) + if (stat && deepest_delta) chain_histogram = xcalloc(deepest_delta, sizeof(unsigned long)); for (i = 0; i < nr_objects; i++) { struct object_entry *obj = &objects[i]; - if (is_delta_type(obj->type)) + if (chain_histogram && is_delta_type(obj->type)) chain_histogram[obj->delta_depth - 1]++; if (stat_only) continue; @@ -992,12 +1048,41 @@ static void show_pack_info(int stat_only) typename(obj->real_type), obj->size, (unsigned long)(obj[1].idx.offset - obj->idx.offset), (uintmax_t)obj->idx.offset); + if (show_mds) { + if (mds_file_exists) { + mdigest_t digest; + int has_digest = nth_packed_object_mdigest + (&pg, i, &digest); + if (has_digest) { + printf(" md=%s", + mdigest_to_external_hex + (&digest)); + if (obj->idx.has_digest) { + if (mdigest_tst + (&digest, + &(obj->idx.digest))) { + printf + (" (should be %s) ", + mdigest_to_external_hex + (&(obj->idx.digest))); + } + } + } else { + printf(" <no md> "); + } + } else { + printf(" <no md> "); + } + } if (is_delta_type(obj->type)) { struct 
object_entry *bobj = &objects[obj->base_object_no]; printf(" %u %s", obj->delta_depth, sha1_to_hex(bobj->idx.sha1)); } putchar('\n'); } + if (data) munmap(data, mds_size); + if (!stat) + return; if (baseobjects) printf("non delta: %d object%s\n", @@ -1015,10 +1100,12 @@ static void show_pack_info(int stat_only) int cmd_index_pack(int argc, const char **argv, const char *prefix) { int i, fix_thin_pack = 0, verify = 0, stat_only = 0, stat = 0; - const char *curr_pack, *curr_index; - const char *index_name = NULL, *pack_name = NULL; + int show_mds = 0; + const char *curr_pack, *curr_index, *curr_mds; + const char *index_name = NULL, *pack_name = NULL, *mds_name = NULL;; const char *keep_name = NULL, *keep_msg = NULL; char *index_name_buf = NULL, *keep_name_buf = NULL; + char *mds_name_buf = NULL; struct pack_idx_entry **idx_objects; struct pack_idx_option opts; unsigned char pack_sha1[20]; @@ -1052,6 +1139,10 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) verify = 1; stat = 1; stat_only = 1; + } else if (!strcmp(arg, "-M") || + !strcmp(arg, "--show-mds")) { + verify = 1; + show_mds = 1; } else if (!strcmp(arg, "--keep")) { keep_msg = ""; } else if (!prefixcmp(arg, "--keep=")) { @@ -1075,6 +1166,10 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) if (index_name || (i+1) >= argc) usage(index_pack_usage); index_name = argv[++i]; + } else if (!strcmp(arg, "-m")) { + if (mds_name || (i+1) >= argc) + usage(index_pack_usage); + mds_name = argv[++i]; } else if (!prefixcmp(arg, "--index-version=")) { char *c; opts.version = strtoul(arg + 16, &c, 10); @@ -1108,6 +1203,16 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) strcpy(index_name_buf + len - 5, ".idx"); index_name = index_name_buf; } + if (!mds_name && pack_name) { + int len = strlen(pack_name); + if (!has_extension(pack_name, ".pack")) + die("packfile name '%s' does not end with '.pack'", + pack_name); + mds_name_buf = xmalloc(len); + 
memcpy(mds_name_buf, pack_name, len - 5); + strcpy(mds_name_buf + len - 5, ".mds"); + mds_name = mds_name_buf; + } if (keep_msg && !keep_name && pack_name) { int len = strlen(pack_name); if (!has_extension(pack_name, ".pack")) @@ -1170,24 +1275,34 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) if (strict) check_objects(); - if (stat) - show_pack_info(stat_only); + if (stat || show_mds) { + int mds_file_exists = !access(mds_name, R_OK); + if (mds_file_exists && show_mds) { + qsort (objects, nr_objects, sizeof (struct object_entry), + sha1_compare); + } + show_pack_info(stat, stat_only, show_mds, mds_file_exists, + mds_name); + } idx_objects = xmalloc((nr_objects) * sizeof(struct pack_idx_entry *)); for (i = 0; i < nr_objects; i++) idx_objects[i] = &objects[i].idx; curr_index = write_idx_file(index_name, idx_objects, nr_objects, &opts, pack_sha1); + curr_mds = write_mds_file(mds_name, idx_objects, nr_objects, &opts,pack_sha1); free(idx_objects); if (!verify) final(pack_name, curr_pack, index_name, curr_index, + mds_name, curr_mds, keep_name, keep_msg, pack_sha1); else close(input_fd); free(objects); free(index_name_buf); + free(mds_name_buf); free(keep_name_buf); if (pack_name == NULL) free((void *) curr_pack); diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 96c1680..ccfe824 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -17,6 +17,7 @@ #include "progress.h" #include "refs.h" #include "thread-utils.h" +#include "mdsdb.h" static const char pack_usage[] = "git pack-objects [ -q | --progress | --all-progress ]\n" @@ -560,6 +561,8 @@ static struct object_entry **compute_write_order(void) objects[i].filled = 0; objects[i].delta_child = NULL; objects[i].delta_sibling = NULL; + objects[i].idx.has_digest = 0; + mdigest_clear(&objects[i].idx.digest); } /* @@ -684,8 +687,28 @@ static void write_pack_file(void) if (!pack_to_stdout) { struct stat st; +#if 1 char tmpname[PATH_MAX]; +#else + const char *idx_tmp_name; + 
const char *mds_tmp_name; + char tmpname[PATH_MAX]; + + idx_tmp_name = write_idx_file(NULL, written_list, nr_written, + &pack_idx_opts, sha1); + mds_tmp_name = write_mds_file(NULL, written_list, nr_written, + &pack_idx_opts, sha1); + + snprintf(tmpname, sizeof(tmpname), "%s-%s.pack", + base_name, sha1_to_hex(sha1)); + free_pack_by_name(tmpname); + if (adjust_shared_perm(pack_tmp_name)) + die_errno("unable to make temporary pack file readable"); + if (rename(pack_tmp_name, tmpname)) + die_errno("unable to rename temporary pack file"); +#endif + /* * Packs are runtime accessed in their mtime * order since newer packs are more likely to contain @@ -707,6 +730,7 @@ static void write_pack_file(void) tmpname, strerror(errno)); } +#if 1 /* Enough space for "-<sha-1>.pack"? */ if (sizeof(tmpname) <= strlen(base_name) + 50) die("pack base name '%s' too long", base_name); @@ -714,6 +738,25 @@ static void write_pack_file(void) finish_tmp_packfile(tmpname, pack_tmp_name, written_list, nr_written, &pack_idx_opts, sha1); +#else + snprintf(tmpname, sizeof(tmpname), "%s-%s.idx", + base_name, sha1_to_hex(sha1)); + if (adjust_shared_perm(idx_tmp_name)) + die_errno("unable to make temporary index file readable"); + if (rename(idx_tmp_name, tmpname)) + die_errno("unable to rename temporary index file"); + + snprintf(tmpname, sizeof(tmpname), "%s-%s.mds", + base_name, sha1_to_hex(sha1)); + if (adjust_shared_perm(mds_tmp_name)) + die_errno("unable to make temporary mds file readable"); + if (rename(mds_tmp_name, tmpname)) + die_errno("unable to rename temporary mds file"); + + + free((void *) idx_tmp_name); + free((void *) mds_tmp_name); +#endif free(pack_tmp_name); puts(sha1_to_hex(sha1)); } @@ -830,6 +873,8 @@ static int add_object_entry(const unsigned char *sha1, enum object_type type, off_t found_offset = 0; int ix; unsigned hash = name_hash(name); + int hasdigest; + mdigest_t digest; ix = nr_objects ? 
locate_object_entry_hash(sha1) : -1; if (ix >= 0) { @@ -846,7 +891,11 @@ static int add_object_entry(const unsigned char *sha1, enum object_type type, return 0; for (p = packed_git; p; p = p->next) { - off_t offset = find_pack_entry_one(sha1, p); + off_t offset; + hasdigest = 0; + mdigest_clear(&digest); + offset = find_pack_entry_one_extended(sha1, p, + &hasdigest, &digest); if (offset) { if (!found_pack) { if (!is_pack_valid(p)) { @@ -874,7 +923,37 @@ static int add_object_entry(const unsigned char *sha1, enum object_type type, entry = objects + nr_objects++; memset(entry, 0, sizeof(*entry)); + if (hasdigest == 0) { + /* + * We pick up MDs for local objects (we already checked the + * pack files). If that doesn't work, we compute it from + * scratch (which should occur rarely if at all). + */ + mdsdb_open(NULL); + switch (mdsdb_lookup(NULL, sha1, &digest)) { + case 1: + hasdigest = 1; + break; + default: + hasdigest = 0; + } + mdsdb_close(NULL); + if (!hasdigest) { + enum object_type type; + unsigned long size; + unsigned char sbuf[20]; + void *buf = read_sha1_file(sha1, &type, &size); + if (buf) { + const char *stype = typename(type); + hash_sha1_file_extended(buf, size, stype, sbuf, + &digest); + hasdigest = 1; + } + } + } hashcpy(entry->idx.sha1, sha1); + entry->idx.has_digest = hasdigest; + entry->idx.digest = digest; entry->hash = hash; if (type) entry->type = type; diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c index f5c6afc..c09397c 100644 --- a/builtin/pack-redundant.c +++ b/builtin/pack-redundant.c @@ -6,6 +6,7 @@ * */ +#include <unistd.h> #include "builtin.h" #define BLKSIZE 512 @@ -682,9 +683,16 @@ int cmd_pack_redundant(int argc, const char **argv, const char *prefix) } pl = red = pack_list_difference(local_packs, min); while (pl) { - printf("%s\n%s\n", - sha1_pack_index_name(pl->pack->sha1), - pl->pack->pack_name); + char *mdsfile = sha1_pack_mds_name(pl->pack->sha1); + if (!access(mdsfile, F_OK)) { + printf("%s\n%s\n%s\n", mdsfile, 
+ sha1_pack_index_name(pl->pack->sha1), + pl->pack->pack_name); + } else { + printf("%s\n%s\n", + sha1_pack_index_name(pl->pack->sha1), + pl->pack->pack_name); + } pl = pl->next; } if (verbose) diff --git a/builtin/prune-packed.c b/builtin/prune-packed.c index f9463de..ec5dfe8 100644 --- a/builtin/prune-packed.c +++ b/builtin/prune-packed.c @@ -43,8 +43,11 @@ void prune_packed_objects(int opts) { int i; static char pathname[PATH_MAX]; + static char mds_pathname[PATH_MAX]; const char *dir = get_object_directory(); + const char *mdsdir = get_object_mds_directory(); int len = strlen(dir); + int mdslen = strlen(mdsdir); if (opts == VERBOSE) progress = start_progress_delay("Removing duplicate objects", @@ -55,16 +58,26 @@ void prune_packed_objects(int opts) memcpy(pathname, dir, len); if (len && pathname[len-1] != '/') pathname[len++] = '/'; + memcpy(mds_pathname, mdsdir, mdslen); + if (mdslen && mds_pathname[mdslen-1] != '/') + mds_pathname[mdslen++] = '/'; for (i = 0; i < 256; i++) { DIR *d; + DIR *mds_d; display_progress(progress, i + 1); sprintf(pathname + len, "%02x/", i); d = opendir(pathname); - if (!d) - continue; - prune_dir(i, d, pathname, len + 3, opts); - closedir(d); + sprintf(mds_pathname + len, "%02x/", i); + mds_d = opendir(mds_pathname); + if (d) { + prune_dir(i, d, pathname, len + 3, opts); + closedir(d); + } + if (mds_d) { + prune_dir(i, mds_d, mds_pathname, mdslen + 3, opts); + closedir(mds_d); + } } stop_progress(&progress); } diff --git a/builtin/prune.c b/builtin/prune.c index 58d7cb8..25dde51 100644 --- a/builtin/prune.c +++ b/builtin/prune.c @@ -165,6 +165,7 @@ int cmd_prune(int argc, const char **argv, const char *prefix) mark_reachable_objects(&revs, 1, progress); stop_progress(&progress); prune_object_dir(get_object_directory()); + prune_object_dir(get_object_mds_directory()); prune_packed_objects(show_only); remove_temporary_files(get_object_directory()); diff --git a/builtin/verify-pack.c b/builtin/verify-pack.c index e841b4a..b94a11e 
100644 --- a/builtin/verify-pack.c +++ b/builtin/verify-pack.c @@ -5,14 +5,16 @@ #define VERIFY_PACK_VERBOSE 01 #define VERIFY_PACK_STAT_ONLY 02 +#define SHOW_MDS 04 static int verify_one_pack(const char *path, unsigned int flags) { struct child_process index_pack; - const char *argv[] = {"index-pack", NULL, NULL, NULL }; + const char *argv[] = {"index-pack", NULL, NULL, NULL, NULL }; struct strbuf arg = STRBUF_INIT; int verbose = flags & VERIFY_PACK_VERBOSE; int stat_only = flags & VERIFY_PACK_STAT_ONLY; + int show_mds = ((flags & SHOW_MDS) != 0) && !stat_only; int err; if (stat_only) @@ -22,6 +24,8 @@ static int verify_one_pack(const char *path, unsigned int flags) else argv[1] = "--verify"; + if (show_mds) argv[2] = "-M"; + /* * In addition to "foo.pack" we accept "foo.idx" and "foo"; * normalize these forms to "foo.pack" for "index-pack --verify". @@ -31,7 +35,7 @@ static int verify_one_pack(const char *path, unsigned int flags) strbuf_splice(&arg, arg.len - 3, 3, "pack", 4); else if (!has_extension(arg.buf, ".pack")) strbuf_add(&arg, ".pack", 5); - argv[2] = arg.buf; + argv[2 + show_mds] = arg.buf; memset(&index_pack, 0, sizeof(index_pack)); index_pack.argv = argv; @@ -46,6 +50,10 @@ static int verify_one_pack(const char *path, unsigned int flags) if (!stat_only) printf("%s: ok\n", arg.buf); } + } else if (show_mds) { + printf("%s: listed (%s)\n-----------------\n", arg.buf, + (err? 
"bad": "ok")); + } strbuf_release(&arg); @@ -67,6 +75,8 @@ int cmd_verify_pack(int argc, const char **argv, const char *prefix) VERIFY_PACK_VERBOSE), OPT_BIT('s', "stat-only", &flags, "show statistics only", VERIFY_PACK_STAT_ONLY), + OPT_BIT('M', "show-mds", &flags, "show message digests / CRCs", + SHOW_MDS), OPT_END() }; diff --git a/fast-import.c b/fast-import.c index 4b9c4b7..f672a76 100644 --- a/fast-import.c +++ b/fast-import.c @@ -165,6 +165,11 @@ Format of STDIN stream: #include "exec_cmd.h" #include "dir.h" +#ifdef PACKDB +#include "packdb.h" +#endif + + #define PACK_ID_BITS 16 #define MAX_PACK_ID ((1<<PACK_ID_BITS)-1) #define DEPTH_BITS 13 @@ -558,6 +563,8 @@ static struct object_entry *new_object(unsigned char *sha1) e = blocks->next_free++; hashcpy(e->idx.sha1, sha1); + e->idx.has_digest = 0; + mdigest_clear(&e->idx.digest); return e; } @@ -904,9 +911,34 @@ static const char *create_index(void) return tmpfile; } -static char *keep_pack(const char *curr_index_name) +static const char *create_mds(void) +{ + const char *tmpfile; + struct pack_idx_entry **mds, **c, **last; + struct object_entry *e; + struct object_entry_pool *o; + + /* Build the table of object IDs. 
*/ + mds = xmalloc(object_count * sizeof(*mds)); + c = mds; + for (o = blocks; o; o = o->next_pool) + for (e = o->next_free; e-- != o->entries;) + if (pack_id == e->pack_id) + *c++ = &e->idx; + last = mds + object_count; + if (c != last) + die("internal consistency error creating the mds file"); + + tmpfile = write_mds_file(NULL, mds, object_count, &pack_idx_opts, pack_data->sha1); + free(mds); + return tmpfile; +} + + +static char *keep_pack(const char *curr_index_name, const char *curr_mds_name) { static char name[PATH_MAX]; + static char rname[PATH_MAX]; static const char *keep_msg = "fast-import"; int keep_fd; @@ -927,6 +959,13 @@ static char *keep_pack(const char *curr_index_name) if (move_temp_to_file(curr_index_name, name)) die("cannot store index file"); free((void *)curr_index_name); + + snprintf(rname, sizeof(rname), "%s/pack/pack-%s.mds", + get_object_directory(), sha1_to_hex(pack_data->sha1)); + if (move_temp_to_file(curr_mds_name, rname)) + die("cannot store index file"); + free((void *)curr_mds_name); + return name; } @@ -951,6 +990,7 @@ static void end_packfile(void) if (object_count) { unsigned char cur_pack_sha1[20]; char *idx_name; + const char *n1, *n2; int i; struct branch *b; struct tag *t; @@ -961,7 +1001,9 @@ static void end_packfile(void) pack_data->pack_name, object_count, cur_pack_sha1, pack_size); close(pack_data->pack_fd); - idx_name = keep_pack(create_index()); + n1 = create_index(); + n2 = create_mds(); + idx_name = keep_pack(n1, n2); /* Register the packfile with core git's machinery. 
*/ new_p = add_packed_git(idx_name, strlen(idx_name), 1); @@ -1021,6 +1063,8 @@ static int store_object( unsigned long hdrlen, deltalen; git_SHA_CTX c; git_zstream s; + mdigest_t digest; + mdigest_context_t mdc; hdrlen = sprintf((char *)hdr,"%s %lu", typename(type), (unsigned long)dat->len) + 1; @@ -1028,10 +1072,28 @@ static int store_object( git_SHA1_Update(&c, hdr, hdrlen); git_SHA1_Update(&c, dat->buf, dat->len); git_SHA1_Final(sha1, &c); + mdigest_Init(&mdc, MDIGEST_DEFAULT); + mdigest_Update(&mdc, (unsigned char *)(dat->buf), dat->len); + mdigest_Final(&digest, &mdc); + + if (has_sha1_file(sha1)) { + mdigest_t old_digest; + if (has_sha1_file_digest(sha1, &old_digest)) { + if (mdigest_tst(&digest,&old_digest)) { + die("hash collision on %s [fast-import]", + sha1_to_hex(sha1)); + } + } + } if (sha1out) hashcpy(sha1out, sha1); e = insert_object(sha1); + e->idx.has_digest = 1; + e->idx.digest = digest; +#ifdef PACKDB + packdb_process(sha1, &digest); +#endif if (mark) insert_mark(mark, e); if (e->idx.offset) { @@ -1157,6 +1219,8 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark) unsigned char *out_buf = xmalloc(out_sz); struct object_entry *e; unsigned char sha1[20]; + mdigest_t digest; + mdigest_context_t mdc; unsigned long hdrlen; off_t offset; git_SHA_CTX c; @@ -1177,6 +1241,7 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark) die("impossibly large object header"); git_SHA1_Init(&c); + mdigest_Init(&mdc, MDIGEST_DEFAULT); git_SHA1_Update(&c, out_buf, hdrlen); crc32_begin(pack_file); @@ -1199,6 +1264,7 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark) die("EOF in data (%" PRIuMAX " bytes remaining)", len); git_SHA1_Update(&c, in_buf, n); + mdigest_Update(&mdc, in_buf, n); s.next_in = in_buf; s.avail_in = n; len -= n; @@ -1225,11 +1291,14 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark) } git_deflate_end(&s); git_SHA1_Final(sha1, &c); + 
mdigest_Final(&digest, &mdc); if (sha1out) hashcpy(sha1out, sha1); e = insert_object(sha1); + e->idx.has_digest = 1; + e->idx.digest = digest; if (mark) insert_mark(mark, e); @@ -1828,6 +1897,8 @@ static void read_marks(void) if (type < 0) die("object not found: %s", sha1_to_hex(sha1)); e = insert_object(sha1); + e->idx.has_digest = + has_sha1_file_digest(sha1, &e->idx.digest); e->type = type; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ @@ -2896,6 +2967,8 @@ static struct object_entry *dereference(struct object_entry *oe, die("object not found: %s", sha1_to_hex(sha1)); /* cache it! */ oe = insert_object(sha1); + oe->idx.has_digest = + has_sha1_file_digest(sha1, &oe->idx.digest); oe->type = type; oe->pack_id = MAX_PACK_ID; oe->idx.offset = 1; diff --git a/git-repack.sh b/git-repack.sh index 624feec..7602853 100755 --- a/git-repack.sh +++ b/git-repack.sh @@ -91,6 +91,7 @@ if [ -z "$names" ]; then say Nothing new to pack. fi + # Ok we have prepared all new packfiles. # First see if there are packs of the same name and if so @@ -100,7 +101,7 @@ rollback= failed= for name in $names do - for sfx in pack idx + for sfx in pack idx mds do file=pack-$name.$sfx test -f "$PACKDIR/$file" || continue @@ -148,15 +149,22 @@ do fullbases="$fullbases pack-$name" chmod a-w "$PACKTMP-$name.pack" chmod a-w "$PACKTMP-$name.idx" + (chmod a-w "$PACKTMP-$name.mds" 2>/dev/null || exit 0 ) mv -f "$PACKTMP-$name.pack" "$PACKDIR/pack-$name.pack" && mv -f "$PACKTMP-$name.idx" "$PACKDIR/pack-$name.idx" || exit + if test -f "$PACKTMP-$name.mds" + then + mv -f "$PACKTMP-$name.mds" "$PACKDIR/pack-$name.mds" \ + 2>/dev/null || exit + fi done # Remove the "old-" files for name in $names do rm -f "$PACKDIR/old-pack-$name.idx" + rm -f "$PACKDIR/old-pack-$name.mds" rm -f "$PACKDIR/old-pack-$name.pack" done @@ -172,7 +180,7 @@ then do case " $fullbases " in *" $e "*) ;; - *) rm -f "$e.pack" "$e.idx" "$e.keep" ;; + *) rm -f "$e.pack" "$e.idx" "$e.mds" "$e.keep" ;; esac done ) diff 
--git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh index 602806d..1b72d46 100755 --- a/t/t5300-pack-object.sh +++ b/t/t5300-pack-object.sh @@ -54,7 +54,7 @@ cd "$TRASH/.git2" test_expect_success \ 'check unpack without delta' \ - '(cd ../.git && find objects -type f -print) | + '(cd ../.git && find objects -type f -print) | grep -v mdsd | grep -v packdb | while read path do cmp $path ../.git/$path || { @@ -84,7 +84,7 @@ unset GIT_OBJECT_DIRECTORY cd "$TRASH/.git2" test_expect_success \ 'check unpack with REF_DELTA' \ - '(cd ../.git && find objects -type f -print) | + '(cd ../.git && find objects -type f -print) | grep -v mdsd | grep -v packdb | while read path do cmp $path ../.git/$path || { @@ -114,7 +114,7 @@ unset GIT_OBJECT_DIRECTORY cd "$TRASH/.git2" test_expect_success \ 'check unpack with OFS_DELTA' \ - '(cd ../.git && find objects -type f -print) | + '(cd ../.git && find objects -type f -print) | grep -v mdsd | grep -v packdb | while read path do cmp $path ../.git/$path || { @@ -211,6 +211,17 @@ test_expect_success \ test-3-${packname_3}.idx' test_expect_success \ + 'verify pack -v -M' \ + 'test -z "`git verify-pack -v -M test-1-${packname_1}.idx \ + test-2-${packname_2}.idx \ + test-3-${packname_3}.idx | grep \<no\ md\>`" && + test 0 != `git verify-pack -v -M test-1-${packname_1}.idx | grep md= | wc -l` && + test -z "`git verify-pack -v -M test-1-${packname_1}.idx | grep "should be"`" && + (x=`git verify-pack -v -M test-1-${packname_1}.idx | wc -l` + y=`git verify-pack -v -M test-1-${packname_1}.idx |grep -v \<no\ md\> | wc -l` + test $x = $y)' + +test_expect_success \ 'verify-pack catches mismatched .idx and .pack files' \ 'cat test-1-${packname_1}.idx >test-3.idx && cat test-2-${packname_2}.pack >test-3.pack && diff --git a/t/t5301-sliding-window.sh b/t/t5301-sliding-window.sh index 2fc5af6..ec0d72f 100755 --- a/t/t5301-sliding-window.sh +++ b/t/t5301-sliding-window.sh @@ -22,13 +22,19 @@ test_expect_success \ git repack -a -d && test "`git 
count-objects`" = "0 objects, 0 kilobytes" && pack1=`ls .git/objects/pack/*.pack` && - test -f "$pack1"' + test -f "$pack1" && + test -z "`git count-objects -v -M | grep MD`"' test_expect_success \ 'verify-pack -v, defaults' \ 'git verify-pack -v "$pack1"' test_expect_success \ + 'verify-pack -v -M, defaults' \ + 'git verify-pack -v -M "$pack1" | grep "<no md>" > tmp + test -z "`cat tmp`"' + +test_expect_success \ 'verify-pack -v, packedGitWindowSize == 1 page' \ 'git config core.packedGitWindowSize 512 && git verify-pack -v "$pack1"' @@ -49,12 +55,14 @@ test_expect_success \ test "`git count-objects`" = "0 objects, 0 kilobytes" && pack2=`ls .git/objects/pack/*.pack` && test -f "$pack2" && - test "$pack1" \!= "$pack2"' + test "$pack1" \!= "$pack2" && + test -z "`git count-objects -v -M | grep MD`"' test_expect_success \ 'verify-pack -v, defaults' \ 'git config --unset core.packedGitWindowSize && git config --unset core.packedGitLimit && - git verify-pack -v "$pack2"' + git verify-pack -v "$pack2" && + test -z "`git count-objects -v -M | grep MD`"' test_done diff --git a/t/t5302-pack-index.sh b/t/t5302-pack-index.sh index f8fa924..da10200 100755 --- a/t/t5302-pack-index.sh +++ b/t/t5302-pack-index.sh @@ -37,12 +37,14 @@ test_expect_success \ test_expect_success \ 'pack-objects with index version 1' \ 'pack1=$(git pack-objects --index-version=1 test-1 <obj-list) && - git verify-pack -v "test-1-${pack1}.pack"' + git verify-pack -v "test-1-${pack1}.pack" && + test -z "`git count-objects -v -M | grep MD`"' test_expect_success \ 'pack-objects with index version 2' \ 'pack2=$(git pack-objects --index-version=2 test-2 <obj-list) && - git verify-pack -v "test-2-${pack2}.pack"' + git verify-pack -v "test-2-${pack2}.pack" && + test -z "`git count-objects -v -M | grep MD`"' test_expect_success \ 'both packs should be identical' \ diff --git a/t/t5304-prune.sh b/t/t5304-prune.sh index d645328..86075a7 100755 --- a/t/t5304-prune.sh +++ b/t/t5304-prune.sh @@ -37,7 +37,8 @@ 
test_expect_success 'prune stale packs' ' git prune --expire 1.day && test -f $orig_pack && test -f .git/objects/tmp_2.pack && - ! test -f .git/objects/tmp_1.pack + ! test -f .git/objects/tmp_1.pack && + test -z "`git count-objects -v -M | grep MD`" ' @@ -50,7 +51,8 @@ test_expect_success 'prune --expire' ' test-chmtime =-86500 $BLOB_FILE && git prune --expire 1.day && test $before = $(git count-objects | sed "s/ .*//") && - ! test -f $BLOB_FILE + ! test -f $BLOB_FILE && + test -z "`git count-objects -v -M | grep MD`" ' @@ -64,7 +66,8 @@ test_expect_success 'gc: implicit prune --expire' ' test-chmtime =-$((2*$week+1)) $BLOB_FILE && git gc && test $before = $(git count-objects | sed "s/ .*//") && - ! test -f $BLOB_FILE + ! test -f $BLOB_FILE && + test -z "`git count-objects -v -M | grep MD`" ' @@ -78,8 +81,8 @@ test_expect_success 'gc: refuse to start with invalid gc.pruneExpire' ' test_expect_success 'gc: start with ok gc.pruneExpire' ' git config gc.pruneExpire 2.days.ago && - git gc - + git gc && + test -z "`git count-objects -v -M | grep MD`" ' test_expect_success 'prune: prune nonsense parameters' ' diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh index 438aaf6..63b4a13 100755 --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ -109,6 +109,12 @@ test_expect_success \ 'A: verify pack' \ 'for p in .git/objects/pack/*.pack;do git verify-pack $p||exit;done' +test_expect_success \ + 'A: verify pack -v -M --- all objects have CRCs' \ + 'for p in .git/objects/pack/*.pack; + do git verify-pack -v -M $p | grep "<no md>" > tmp; + test -z "`cat tmp`" || exit; done' + cat >expect <<EOF author $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE @@ -1504,7 +1510,7 @@ INPUT_END test_expect_success \ 'O: blank lines not necessary after other commands' \ 'git fast-import <input && - test 8 = `find .git/objects/pack -type f | wc -l` && + test 8 = `find .git/objects/pack -type 
f | grep -v .mds | wc -l` && test `git rev-parse refs/tags/O3-2nd` = `git rev-parse O3^` && git log --reverse --pretty=oneline O3 | sed s/^.*z// >actual && test_cmp expect actual' -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html