Blob content is not used by the verify_pack caller (currently only fsck); we only need to make sure that each blob's SHA-1 signature matches its content. unpack_entry() is taught to hash a pack entry as it is unpacked, eliminating the need to keep the whole blob in memory. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx> --- cache.h | 2 +- fast-import.c | 2 +- pack-check.c | 21 ++++++++++++++++++++- sha1_file.c | 45 +++++++++++++++++++++++++++++++++++---------- t/t1050-large.sh | 2 +- 5 files changed, 58 insertions(+), 14 deletions(-) diff --git a/cache.h b/cache.h index 6ce691b..33bfb69 100644 --- a/cache.h +++ b/cache.h @@ -1065,7 +1065,7 @@ extern const unsigned char *nth_packed_object_sha1(struct packed_git *, uint32_t extern off_t nth_packed_object_offset(const struct packed_git *, uint32_t); extern off_t find_pack_entry_one(const unsigned char *, struct packed_git *); extern int is_pack_valid(struct packed_git *); -extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsigned long *); +extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsigned long *, unsigned char *); extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep); extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t); extern int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *); diff --git a/fast-import.c b/fast-import.c index 6cd19e5..5e94a64 100644 --- a/fast-import.c +++ b/fast-import.c @@ -1303,7 +1303,7 @@ static void *gfi_unpack_entry( */ p->pack_size = pack_size + 20; } - return unpack_entry(p, oe->idx.offset, &type, sizep); + return unpack_entry(p, oe->idx.offset, &type, sizep, NULL); } static const char *get_mode(const char *str, uint16_t *modep) diff --git a/pack-check.c b/pack-check.c index 63a595c..1920bdb 100644 --- a/pack-check.c +++ b/pack-check.c @@ -105,6 +105,7 @@ static int 
verify_packfile(struct packed_git *p, void *data; enum object_type type; unsigned long size; + off_t curpos = entries[i].offset; if (p->index_version > 1) { off_t offset = entries[i].offset; @@ -116,7 +117,25 @@ static int verify_packfile(struct packed_git *p, sha1_to_hex(entries[i].sha1), p->pack_name, (uintmax_t)offset); } - data = unpack_entry(p, entries[i].offset, &type, &size); + type = unpack_object_header(p, w_curs, &curpos, &size); + unuse_pack(w_curs); + if (type == OBJ_BLOB) { + unsigned char sha1[20]; + data = unpack_entry(p, entries[i].offset, &type, &size, sha1); + if (!data) { + if (hashcmp(entries[i].sha1, sha1)) + err = error("packed %s from %s is corrupt", + sha1_to_hex(entries[i].sha1), p->pack_name); + else if (fn) { + int eaten = 0; + fn(entries[i].sha1, type, size, NULL, &eaten); + } + if (((base_count + i) & 1023) == 0) + display_progress(progress, base_count + i); + continue; + } + } + data = unpack_entry(p, entries[i].offset, &type, &size, NULL); if (!data) err = error("cannot unpack %s from %s at offset %"PRIuMAX"", sha1_to_hex(entries[i].sha1), p->pack_name, diff --git a/sha1_file.c b/sha1_file.c index a77ef0a..d68a5b0 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1653,28 +1653,51 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset, } static void *unpack_compressed_entry(struct packed_git *p, - struct pack_window **w_curs, - off_t curpos, - unsigned long size) + struct pack_window **w_curs, + off_t curpos, + unsigned long size, + enum object_type type, + unsigned char *sha1) { + static unsigned char fixed_buf[8192]; int st; git_zstream stream; unsigned char *buffer, *in; + git_SHA_CTX c; + + if (sha1) { /* do hash_sha1_file internally */ + char hdr[32]; + int hdrlen = sprintf(hdr, "%s %lu", typename(type), size)+1; + git_SHA1_Init(&c); + git_SHA1_Update(&c, hdr, hdrlen); + + buffer = fixed_buf; + } else { + buffer = xmallocz(size); + } - buffer = xmallocz(size); memset(&stream, 0, sizeof(stream)); stream.next_out = 
buffer; - stream.avail_out = size + 1; + stream.avail_out = buffer == fixed_buf ? sizeof(fixed_buf) : size + 1; git_inflate_init(&stream); do { in = use_pack(p, w_curs, curpos, &stream.avail_in); stream.next_in = in; st = git_inflate(&stream, Z_FINISH); - if (!stream.avail_out) + if (sha1) { + git_SHA1_Update(&c, buffer, stream.next_out - (unsigned char *)buffer); + stream.next_out = buffer; + stream.avail_out = sizeof(fixed_buf); + } + else if (!stream.avail_out) break; /* the payload is larger than it should be */ curpos += stream.next_in - in; } while (st == Z_OK || st == Z_BUF_ERROR); + if (sha1) { + git_SHA1_Final(sha1, &c); + buffer = NULL; + } git_inflate_end(&stream); if ((st != Z_STREAM_END) || stream.total_out != size) { free(buffer); @@ -1727,7 +1750,7 @@ static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset, ret = ent->data; if (!ret || ent->p != p || ent->base_offset != base_offset) - return unpack_entry(p, base_offset, type, base_size); + return unpack_entry(p, base_offset, type, base_size, NULL); if (!keep_cache) { ent->data = NULL; @@ -1844,7 +1867,7 @@ static void *unpack_delta_entry(struct packed_git *p, return NULL; } - delta_data = unpack_compressed_entry(p, w_curs, curpos, delta_size); + delta_data = unpack_compressed_entry(p, w_curs, curpos, delta_size, OBJ_NONE, NULL); if (!delta_data) { error("failed to unpack compressed delta " "at offset %"PRIuMAX" from %s", @@ -1883,7 +1906,8 @@ static void write_pack_access_log(struct packed_git *p, off_t obj_offset) int do_check_packed_object_crc; void *unpack_entry(struct packed_git *p, off_t obj_offset, - enum object_type *type, unsigned long *sizep) + enum object_type *type, unsigned long *sizep, + unsigned char *sha1) { struct pack_window *w_curs = NULL; off_t curpos = obj_offset; @@ -1917,7 +1941,8 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset, case OBJ_TREE: case OBJ_BLOB: case OBJ_TAG: - data = unpack_compressed_entry(p, &w_curs, curpos, *sizep); + data = 
unpack_compressed_entry(p, &w_curs, curpos, + *sizep, *type, sha1); break; default: data = NULL; diff --git a/t/t1050-large.sh b/t/t1050-large.sh index 7e78c72..c749ecb 100755 --- a/t/t1050-large.sh +++ b/t/t1050-large.sh @@ -141,7 +141,7 @@ test_expect_success 'fetch updates' ' ) ' -test_expect_failure 'fsck' ' +test_expect_success 'fsck' ' git fsck --full ' -- 1.7.3.1.256.g2539c.dirty -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html