When putting whole objects in core is unavoidable, try match object type and size first before actually inflating. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx> --- The bypassing has_sha1_object() and doing sha1_object_info() directly mainly to help avoid unnecessary collision tesst when user run this command on an in-repo pack. But I realized there's no sure way to check if the given pack (especiall with --stdin) is in repo. So I'll drop 5/6 and 6/6. People just have to set GIT_DIR=/nowhere. builtin/index-pack.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++--- t/t5300-pack-object.sh | 5 +++ 2 files changed, 82 insertions(+), 5 deletions(-) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 9129299..8b5c1eb 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -9,6 +9,7 @@ #include "progress.h" #include "fsck.h" #include "exec_cmd.h" +#include "streaming.h" #include "thread-utils.h" static const char index_pack_usage[] = @@ -621,31 +622,102 @@ static void find_delta_children(const union delta_base *base, *last_index = last; } +struct compare_data { + struct object_entry *entry; + struct git_istream *st; + unsigned char *buf; + unsigned long buf_size; +}; + +static int compare_objects(const unsigned char *buf, unsigned long size, + void *cb_data) +{ + struct compare_data *data = cb_data; + + if (data->buf_size < size) { + free(data->buf); + data->buf = xmalloc(size); + data->buf_size = size; + } + + while (size) { + ssize_t len = read_istream(data->st, data->buf, size); + if (len == 0) + die(_("SHA1 COLLISION FOUND WITH %s !"), + sha1_to_hex(data->entry->idx.sha1)); + if (len < 0) + die(_("unable to read %s"), + sha1_to_hex(data->entry->idx.sha1)); + if (memcmp(buf, data->buf, len)) + die(_("SHA1 COLLISION FOUND WITH %s !"), + sha1_to_hex(data->entry->idx.sha1)); + size -= len; + buf += len; + } + return 0; +} + +static int check_collison(struct object_entry *entry) +{ + struct compare_data data; + enum object_type type; + unsigned long size; + + if (entry->size <= big_file_threshold || entry->type != OBJ_BLOB) + return -1; + + memset(&data, 0, sizeof(data)); + data.entry = entry; + data.st = open_istream(entry->idx.sha1, &type, &size, NULL); + if (!data.st) + return -1; + if (size != entry->size || type != entry->type) + die(_("SHA1 COLLISION FOUND WITH %s !"), + sha1_to_hex(entry->idx.sha1)); + unpack_data(entry, compare_objects, &data); + close_istream(data.st); + free(data.buf); + return 0; +} + static void sha1_object(const void *data, struct object_entry *obj_entry, unsigned long size, enum object_type type, const unsigned char *sha1) { void *new_data = NULL; + int collision_test_needed; assert(data || obj_entry); read_lock(); - if (has_sha1_file(sha1)) { + collision_test_needed = has_sha1_file(sha1); + read_unlock(); + + if (collision_test_needed && !data) { + read_lock(); + if (!check_collison(obj_entry)) + collision_test_needed = 0; + read_unlock(); + } + if (collision_test_needed) { void *has_data; enum object_type has_type; unsigned long has_size; - if (!data) - data = new_data = get_data_from_pack(obj_entry); + read_lock(); + has_type = sha1_object_info(sha1, &has_size); + if (has_type != type || has_size != size) + die(_("SHA1 COLLISION FOUND WITH %s !"), sha1_to_hex(sha1)); has_data = read_sha1_file(sha1, &has_type, &has_size); read_unlock(); + if (!data) + data = new_data = get_data_from_pack(obj_entry); if (!has_data) die(_("cannot read existing object %s"), sha1_to_hex(sha1)); if (size != has_size || type != has_type || memcmp(data, has_data, size) != 0) die(_("SHA1 COLLISION FOUND WITH %s !"), sha1_to_hex(sha1)); free(has_data); - } else - read_unlock(); + } if (strict) { read_lock(); diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh index d9d856b..300ed91 100755 --- a/t/t5300-pack-object.sh +++ b/t/t5300-pack-object.sh @@ -418,4 +418,9 @@ test_expect_success \ 'test_must_fail git index-pack -o bad.idx test-3.pack 2>msg && grep "SHA1 COLLISION FOUND" msg' +test_expect_success \ + 'make sure index-pack detects the SHA1 collision (large blobs)' \ + 'test_must_fail git -c core.bigfilethreshold=1 index-pack -o bad.idx test-3.pack 2>msg && + grep "SHA1 COLLISION FOUND" msg' + test_done -- 1.7.10.2.549.g9354186 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html