Jeff King <peff@xxxxxxxx> writes: > diff --git a/ewah/ewah_io.c b/ewah/ewah_io.c > index bed1994551..33c08c40f8 100644 > --- a/ewah/ewah_io.c > +++ b/ewah/ewah_io.c > @@ -122,16 +122,23 @@ int ewah_serialize_strbuf(struct ewah_bitmap *self, struct strbuf *sb) > return ewah_serialize_to(self, write_strbuf, sb); > } > > -int ewah_read_mmap(struct ewah_bitmap *self, const void *map, size_t len) > +ssize_t ewah_read_mmap(struct ewah_bitmap *self, const void *map, size_t len) > { > const uint8_t *ptr = map; > + size_t data_len; > size_t i; > > + if (len < sizeof(uint32_t)) > + return error("corrupt ewah bitmap: eof before bit size"); > self->bit_size = get_be32(ptr); > ptr += sizeof(uint32_t); > + len -= sizeof(uint32_t); > > + if (len < sizeof(uint32_t)) > + return error("corrupt ewah bitmap: eof before length"); > self->buffer_size = self->alloc_size = get_be32(ptr); > ptr += sizeof(uint32_t); > + len -= sizeof(uint32_t); > > REALLOC_ARRAY(self->buffer, self->alloc_size); > > @@ -141,15 +148,25 @@ int ewah_read_mmap(struct ewah_bitmap *self, const void *map, size_t len) > * the endianness conversion in a separate pass to ensure > * we're loading 8-byte aligned words. > */ > - memcpy(self->buffer, ptr, self->buffer_size * sizeof(eword_t)); > - ptr += self->buffer_size * sizeof(eword_t); > + data_len = st_mult(self->buffer_size, sizeof(eword_t)); This is a faithful conversion from the original, but I somehow would have appreciated if the latter were not sizeof(eword_t) but rather sizeof(self->buffer_size[0]), especially as I wondered ... > + if (len < data_len) > + return error("corrupt ewah bitmap: eof in data " > + "(%"PRIuMAX" bytes short)", > + (uintmax_t)(data_len - len)); > + memcpy(self->buffer, ptr, data_len); > + ptr += data_len; > + len -= data_len; > > for (i = 0; i < self->buffer_size; ++i) > self->buffer[i] = ntohll(self->buffer[i]); ... what individual datum one iteration of this loop is copying, and then realized "buffer_size" is a misleading field name (anything that claims to be size and not measuring in bytes is misleading to me ;-). > + if (len < sizeof(uint32_t)) > + return error("corrupt ewah bitmap: eof before rlw"); > self->rlw = self->buffer + get_be32(ptr); > + ptr += sizeof(uint32_t); > + len -= sizeof(uint32_t); > > - return (3 * 4) + (self->buffer_size * 8); > + return ptr - (const uint8_t *)map; Much nicer; I needed to wonder what these 12 and 8 in the original are. > > int ewah_deserialize(struct ewah_bitmap *self, int fd); > -int ewah_read_mmap(struct ewah_bitmap *self, const void *map, size_t len); > +ssize_t ewah_read_mmap(struct ewah_bitmap *self, const void *map, size_t len); I double checked all the callers and made sure that they are already prepared to react sensibly to error returns, which is good. > > uint32_t ewah_checksum(struct ewah_bitmap *self); > > diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh > index 423c0a475f..237ee6e5fc 100755 > --- a/t/t5310-pack-bitmaps.sh > +++ b/t/t5310-pack-bitmaps.sh > @@ -331,4 +331,17 @@ test_expect_success 'pack reuse respects --incremental' ' > git show-index <empty.idx >actual && > test_cmp expect actual > ' > + > +test_expect_success 'truncated bitmap fails gracefully' ' > + git repack -ad && > + git rev-list --use-bitmap-index --count --all >expect && > + bitmap=$(ls .git/objects/pack/*.bitmap) && > + test_when_finished "rm -f $bitmap" && > + head -c 512 <$bitmap >$bitmap.tmp && > + mv $bitmap.tmp $bitmap && > + git rev-list --use-bitmap-index --count --all >actual 2>stderr && > + test_cmp expect actual && > + test_i18ngrep corrupt stderr > +' > + > test_done