On 2017-03-15 17:13, Jeff King wrote: > On Wed, Mar 15, 2017 at 11:59:52AM -0400, Jeff King wrote: > >> I agree that detecting the situation in the meantime is a good idea. >> The patch above probably handles the bulk-checkin code path, I'd guess. >> It might be nice to have similar checks in other places, too: >> >> - when reading from an existing packfile >> >> Looks like we may already have such a check in >> unpack_object_header_buffer(). >> >> - when taking in new objects via index-pack or unpack-objects (to >> catch a fetch of a too-big object) >> >> I think index-pack.c:unpack_raw_entry() would want a similar check >> to what is in unpack_object_header_buffer(). > > Here are the results of a few quick experiments using two versions of > git, one built for 32-bit and one for 64-bit: > > $ git init > $ dd if=/dev/zero of=foo.zero bs=1M count=4097 > $ git32 add foo.zero > fatal: Cannot handle files this big > > That comes from the xsize_t() wrapper. I guess it wouldn't trigger on > Windows, though, because it is measuring size_t, not "unsigned long" (on > my 32-bit build they are the same, of course). > > $ git64 add foo.zero > $ git32 cat-file blob :foo.zero > error: bad object header > fatal: packed object df6f032f301d1ce40477eefa505f2fac1de5e243 (stored in .git/objects/pack/pack-57d422f19904e9651bec43d10b7a9cd882de48ac.pack) is corrupt > > So we notice, which is good. This is the message from > unpack_object_header_buffer(). It might be worth improving the error > message to mention the integer overflow. > > And here's what index-pack looks like: > > $ git32 index-pack --stdin <.git/objects/pack/*.pack > fatal: pack has bad object at offset 12: inflate returned -5 > > It's good that we notice, but the error message isn't great. What > happens is that we overflow the size integer, allocate a too-small > buffer, and then zlib complains when we run out of buffer but there's > still content to inflate. We probably ought to notice the integer > overflow in the first place and complain there. > > -Peff > There is a bunch of problems when compiling on a 32 bit system. size_t is 32 bit, but off_t is 64. =================== Some fixes which improve things, (but Windows in the 64 bit version has both size_t and off_t 64 bit, so this is a problem for a 32 bit system. The real "show stopper" is at the end. --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -183,7 +183,7 @@ static void prepare_to_stream(struct bulk_checkin_state *state, static int deflate_to_pack(struct bulk_checkin_state *state, unsigned char result_sha1[], - int fd, size_t size, + int fd, off_t size, enum object_type type, const char *path, unsigned flags) { @@ -252,7 +252,7 @@ static int deflate_to_pack(struct bulk_checkin_state *state, } int index_bulk_checkin(unsigned char *sha1, - int fd, size_t size, enum object_type type, + int fd, off_t size, enum object_type type, const char *path, unsigned flags) { diff --git a/bulk-checkin.h b/bulk-checkin.h index fbd40fc..a385e61 100644 --- a/bulk-checkin.h +++ b/bulk-checkin.h @@ -5,7 +5,7 @@ #define BULK_CHECKIN_H extern int index_bulk_checkin(unsigned char sha1[], - int fd, size_t size, enum object_type type, + int fd, off_t size, enum object_type type, const char *path, unsigned flags); extern void plug_bulk_checkin(void); diff --git a/diff.c b/diff.c index 051761b..f1d7ac3 100644 --- a/diff.c +++ b/diff.c @@ -2857,7 +2857,9 @@ int diff_populate_filespec(struct diff_filespec *s, unsigned int flags) return err; } } - s->size = xsize_t(st.st_size); + s->size = xoff_t(st.st_size); + if (s->size > (size_t) s->size) + return 0; if (!s->size) goto empty; if (S_ISLNK(st.st_mode)) { ======================= After a while we run into the fact that Git wants to produce a diff: diff --git a/diffcore.h b/diffcore.h index 6230241..852214c 100644 --- a/diffcore.h +++ b/diffcore.h @@ -22,6 +22,15 @@ #define MINIMUM_BREAK_SIZE 400 /* do not break a file smaller than this */ +static inline off_t xyoff_t(off_t len, const char *fff, int lll) +{ + if (len > (off_t) len) + die("Cannot handle files this big (%s:%d)", fff, lll); + return (off_t)len; +} + +#define xoff_t(o) xyoff_t((o), __FILE__, __LINE__) + struct userdiff_driver; struct diff_filespec { @@ -29,7 +38,7 @@ struct diff_filespec { char *path; void *data; void *cnt_data; - unsigned long size; + off_t size; int count; /* Reference count */ int rename_used; /* Count of rename users */ unsigned short mode; /* file mode */ diff --git a/git-compat-util.h b/git-compat-util.h index ef6d560..318e998 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -881,13 +881,15 @@ static inline char *xstrdup_or_null(const char *str) return str ? xstrdup(str) : NULL; } -static inline size_t xsize_t(off_t len) +static inline size_t xysize_t(off_t len, const char *fff, int lll) { if (len > (size_t) len) - die("Cannot handle files this big"); + die("Cannot handle files this big (%s:%d)", fff, lll); return (size_t)len; } +#define xsize_t(o) xysize_t((o), __FILE__, __LINE__) + __attribute__((format (printf, 3, 4))) extern int xsnprintf(char *dst, size_t max, const char *fmt, ...); diff --git a/sha1_file.c b/sha1_file.c index ec957db..807123c 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -3561,7 +3561,7 @@ static int index_core(unsigned char *sha1, int fd, size_t size, * binary blobs, they generally do not want to get any conversion, and * callers should avoid this code path when filters are requested. */ -static int index_stream(unsigned char *sha1, int fd, size_t size, +static int index_stream(unsigned char *sha1, int fd, off_t size, enum object_type type, const char *path, unsigned flags) { @@ -3586,7 +3586,7 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st, ret = index_core(sha1, fd, xsize_t(st->st_size), type, path, flags); else - ret = index_stream(sha1, fd, xsize_t(st->st_size), type, path, + ret = index_stream(sha1, fd, st->st_size, type, path, flags); close(fd); return ret; ========================== And it seams as if zlib is the limitation here. Unless we include the zlib source code into Git and redefine uLong, is there a nice way around this: =========================== /usr/include/zconf.h:# define uLong z_uLong /usr/include/zconf.h:# define uLongf z_uLongf /usr/include/zconf.h:typedef unsigned long uLong; /* 32 bits or more */ /usr/include/zconf.h:typedef uLong FAR uLongf;