[PATCH/RFC v2 1/1] Use off_t instead of size_t for functions dealing with streamed checkin

tboegi@xxxxxx · Tue, 23 Oct 2018 18:11:06 +0200

From: Torsten Bögershausen <tboegi@xxxxxx>

When streaming data from disk into a blob, it should be possible to commit
a file larger than 4 GiB, because streaming never needs to hold the whole
file in memory at once.
Today this is not possible on e.g. a 32-bit Linux system: the streaming
call chain passes the file size as a size_t, which is only 32 bits wide
on such a system.
There is no good reason to limit the length of the file this way.
Loosen this restriction and use off_t instead of size_t in the call chain.

Signed-off-by: Torsten Bögershausen <tboegi@xxxxxx>
---

This is a suggestion for v2. It now changes sha1-file.c as well,
so that the patch makes more sense as a whole.
The initial commit of a > 4 GiB file was tested on a 32-bit system.

I didn't remove the wrapper functions, as I don't know
what their purpose is.

Also, the commit message may still need some tweaking.

bulk-checkin.c | 6 +++---
 bulk-checkin.h | 2 +-
 sha1-file.c    | 5 ++---
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/bulk-checkin.c b/bulk-checkin.c
index 409ecb566b..34dbf5c4ea 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -96,7 +96,7 @@ static int already_written(struct bulk_checkin_state *state, struct object_id *o
  */
 static int stream_to_pack(struct bulk_checkin_state *state,
 			  git_hash_ctx *ctx, off_t *already_hashed_to,
-			  int fd, size_t size, enum object_type type,
+			  int fd, off_t size, enum object_type type,
 			  const char *path, unsigned flags)
 {
 	git_zstream s;
@@ -189,7 +189,7 @@ static void prepare_to_stream(struct bulk_checkin_state *state,
 
 static int deflate_to_pack(struct bulk_checkin_state *state,
 			   struct object_id *result_oid,
-			   int fd, size_t size,
+			   int fd, off_t size,
 			   enum object_type type, const char *path,
 			   unsigned flags)
 {
@@ -258,7 +258,7 @@ static int deflate_to_pack(struct bulk_checkin_state *state,
 }
 
 int index_bulk_checkin(struct object_id *oid,
-		       int fd, size_t size, enum object_type type,
+		       int fd, off_t size, enum object_type type,
 		       const char *path, unsigned flags)
 {
 	int status = deflate_to_pack(&state, oid, fd, size, type,
diff --git a/bulk-checkin.h b/bulk-checkin.h
index f438f93811..09b2affdf3 100644
--- a/bulk-checkin.h
+++ b/bulk-checkin.h
@@ -7,7 +7,7 @@
 #include "cache.h"
 
 extern int index_bulk_checkin(struct object_id *oid,
-			      int fd, size_t size, enum object_type type,
+			      int fd, off_t size, enum object_type type,
 			      const char *path, unsigned flags);
 
 extern void plug_bulk_checkin(void);
diff --git a/sha1-file.c b/sha1-file.c
index a4367b8f04..98d0f50ffa 100644
--- a/sha1-file.c
+++ b/sha1-file.c
@@ -1934,7 +1934,7 @@ static int index_core(struct object_id *oid, int fd, size_t size,
  * binary blobs, they generally do not want to get any conversion, and
  * callers should avoid this code path when filters are requested.
  */
-static int index_stream(struct object_id *oid, int fd, size_t size,
+static int index_stream(struct object_id *oid, int fd, off_t size,
 			enum object_type type, const char *path,
 			unsigned flags)
 {
@@ -1959,8 +1959,7 @@ int index_fd(struct object_id *oid, int fd, struct stat *st,
 		ret = index_core(oid, fd, xsize_t(st->st_size), type, path,
 				 flags);
 	else
-		ret = index_stream(oid, fd, xsize_t(st->st_size), type, path,
-				   flags);
+		ret = index_stream(oid, fd, st->st_size, type, path, flags);
 	close(fd);
 	return ret;
 }
-- 
2.11.0