[PATCH v3 1/5] object-file: refactor write_loose_object() to read buffer from stream

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Han Xin <hanxin.hx@xxxxxxxxxxxxxxx>

We used to call "get_data()" in "unpack_non_delta_entry()" to read the
entire contents of a blob object, no matter how big it is. This
implementation may consume all the memory and cause OOM.

This can be improved by feeding data to "write_loose_object()" in a
stream. The input stream is implemented as an interface. In the first
step, we make a simple implementation, feeding the entire buffer in the
"stream" to "write_loose_object()" as a refactor.

Helped-by: Jiang Xin <zhiyou.jx@xxxxxxxxxxxxxxx>
Signed-off-by: Han Xin <hanxin.hx@xxxxxxxxxxxxxxx>
---
 object-file.c  | 50 ++++++++++++++++++++++++++++++++++++++++++++++----
 object-store.h |  5 +++++
 2 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/object-file.c b/object-file.c
index c3d866a287..227f53a0de 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1860,8 +1860,26 @@ static int create_tmpfile(struct strbuf *tmp, const char *filename)
 	return fd;
 }
 
+struct simple_input_stream_data {
+	const void *buf;
+	unsigned long len;
+};
+
+static const void *feed_simple_input_stream(struct input_stream *in_stream, unsigned long *len)
+{
+	struct simple_input_stream_data *data = in_stream->data;
+
+	if (data->len == 0) {
+		*len = 0;
+		return NULL;
+	}
+	*len = data->len;
+	data->len = 0;
+	return data->buf;
+}
+
 static int write_loose_object(const struct object_id *oid, char *hdr,
-			      int hdrlen, const void *buf, unsigned long len,
+			      int hdrlen, struct input_stream *in_stream,
 			      time_t mtime, unsigned flags)
 {
 	int fd, ret;
@@ -1871,6 +1889,8 @@ static int write_loose_object(const struct object_id *oid, char *hdr,
 	struct object_id parano_oid;
 	static struct strbuf tmp_file = STRBUF_INIT;
 	static struct strbuf filename = STRBUF_INIT;
+	const void *buf;
+	unsigned long len;
 
 	loose_object_path(the_repository, &filename, oid);
 
@@ -1898,6 +1918,7 @@ static int write_loose_object(const struct object_id *oid, char *hdr,
 	the_hash_algo->update_fn(&c, hdr, hdrlen);
 
 	/* Then the data itself.. */
+	buf = in_stream->read(in_stream, &len);
 	stream.next_in = (void *)buf;
 	stream.avail_in = len;
 	do {
@@ -1960,6 +1981,13 @@ int write_object_file_flags(const void *buf, unsigned long len,
 {
 	char hdr[MAX_HEADER_LEN];
 	int hdrlen = sizeof(hdr);
+	struct input_stream in_stream = {
+		.read = feed_simple_input_stream,
+		.data = (void *)&(struct simple_input_stream_data) {
+			.buf = buf,
+			.len = len,
+		},
+	};
 
 	/* Normally if we have it in the pack then we do not bother writing
 	 * it out into .git/objects/??/?{38} file.
@@ -1968,7 +1996,7 @@ int write_object_file_flags(const void *buf, unsigned long len,
 				  &hdrlen);
 	if (freshen_packed_object(oid) || freshen_loose_object(oid))
 		return 0;
-	return write_loose_object(oid, hdr, hdrlen, buf, len, 0, flags);
+	return write_loose_object(oid, hdr, hdrlen, &in_stream, 0, flags);
 }
 
 int hash_object_file_literally(const void *buf, unsigned long len,
@@ -1977,6 +2005,13 @@ int hash_object_file_literally(const void *buf, unsigned long len,
 {
 	char *header;
 	int hdrlen, status = 0;
+	struct input_stream in_stream = {
+		.read = feed_simple_input_stream,
+		.data = (void *)&(struct simple_input_stream_data) {
+			.buf = buf,
+			.len = len,
+		},
+	};
 
 	/* type string, SP, %lu of the length plus NUL must fit this */
 	hdrlen = strlen(type) + MAX_HEADER_LEN;
@@ -1988,7 +2023,7 @@ int hash_object_file_literally(const void *buf, unsigned long len,
 		goto cleanup;
 	if (freshen_packed_object(oid) || freshen_loose_object(oid))
 		goto cleanup;
-	status = write_loose_object(oid, header, hdrlen, buf, len, 0, 0);
+	status = write_loose_object(oid, header, hdrlen, &in_stream, 0, 0);
 
 cleanup:
 	free(header);
@@ -2003,14 +2038,21 @@ int force_object_loose(const struct object_id *oid, time_t mtime)
 	char hdr[MAX_HEADER_LEN];
 	int hdrlen;
 	int ret;
+	struct simple_input_stream_data data;
+	struct input_stream in_stream = {
+		.read = feed_simple_input_stream,
+		.data = &data,
+	};
 
 	if (has_loose_object(oid))
 		return 0;
 	buf = read_object(the_repository, oid, &type, &len);
 	if (!buf)
 		return error(_("cannot read object for %s"), oid_to_hex(oid));
+	data.buf = buf;
+	data.len = len;
 	hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %"PRIuMAX , type_name(type), (uintmax_t)len) + 1;
-	ret = write_loose_object(oid, hdr, hdrlen, buf, len, mtime, 0);
+	ret = write_loose_object(oid, hdr, hdrlen, &in_stream, mtime, 0);
 	free(buf);
 
 	return ret;
diff --git a/object-store.h b/object-store.h
index 952efb6a4b..ccc1fc9c1a 100644
--- a/object-store.h
+++ b/object-store.h
@@ -34,6 +34,11 @@ struct object_directory {
 	char *path;
 };
 
+struct input_stream {
+	const void *(*read)(struct input_stream *, unsigned long *len);
+	void *data;
+};
+
 KHASH_INIT(odb_path_map, const char * /* key: odb_path */,
 	struct object_directory *, 1, fspathhash, fspatheq)
 
-- 
2.34.0.6.g676eedc724




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux