Re: [PATCH v3 09/11] archive: support streaming large files to a tar archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Nguyễn Thái Ngọc Duy  <pclouds@xxxxxxxxx> writes:

> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>

This is *way* *too* underdocumented.

For example, it is totally unclear from the patch what determines
the last parameter to write_archive_entries(), OK_TO_STREAM.  Does
it depend on the nature of the payload?  Does the backend decide it,
in other words, if it is prepared to read from a streaming API or not?

I wanted to first take all the "do not slurp things in core, and
instead read from streaming API" patches from this series, but I
had to stop at this one.

> ---
>  archive-tar.c    |   35 ++++++++++++++++++++++++++++-------
>  archive-zip.c    |    9 +++++----
>  archive.c        |   51 ++++++++++++++++++++++++++++++++++-----------------
>  archive.h        |   11 +++++++++--
>  t/t1050-large.sh |    2 +-
>  5 files changed, 77 insertions(+), 31 deletions(-)
>
> diff --git a/archive-tar.c b/archive-tar.c
> index 20af005..5bffe49 100644
> --- a/archive-tar.c
> +++ b/archive-tar.c
> @@ -5,6 +5,7 @@
>  #include "tar.h"
>  #include "archive.h"
>  #include "run-command.h"
> +#include "streaming.h"
>  
>  #define RECORDSIZE	(512)
>  #define BLOCKSIZE	(RECORDSIZE * 20)
> @@ -123,9 +124,29 @@ static size_t get_path_prefix(const char *path, size_t pathlen, size_t maxlen)
>  	return i;
>  }
>  
> +static void write_file(struct git_istream *stream, const void *buffer,
> +		       unsigned long size)
> +{
> +	if (!stream) {
> +		write_blocked(buffer, size);
> +		return;
> +	}
> +	for (;;) {
> +		char buf[1024 * 16];
> +		ssize_t readlen;
> +
> +		readlen = read_istream(stream, buf, sizeof(buf));
> +
> +		if (!readlen)
> +			break;
> +		write_blocked(buf, readlen);
> +	}
> +}
> +
>  static int write_tar_entry(struct archiver_args *args,
> -		const unsigned char *sha1, const char *path, size_t pathlen,
> -		unsigned int mode, void *buffer, unsigned long size)
> +			   const unsigned char *sha1, const char *path,
> +			   size_t pathlen, unsigned int mode, void *buffer,
> +			   struct git_istream *stream, unsigned long size)
>  {
>  	struct ustar_header header;
>  	struct strbuf ext_header = STRBUF_INIT;
> @@ -200,14 +221,14 @@ static int write_tar_entry(struct archiver_args *args,
>  
>  	if (ext_header.len > 0) {
>  		err = write_tar_entry(args, sha1, NULL, 0, 0, ext_header.buf,
> -				ext_header.len);
> +				      NULL, ext_header.len);
>  		if (err)
>  			return err;
>  	}
>  	strbuf_release(&ext_header);
>  	write_blocked(&header, sizeof(header));
> -	if (S_ISREG(mode) && buffer && size > 0)
> -		write_blocked(buffer, size);
> +	if (S_ISREG(mode) && size > 0)
> +		write_file(stream, buffer, size);
>  	return err;
>  }
>  
> @@ -219,7 +240,7 @@ static int write_global_extended_header(struct archiver_args *args)
>  
>  	strbuf_append_ext_header(&ext_header, "comment", sha1_to_hex(sha1), 40);
>  	err = write_tar_entry(args, NULL, NULL, 0, 0, ext_header.buf,
> -			ext_header.len);
> +			      NULL, ext_header.len);
>  	strbuf_release(&ext_header);
>  	return err;
>  }
> @@ -308,7 +329,7 @@ static int write_tar_archive(const struct archiver *ar,
>  	if (args->commit_sha1)
>  		err = write_global_extended_header(args);
>  	if (!err)
> -		err = write_archive_entries(args, write_tar_entry);
> +		err = write_archive_entries(args, write_tar_entry, 1);
>  	if (!err)
>  		write_trailer();
>  	return err;
> diff --git a/archive-zip.c b/archive-zip.c
> index 02d1f37..4a1e917 100644
> --- a/archive-zip.c
> +++ b/archive-zip.c
> @@ -120,9 +120,10 @@ static void *zlib_deflate(void *data, unsigned long size,
>  	return buffer;
>  }
>  
> -static int write_zip_entry(struct archiver_args *args,
> -		const unsigned char *sha1, const char *path, size_t pathlen,
> -		unsigned int mode, void *buffer, unsigned long size)
> +int write_zip_entry(struct archiver_args *args,
> +			   const unsigned char *sha1, const char *path,
> +			   size_t pathlen, unsigned int mode, void *buffer,
> +			   struct git_istream *stream, unsigned long size)
>  {
>  	struct zip_local_header header;
>  	struct zip_dir_header dirent;
> @@ -271,7 +272,7 @@ static int write_zip_archive(const struct archiver *ar,
>  	zip_dir = xmalloc(ZIP_DIRECTORY_MIN_SIZE);
>  	zip_dir_size = ZIP_DIRECTORY_MIN_SIZE;
>  
> -	err = write_archive_entries(args, write_zip_entry);
> +	err = write_archive_entries(args, write_zip_entry, 0);
>  	if (!err)
>  		write_zip_trailer(args->commit_sha1);
>  
> diff --git a/archive.c b/archive.c
> index 1ee837d..257eadf 100644
> --- a/archive.c
> +++ b/archive.c
> @@ -5,6 +5,7 @@
>  #include "archive.h"
>  #include "parse-options.h"
>  #include "unpack-trees.h"
> +#include "streaming.h"
>  
>  static char const * const archive_usage[] = {
>  	"git archive [options] <tree-ish> [<path>...]",
> @@ -59,26 +60,35 @@ static void format_subst(const struct commit *commit,
>  	free(to_free);
>  }
>  
> -static void *sha1_file_to_archive(const char *path, const unsigned char *sha1,
> -		unsigned int mode, enum object_type *type,
> -		unsigned long *sizep, const struct commit *commit)
> +void sha1_file_to_archive(void **buffer, struct git_istream **stream,
> +			  const char *path, const unsigned char *sha1,
> +			  unsigned int mode, enum object_type *type,
> +			  unsigned long *sizep,
> +			  const struct commit *commit)
>  {
> -	void *buffer;
> +	if (stream) {
> +		struct stream_filter *filter;
> +		filter = get_stream_filter(path, sha1);
> +		if (!commit && S_ISREG(mode) && is_null_stream_filter(filter)) {
> +			*buffer = NULL;
> +			*stream = open_istream(sha1, type, sizep, NULL);
> +			return;
> +		}
> +		*stream = NULL;
> +	}
>  
> -	buffer = read_sha1_file(sha1, type, sizep);
> -	if (buffer && S_ISREG(mode)) {
> +	*buffer = read_sha1_file(sha1, type, sizep);
> +	if (*buffer && S_ISREG(mode)) {
>  		struct strbuf buf = STRBUF_INIT;
>  		size_t size = 0;
>  
> -		strbuf_attach(&buf, buffer, *sizep, *sizep + 1);
> +		strbuf_attach(&buf, *buffer, *sizep, *sizep + 1);
>  		convert_to_working_tree(path, buf.buf, buf.len, &buf);
>  		if (commit)
>  			format_subst(commit, buf.buf, buf.len, &buf);
> -		buffer = strbuf_detach(&buf, &size);
> +		*buffer = strbuf_detach(&buf, &size);
>  		*sizep = size;
>  	}
> -
> -	return buffer;
>  }
>  
>  static void setup_archive_check(struct git_attr_check *check)
> @@ -97,6 +107,7 @@ static void setup_archive_check(struct git_attr_check *check)
>  struct archiver_context {
>  	struct archiver_args *args;
>  	write_archive_entry_fn_t write_entry;
> +	int stream_ok;
>  };
>  
>  static int write_archive_entry(const unsigned char *sha1, const char *base,
> @@ -109,6 +120,7 @@ static int write_archive_entry(const unsigned char *sha1, const char *base,
>  	write_archive_entry_fn_t write_entry = c->write_entry;
>  	struct git_attr_check check[2];
>  	const char *path_without_prefix;
> +	struct git_istream *stream = NULL;
>  	int convert = 0;
>  	int err;
>  	enum object_type type;
> @@ -133,25 +145,29 @@ static int write_archive_entry(const unsigned char *sha1, const char *base,
>  		strbuf_addch(&path, '/');
>  		if (args->verbose)
>  			fprintf(stderr, "%.*s\n", (int)path.len, path.buf);
> -		err = write_entry(args, sha1, path.buf, path.len, mode, NULL, 0);
> +		err = write_entry(args, sha1, path.buf, path.len, mode, NULL, NULL, 0);
>  		if (err)
>  			return err;
>  		return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
>  	}
>  
> -	buffer = sha1_file_to_archive(path_without_prefix, sha1, mode,
> -			&type, &size, convert ? args->commit : NULL);
> -	if (!buffer)
> +	sha1_file_to_archive(&buffer, c->stream_ok ? &stream : NULL,
> +			     path_without_prefix, sha1, mode,
> +			     &type, &size, convert ? args->commit : NULL);
> +	if (!buffer && !stream)
>  		return error("cannot read %s", sha1_to_hex(sha1));
>  	if (args->verbose)
>  		fprintf(stderr, "%.*s\n", (int)path.len, path.buf);
> -	err = write_entry(args, sha1, path.buf, path.len, mode, buffer, size);
> +	err = write_entry(args, sha1, path.buf, path.len, mode, buffer, stream, size);
> +	if (stream)
> +		close_istream(stream);
>  	free(buffer);
>  	return err;
>  }
>  
>  int write_archive_entries(struct archiver_args *args,
> -		write_archive_entry_fn_t write_entry)
> +			  write_archive_entry_fn_t write_entry,
> +			  int stream_ok)
>  {
>  	struct archiver_context context;
>  	struct unpack_trees_options opts;
> @@ -167,13 +183,14 @@ int write_archive_entries(struct archiver_args *args,
>  		if (args->verbose)
>  			fprintf(stderr, "%.*s\n", (int)len, args->base);
>  		err = write_entry(args, args->tree->object.sha1, args->base,
> -				len, 040777, NULL, 0);
> +				  len, 040777, NULL, NULL, 0);
>  		if (err)
>  			return err;
>  	}
>  
>  	context.args = args;
>  	context.write_entry = write_entry;
> +	context.stream_ok = stream_ok;
>  
>  	/*
>  	 * Setup index and instruct attr to read index only
> diff --git a/archive.h b/archive.h
> index 2b0884f..370cca9 100644
> --- a/archive.h
> +++ b/archive.h
> @@ -27,9 +27,16 @@ extern void register_archiver(struct archiver *);
>  extern void init_tar_archiver(void);
>  extern void init_zip_archiver(void);
>  
> -typedef int (*write_archive_entry_fn_t)(struct archiver_args *args, const unsigned char *sha1, const char *path, size_t pathlen, unsigned int mode, void *buffer, unsigned long size);
> +struct git_istream;
> +typedef int (*write_archive_entry_fn_t)(struct archiver_args *args,
> +					const unsigned char *sha1,
> +					const char *path, size_t pathlen,
> +					unsigned int mode,
> +					void *buffer,
> +					struct git_istream *stream,
> +					unsigned long size);
>  
> -extern int write_archive_entries(struct archiver_args *args, write_archive_entry_fn_t write_entry);
> +extern int write_archive_entries(struct archiver_args *args, write_archive_entry_fn_t write_entry, int stream_ok);
>  extern int write_archive(int argc, const char **argv, const char *prefix, int setup_prefix, const char *name_hint, int remote);
>  
>  const char *archive_format_from_filename(const char *filename);
> diff --git a/t/t1050-large.sh b/t/t1050-large.sh
> index 52acae5..5336eb8 100755
> --- a/t/t1050-large.sh
> +++ b/t/t1050-large.sh
> @@ -151,7 +151,7 @@ test_expect_failure 'repack' '
>  	git repack -ad
>  '
>  
> -test_expect_failure 'tar achiving' '
> +test_expect_success 'tar achiving' '
>  	git archive --format=tar HEAD >/dev/null
>  '
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]