[PATCH] archive-zip: add --text parameter

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Entries in a ZIP file can be marked as text files.  Extractors can use
that flag to apply end-of-line conversions.  An example is unzip -a.

git archive currently marks all ZIP file entries as binary files.  This
patch adds the new option --text that can be used to mark non-binary
files or all files as text files, thus enabling the use of unzip -a.

No sign-off yet, because I'm not sure we really need another option.
E.g. --text=all doesn't seem to be actually useful, but it was easy to
implement.  Info-ZIP's zip always creates archives like --text=auto
does, so perhaps we should make that our default behavior as well?

Changing the default behavior would cause newer versions of git archive
to create different ZIP files than older ones, of course.  This can
break caching and signature checking.

The last time we did that was in 2012 when we added an extended mtime
field (227bf5980), I think.  I don't remember any fallout from that
change, but there was a recent discussion about the stability of
generated tar files, so I'm a bit cautious:

    http://thread.gmane.org/gmane.comp.version-control.git/258516

---
 Documentation/git-archive.txt |  5 ++++
 archive-zip.c                 | 23 ++++++++++++++----
 archive.c                     | 18 ++++++++++++++
 archive.h                     |  7 ++++++
 t/t5003-archive-zip.sh        | 56 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 105 insertions(+), 4 deletions(-)

diff --git a/Documentation/git-archive.txt b/Documentation/git-archive.txt
index cfa1e4e..684ca36 100644
--- a/Documentation/git-archive.txt
+++ b/Documentation/git-archive.txt
@@ -93,6 +93,11 @@ zip
 	Highest and slowest compression level.  You can specify any
 	number from 1 to 9 to adjust compression speed and ratio.
 
+--text=<which>::
+	Mark the specified entries as text files so that `unzip -a`
+	converts end-of-line characters while extracting. The value
+	must be either 'all', 'auto', or 'none' (the default).
+
 
 CONFIGURATION
 -------------
diff --git a/archive-zip.c b/archive-zip.c
index 4bde019..3767940 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -5,6 +5,7 @@
 #include "archive.h"
 #include "streaming.h"
 #include "utf8.h"
+#include "xdiff-interface.h"
 
 static int zip_date;
 static int zip_time;
@@ -210,6 +211,7 @@ static int write_zip_entry(struct archiver_args *args,
 	struct git_istream *stream = NULL;
 	unsigned long flags = 0;
 	unsigned long size;
+	int is_binary = -1;
 
 	crc = crc32(0, NULL, 0);
 
@@ -238,8 +240,14 @@ static int write_zip_entry(struct archiver_args *args,
 		method = 0;
 		attr2 = S_ISLNK(mode) ? ((mode | 0777) << 16) :
 			(mode & 0111) ? ((mode) << 16) : 0;
-		if (S_ISREG(mode) && args->compression_level != 0 && size > 0)
-			method = 8;
+		if (S_ISREG(mode)) {
+			if (args->compression_level != 0 && size > 0)
+				method = 8;
+			if (args->text == ARCHIVE_TEXT_ALL)
+				is_binary = 0;
+			else if (args->text == ARCHIVE_TEXT_NONE)
+				is_binary = 1;
+		}
 
 		if (S_ISREG(mode) && type == OBJ_BLOB && !args->convert &&
 		    size > big_file_threshold) {
@@ -256,6 +264,8 @@ static int write_zip_entry(struct archiver_args *args,
 				return error("cannot read %s",
 					     sha1_to_hex(sha1));
 			crc = crc32(crc, buffer, size);
+			if (is_binary < 0)
+				is_binary = buffer_is_binary(buffer, size);
 			out = buffer;
 		}
 		compressed_size = (method == 0) ? size : 0;
@@ -300,7 +310,6 @@ static int write_zip_entry(struct archiver_args *args,
 	copy_le16(dirent.extra_length, ZIP_EXTRA_MTIME_SIZE);
 	copy_le16(dirent.comment_length, 0);
 	copy_le16(dirent.disk, 0);
-	copy_le16(dirent.attr1, 0);
 	copy_le32(dirent.attr2, attr2);
 	copy_le32(dirent.offset, zip_offset);
 
@@ -328,6 +337,8 @@ static int write_zip_entry(struct archiver_args *args,
 			if (readlen <= 0)
 				break;
 			crc = crc32(crc, buf, readlen);
+			if (is_binary < 0)
+				is_binary = buffer_is_binary(buffer, size);
 			write_or_die(1, buf, readlen);
 		}
 		close_istream(stream);
@@ -361,6 +372,8 @@ static int write_zip_entry(struct archiver_args *args,
 			if (readlen <= 0)
 				break;
 			crc = crc32(crc, buf, readlen);
+			if (is_binary < 0)
+				is_binary = buffer_is_binary(buffer, size);
 
 			zstream.next_in = buf;
 			zstream.avail_in = readlen;
@@ -405,6 +418,8 @@ static int write_zip_entry(struct archiver_args *args,
 	free(deflated);
 	free(buffer);
 
+	copy_le16(dirent.attr1, !is_binary);
+
 	memcpy(zip_dir + zip_dir_offset, &dirent, ZIP_DIR_HEADER_SIZE);
 	zip_dir_offset += ZIP_DIR_HEADER_SIZE;
 	memcpy(zip_dir + zip_dir_offset, path, pathlen);
@@ -466,7 +481,7 @@ static int write_zip_archive(const struct archiver *ar,
 static struct archiver zip_archiver = {
 	"zip",
 	write_zip_archive,
-	ARCHIVER_WANT_COMPRESSION_LEVELS|ARCHIVER_REMOTE
+	ARCHIVER_WANT_COMPRESSION_LEVELS|ARCHIVER_REMOTE|ARCHIVER_TEXT_ATTRIBUTE
 };
 
 void init_zip_archiver(void)
diff --git a/archive.c b/archive.c
index 96057ed..89bd23d 100644
--- a/archive.c
+++ b/archive.c
@@ -417,6 +417,7 @@ static int parse_archive_args(int argc, const char **argv,
 	const char *remote = NULL;
 	const char *exec = NULL;
 	const char *output = NULL;
+	const char *text = NULL;
 	int compression_level = -1;
 	int verbose = 0;
 	int i;
@@ -442,6 +443,8 @@ static int parse_archive_args(int argc, const char **argv,
 		OPT__COMPR_HIDDEN('7', &compression_level, 7),
 		OPT__COMPR_HIDDEN('8', &compression_level, 8),
 		OPT__COMPR('9', &compression_level, N_("compress better"), 9),
+		OPT_STRING(0, "text", &text, N_("which"),
+			N_("specify which files contain text")),
 		OPT_GROUP(""),
 		OPT_BOOL('l', "list", &list,
 			N_("list supported archive formats")),
@@ -493,6 +496,21 @@ static int parse_archive_args(int argc, const char **argv,
 					format, compression_level);
 		}
 	}
+	args->text = ARCHIVE_TEXT_NONE;
+	if (text) {
+		if (!strcmp(text, "auto"))
+			args->text = ARCHIVE_TEXT_AUTO;
+		else if (!strcmp(text, "all"))
+			args->text = ARCHIVE_TEXT_ALL;
+		else if (!strcmp(text, "none"))
+			args->text = ARCHIVE_TEXT_NONE;
+		else
+			die("Unknown argument: --text=%s", text);
+		if (args->text != ARCHIVE_TEXT_NONE &&
+		    !((*ar)->flags & ARCHIVER_TEXT_ATTRIBUTE))
+			die("Argument not supported for format '%s': --text=%s",
+			    format, text);
+	}
 	args->verbose = verbose;
 	args->base = base;
 	args->baselen = strlen(base);
diff --git a/archive.h b/archive.h
index 4a791e1..eabcd11 100644
--- a/archive.h
+++ b/archive.h
@@ -14,11 +14,18 @@ struct archiver_args {
 	unsigned int verbose : 1;
 	unsigned int worktree_attributes : 1;
 	unsigned int convert : 1;
+	unsigned int text : 2;
 	int compression_level;
 };
 
 #define ARCHIVER_WANT_COMPRESSION_LEVELS 1
 #define ARCHIVER_REMOTE 2
+#define ARCHIVER_TEXT_ATTRIBUTE 4
+
+#define ARCHIVE_TEXT_NONE	0
+#define ARCHIVE_TEXT_ALL	1
+#define ARCHIVE_TEXT_AUTO	2
+
 struct archiver {
 	const char *name;
 	int (*write_archive)(const struct archiver *, struct archiver_args *);
diff --git a/t/t5003-archive-zip.sh b/t/t5003-archive-zip.sh
index c929db5..4e49aad 100755
--- a/t/t5003-archive-zip.sh
+++ b/t/t5003-archive-zip.sh
@@ -35,12 +35,56 @@ check_zip() {
 	"
 }
 
+zip_text() {
+	option=$1
+	zipfile=text_$1.zip
+	dir=text_$1
+
+	test_expect_success "git archive --format-zip --text=$option" "
+		git archive --format=zip --text=$option HEAD >$zipfile
+	"
+
+	test_expect_success UNZIP " extract ZIP archive with EOL conversion" '
+		(mkdir $dir && cd $dir && "$GIT_UNZIP" -a ../$zipfile)
+	'
+}
+
+check_text_converted() {
+	dir=text_$1
+	filetype=$2
+	extracted=$dir/a/$filetype
+
+	test_expect_success " validate that $filetype files are converted" "
+		test_cmp_bin $extracted.cr $extracted.crlf &&
+		test_cmp_bin $extracted.cr $extracted.lf
+	"
+}
+
+check_text_verbatim() {
+	dir=text_$1
+	filetype=$2
+	extracted=$dir/a/$filetype
+	original=a/$filetype
+
+	test_expect_success " validate that $filetype files are unchanged" "
+		test_cmp_bin $original.cr   $extracted.cr &&
+		test_cmp_bin $original.crlf $extracted.crlf &&
+		test_cmp_bin $original.lf   $extracted.lf
+	"
+}
+
 test_expect_success \
     'populate workdir' \
     'mkdir a &&
      echo simple textfile >a/a &&
      mkdir a/bin &&
      cp /bin/sh a/bin &&
+     printf "text\r"   >a/text.cr &&
+     printf "text\r\n" >a/text.crlf &&
+     printf "text\n"   >a/text.lf &&
+     printf "\0\r"   >a/binary.cr &&
+     printf "\0\r\n" >a/binary.crlf &&
+     printf "\0\n"   >a/binary.lf &&
      printf "A\$Format:%s\$O" "$SUBSTFORMAT" >a/substfile1 &&
      printf "A not substituted O" >a/substfile2 &&
      (p=long_path_to_a_file && cd a &&
@@ -124,4 +168,16 @@ test_expect_success 'git archive --format=zip on large files' '
 
 check_zip large-compressed
 
+zip_text all
+check_text_converted all text
+check_text_converted all binary
+
+zip_text auto
+check_text_converted auto text
+check_text_verbatim auto binary
+
+zip_text none
+check_text_verbatim none text
+check_text_verbatim none binary
+
 test_done
-- 
2.3.1

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]