[PATCH 2/2 v2] git-remote-mediawiki: escape ", \, and LF in file names

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



A MediaWiki page name can contain, and even start with, a " character. We
have to escape it when generating the fast-export stream, as well as the \
character. While we're at it, also escape newlines, although I don't think
we can get them from MediaWiki pages.

Signed-off-by: Matthieu Moy <Matthieu.Moy@xxxxxxx>
---
 contrib/mw-to-git/git-remote-mediawiki      | 16 +++++++++++++---
 contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh | 26 ++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/contrib/mw-to-git/git-remote-mediawiki b/contrib/mw-to-git/git-remote-mediawiki
index 68555d4..094129d 100755
--- a/contrib/mw-to-git/git-remote-mediawiki
+++ b/contrib/mw-to-git/git-remote-mediawiki
@@ -711,6 +711,14 @@ sub fetch_mw_revisions {
 	return ($n, @revisions);
 }
 
+sub fe_escape_path { # returns its argument escaped and wrapped in double quotes
+    my $path = shift; # work on a local copy of the first argument
+    $path =~ s/\\/\\\\/g; # backslashes first, so the ones added below are not doubled
+    $path =~ s/"/\\"/g; # each double quote gets a backslash in front of it
+    $path =~ s/\n/\\n/g; # each newline becomes the two characters backslash, n
+    return '"' . $path . '"'; # the result is always quoted, even if nothing was escaped
+}
+
 sub import_file_revision {
 	my $commit = shift;
 	my %commit = %{$commit};
@@ -738,15 +746,17 @@ sub import_file_revision {
 		print STDOUT "from refs/mediawiki/$remotename/master^0\n";
 	}
 	if ($content ne DELETED_CONTENT) {
-		print STDOUT "M 644 inline $title.mw\n";
+		print STDOUT "M 644 inline " .
+		    fe_escape_path($title . ".mw") . "\n";
 		literal_data($content);
 		if (%mediafile) {
-			print STDOUT "M 644 inline $mediafile{title}\n";
+			print STDOUT "M 644 inline "
+			    . fe_escape_path($mediafile{title}) . "\n";
 			literal_data_raw($mediafile{content});
 		}
 		print STDOUT "\n\n";
 	} else {
-		print STDOUT "D $title.mw\n";
+		print STDOUT "D " . fe_escape_path($title . ".mw") . "\n";
 	}
 
 	# mediawiki revision number in the git note
diff --git a/contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh b/contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh
index 246d47d..b6405ce 100755
--- a/contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh
+++ b/contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh
@@ -318,4 +318,30 @@ test_expect_success 'git push with \ in format control' '
 '
 
 
+test_expect_success 'fast-import meta-characters in page name (mw -> git)' '
+	wiki_reset &&
+	wiki_editpage \"file\"_\\_foo "expect to be called \"file\"_\\_foo" false &&
+	git clone mediawiki::'"$WIKI_URL"' mw_dir_21 &&
+	test_path_is_file mw_dir_21/\"file\"_\\_foo.mw &&
+	wiki_getallpage ref_page_21 &&
+	test_diff_directories mw_dir_21 ref_page_21
+'
+
+
+test_expect_success 'fast-import meta-characters in page name (git -> mw) ' '
+	wiki_reset &&
+	git clone mediawiki::'"$WIKI_URL"' mw_dir_22 &&
+	(
+		cd mw_dir_22 &&
+		echo "this file is called \"file\"_\\_foo.mw" >\"file\"_\\_foo.mw &&
+		git add . &&
+		git commit -am "file \"file\"_\\_foo" &&
+		git pull &&
+		git push
+	) &&
+	wiki_getallpage ref_page_22 &&
+	test_diff_directories mw_dir_22 ref_page_22
+'
+
+
 test_done
-- 
1.8.0.319.g8abfee4

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]