[JGIT PATCH 4/6 v3] Add QuotedString class to handle Git path style quoting rules

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Git patch files can contain file names which are quoted using
roughly the C language quoting rules.  In order to correctly create
or parse these files we must implement a quoting style that matches
those specific rules.

QuotedString itself is an abstract API so callers can be passed a
quoting style based on the context of where their output will be
used, and multiple styles could be supported.  This may be useful
if jgit ever grows a "git for-each-ref" style of output where Perl,
Python, Tcl and Bourne style quoting might be necessary.

References through the singleton QuotedString.GIT_PATH should be
able to bypass the virtual function table, as the specific type is
mentioned in the field declaration and that type is final.  A good
JIT should be able to remove the abstraction costs when the caller
has hardcoded the quoting style.

Signed-off-by: Shawn O. Pearce <spearce@xxxxxxxxxxx>
---
  Robin Rosenberg <robin.rosenberg@xxxxxxxxxx> wrote:
  > So this should pass, right?

  These tests have been added, and they pass.

  > You also reversed the arguments to testQuote. I think we should follow the
  > "expected"-first conventions here too.

  Fixed.

  > Using Constants.encode in the test is kind of dangerous as it does too
  > many conversions,

  Fixed.

 .../jgit/util/QuotedStringGitPathStyleTest.java    |  172 +++++++++++++
 .../src/org/spearce/jgit/util/QuotedString.java    |  268 ++++++++++++++++++++
 2 files changed, 440 insertions(+), 0 deletions(-)
 create mode 100644 org.spearce.jgit.test/tst/org/spearce/jgit/util/QuotedStringGitPathStyleTest.java
 create mode 100644 org.spearce.jgit/src/org/spearce/jgit/util/QuotedString.java

diff --git a/org.spearce.jgit.test/tst/org/spearce/jgit/util/QuotedStringGitPathStyleTest.java b/org.spearce.jgit.test/tst/org/spearce/jgit/util/QuotedStringGitPathStyleTest.java
new file mode 100644
index 0000000..54fbd31
--- /dev/null
+++ b/org.spearce.jgit.test/tst/org/spearce/jgit/util/QuotedStringGitPathStyleTest.java
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2008, Google Inc.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials provided
+ *   with the distribution.
+ *
+ * - Neither the name of the Git Development Community nor the
+ *   names of its contributors may be used to endorse or promote
+ *   products derived from this software without specific prior
+ *   written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.spearce.jgit.util;
+
+import static org.spearce.jgit.util.QuotedString.GIT_PATH;
+
+import java.io.UnsupportedEncodingException;
+
+import junit.framework.TestCase;
+
+import org.spearce.jgit.lib.Constants;
+/** Exercises quote and dequote of the QuotedString.GIT_PATH style. */
+public class QuotedStringGitPathStyleTest extends TestCase {
+	private static void assertQuote(final String exp, final String in) {
+		final String r = GIT_PATH.quote(in);
+		assertNotSame(in, r); // must be a new String, not in itself
+		assertFalse(in.equals(r));
+		assertEquals('"' + exp + '"', r); // exp is given without the quotes
+	}
+	// Wraps in with double quotes and encodes it as ISO-8859-1 before dequoting.
+	private static void assertDequote(final String exp, final String in) {
+		final byte[] b;
+		try {
+			b = ('"' + in + '"').getBytes("ISO-8859-1");
+		} catch (UnsupportedEncodingException e) {
+			throw new RuntimeException(e);
+		}
+		final String r = GIT_PATH.dequote(b, 0, b.length);
+		assertEquals(exp, r);
+	}
+
+	public void testQuote_Empty() {
+		assertEquals("\"\"", GIT_PATH.quote(""));
+	}
+
+	public void testDequote_Empty1() {
+		assertEquals("", GIT_PATH.dequote(new byte[0], 0, 0));
+	}
+
+	public void testDequote_Empty2() {
+		assertEquals("", GIT_PATH.dequote(new byte[] { '"', '"' }, 0, 2));
+	}
+	// A single quote character is too short to be a quoted pair.
+	public void testDequote_SoleDq() {
+		assertEquals("\"", GIT_PATH.dequote(new byte[] { '"' }, 0, 1));
+	}
+	// Input needing no escapes must come back as the same reference.
+	public void testQuote_BareA() {
+		final String in = "a";
+		assertSame(in, GIT_PATH.quote(in));
+	}
+
+	public void testDequote_BareA() {
+		final String in = "a";
+		final byte[] b = Constants.encode(in);
+		assertEquals(in, GIT_PATH.dequote(b, 0, b.length));
+	}
+	// Only the bytes in [offset, end) may be decoded.
+	public void testDequote_BareABCZ_OnlyBC() {
+		final String in = "abcz";
+		final byte[] b = Constants.encode(in);
+		final int p = in.indexOf('b');
+		assertEquals("bc", GIT_PATH.dequote(b, p, p + 2));
+	}
+
+	public void testDequote_LoneBackslash() {
+		assertDequote("\\", "\\");
+	}
+
+	public void testQuote_NamedEscapes() {
+		assertQuote("\\a", "\u0007");
+		assertQuote("\\b", "\b");
+		assertQuote("\\f", "\f");
+		assertQuote("\\n", "\n");
+		assertQuote("\\r", "\r");
+		assertQuote("\\t", "\t");
+		assertQuote("\\v", "\u000B");
+		assertQuote("\\\\", "\\");
+		assertQuote("\\\"", "\"");
+	}
+
+	public void testDequote_NamedEscapes() {
+		assertDequote("\u0007", "\\a");
+		assertDequote("\b", "\\b");
+		assertDequote("\f", "\\f");
+		assertDequote("\n", "\\n");
+		assertDequote("\r", "\\r");
+		assertDequote("\t", "\\t");
+		assertDequote("\u000B", "\\v");
+		assertDequote("\\", "\\\\");
+		assertDequote("\"", "\\\"");
+	}
+	// Every byte value, written as a three digit octal escape.
+	public void testDequote_OctalAll() {
+		for (int i = 0; i < 256; i++) {
+			String s = Integer.toOctalString(i);
+			while (s.length() < 3) {
+				s = "0" + s;
+			}
+			assertDequote("" + (char) i, "\\" + s);
+		}
+	}
+
+	public void testQuote_OctalAll() {
+		assertQuote("\\001", "\1");
+		assertQuote("\\176", "~");
+		assertQuote("\\303\\277", "\u00ff"); // \u00ff in UTF-8
+	}
+	// An unrecognized escape keeps its backslash.
+	public void testDequote_UnknownEscapeQ() {
+		assertDequote("\\q", "\\q");
+	}
+
+	public void testDequote_FooTabBar() {
+		assertDequote("foo\tbar", "foo\\tbar");
+	}
+	// Bytes that are not valid UTF-8 are expected to decode as Latin1.
+	public void testDequote_Latin1() {
+		assertDequote("\u00c5ngstr\u00f6m", "\\305ngstr\\366m"); // Latin1
+	}
+
+	public void testDequote_UTF8() {
+		assertDequote("\u00c5ngstr\u00f6m", "\\303\\205ngstr\\303\\266m");
+	}
+
+	public void testDequote_RawUTF8() {
+		assertDequote("\u00c5ngstr\u00f6m", "\303\205ngstr\303\266m");
+	}
+
+	public void testDequote_RawLatin1() {
+		assertDequote("\u00c5ngstr\u00f6m", "\305ngstr\366m");
+	}
+
+	public void testQuote_Ang() {
+		assertQuote("\\303\\205ngstr\\303\\266m", "\u00c5ngstr\u00f6m");
+	}
+}
diff --git a/org.spearce.jgit/src/org/spearce/jgit/util/QuotedString.java b/org.spearce.jgit/src/org/spearce/jgit/util/QuotedString.java
new file mode 100644
index 0000000..279b713
--- /dev/null
+++ b/org.spearce.jgit/src/org/spearce/jgit/util/QuotedString.java
@@ -0,0 +1,268 @@
+/*
+ * Copyright (C) 2008, Google Inc.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials provided
+ *   with the distribution.
+ *
+ * - Neither the name of the Git Development Community nor the
+ *   names of its contributors may be used to endorse or promote
+ *   products derived from this software without specific prior
+ *   written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.spearce.jgit.util;
+
+import java.util.Arrays;
+
+import org.spearce.jgit.lib.Constants;
+
+/** Utility functions related to quoted string handling. */
+public abstract class QuotedString {
+	/** Quoting style that obeys the rules Git applies to file names */
+	public static final GitPathStyle GIT_PATH = new GitPathStyle();
+
+	/**
+	 * Quote an input string by the quoting rules.
+	 * <p>
+	 * If the input string does not require any quoting, the same String
+	 * reference is returned to the caller.
+	 * <p>
+	 * Otherwise a quoted string is returned, including the opening and closing
+	 * quotation marks at the start and end of the string. If the style does not
+	 * permit raw Unicode characters then the string will first be encoded in
+	 * UTF-8, with unprintable sequences possibly escaped by the rules.
+	 * 
+	 * @param in
+	 *            any non-null Unicode string.
+	 * @return a quoted string. See above for details.
+	 */
+	public abstract String quote(String in);
+
+	/**
+	 * Remove quoting from a String by encoding it and scanning the bytes.
+	 * <p>
+	 * For every style the result must pair with quote such that:
+	 * 
+	 * <pre>
+	 * a.equals(dequote(quote(a)));
+	 * </pre>
+	 * 
+	 * holds for any <code>a</code>.
+	 * 
+	 * @param in
+	 *            a Unicode string to remove quoting from.
+	 * @return the cleaned string.
+	 * @see #dequote(byte[], int, int)
+	 */
+	public String dequote(final String in) {
+		final byte[] raw = Constants.encode(in);
+		return dequote(raw, 0, raw.length);
+	}
+
+	/**
+	 * Decode a previously quoted input, scanning a UTF-8 encoded buffer.
+	 * <p>
+	 * This method must match quote such that:
+	 * 
+	 * <pre>
+	 * a.equals(dequote(Constants.encode(quote(a))));
+	 * </pre>
+	 * 
+	 * is true for any <code>a</code>.
+	 * <p>
+	 * This method removes any opening/closing quotation marks added by
+	 * {@link #quote(String)}.
+	 * 
+	 * @param in
+	 *            the input buffer to parse.
+	 * @param offset
+	 *            first position within <code>in</code> to scan.
+	 * @param end
+	 *            one position past the last byte in <code>in</code> to scan.
+	 * @return the cleaned string.
+	 */
+	public abstract String dequote(byte[] in, int offset, int end);
+
+	/** Quoting style that obeys the rules Git applies to file names */
+	public static final class GitPathStyle extends QuotedString {
+		/**
+		 * How quote() treats each 7-bit byte value: 0 copies the byte as-is, a
+		 * positive value is the character written after a backslash, and -1
+		 * asks for an octal escape.
+		 */
+		private static final byte[] quote;
+		static {
+			quote = new byte[128];
+			Arrays.fill(quote, (byte) -1);
+
+			// Characters that are safe to emit without any escape.
+			for (char c = '0'; c <= '9'; c++)
+				quote[c] = 0;
+			for (char c = 'a'; c <= 'z'; c++)
+				quote[c] = 0;
+			for (char c = 'A'; c <= 'Z'; c++)
+				quote[c] = 0;
+			for (final char c : " +,-./=_^".toCharArray())
+				quote[c] = 0;
+
+			// Characters that have a two character backslash escape.
+			quote['\u0007'] = 'a';
+			quote['\b'] = 'b';
+			quote['\f'] = 'f';
+			quote['\n'] = 'n';
+			quote['\r'] = 'r';
+			quote['\t'] = 't';
+			quote['\u000B'] = 'v';
+			quote['\\'] = '\\';
+			quote['"'] = '"';
+		}
+
+		/**
+		 * Quote the encoded bytes of a string using the escape table. A non-empty
+		 * input that needs no escapes is returned as the same reference.
+		 */
+		@Override
+		public String quote(final String instr) {
+			if (instr.length() == 0)
+				return "\"\"";
+			final byte[] raw = Constants.encode(instr);
+			final StringBuilder out = new StringBuilder(2 + raw.length);
+			boolean clean = true;
+			out.append('"');
+			for (final byte b : raw) {
+				final int ch = b & 0xff;
+				final byte how = ch < quote.length ? quote[ch] : -1;
+				if (how == 0) {
+					// Safe character, copied through unchanged.
+					out.append((char) ch);
+				} else if (how > 0) {
+					// Named escape such as \n or \".
+					clean = false;
+					out.append('\\').append((char) how);
+				} else {
+					// Everything else becomes a three digit octal escape.
+					clean = false;
+					out.append('\\');
+					out.append((char) ('0' + ((ch >> 6) & 03)));
+					out.append((char) ('0' + ((ch >> 3) & 07)));
+					out.append((char) ('0' + (ch & 07)));
+				}
+			}
+			if (clean)
+				return instr;
+			return out.append('"').toString();
+		}
+
+		@Override
+		public String dequote(final byte[] in, final int inPtr, final int inEnd) {
+			if (inEnd - inPtr < 2 || in[inPtr] != '"' || in[inEnd - 1] != '"')
+				return RawParseUtils.decode(Constants.CHARSET, in, inPtr, inEnd);
+			return dq(in, inPtr + 1, inEnd - 1); // skip the enclosing quotes
+		}
+
+		/**
+		 * Undo the backslash escapes found between the enclosing quotes.
+		 * @param in buffer holding the quoted text.
+		 * @param inPtr first byte after the opening quote.
+		 * @param inEnd position of the closing quote; it is not scanned.
+		 * @return the unescaped bytes, decoded with Constants.CHARSET.
+		 */
+		private static String dq(final byte[] in, int inPtr, final int inEnd) {
+			final byte[] r = new byte[inEnd - inPtr];
+			int rPtr = 0;
+			while (inPtr < inEnd) {
+				final byte b = in[inPtr++];
+				if (b != '\\') {
+					r[rPtr++] = b;
+					continue;
+				}
+
+				if (inPtr == inEnd) {
+					// Lone trailing backslash. Treat it as a literal.
+					r[rPtr++] = '\\';
+					break;
+				}
+
+				switch (in[inPtr++]) {
+				case 'a':
+					r[rPtr++] = 0x07 /* \a = BEL */;
+					continue;
+				case 'b':
+					r[rPtr++] = '\b';
+					continue;
+				case 'f':
+					r[rPtr++] = '\f';
+					continue;
+				case 'n':
+					r[rPtr++] = '\n';
+					continue;
+				case 'r':
+					r[rPtr++] = '\r';
+					continue;
+				case 't':
+					r[rPtr++] = '\t';
+					continue;
+				case 'v':
+					r[rPtr++] = 0x0B/* \v = VT */;
+					continue;
+				case '\\':
+				case '"':
+					r[rPtr++] = in[inPtr - 1];
+					continue;
+				case '0':
+				case '1':
+				case '2':
+				case '3': {
+					// At most three octal digits form one escape. Stop there so a
+					// digit following it, as in "\3031", is kept as a literal.
+					int cp = in[inPtr - 1] - '0';
+					for (int n = 1; n < 3 && inPtr < inEnd; n++) {
+						final byte c = in[inPtr];
+						if (c < '0' || '7' < c)
+							break;
+						cp = (cp << 3) | (c - '0');
+						inPtr++;
+					}
+					r[rPtr++] = (byte) cp;
+					continue;
+				}
+				default:
+					// Any other code is taken literally.
+					r[rPtr++] = '\\';
+					r[rPtr++] = in[inPtr - 1];
+					continue;
+				}
+			}
+			return RawParseUtils.decode(Constants.CHARSET, r, 0, rPtr);
+		}
+
+		private GitPathStyle() {
+			// Singleton; the instance is published as QuotedString.GIT_PATH.
+		}
+	}
+}
-- 
1.6.1.rc2.299.gead4c


-- 
Shawn.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux