[JGIT PATCH] Encode/decode index and tree entries using UTF-8

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Decoding uses the same strategy as for commit messages and other string
entities. Encoding is always done in UTF-8. This is incompatible with
Git for non-unicode unices, but it leads to the expected behavior on
Windows and cross-locale sharing of repositories.

Signed-off-by: Robin Rosenberg <robin.rosnberg@xxxxxxxxxx>
---

Inspired by the recent thread on the gitml, I decided to clean up jgit a little. I
know the GitIndex is soon to be obsoleted, but it is still the class that does
the dirty work when committing in Egit and the changes are fairly simple
anyway.

- Unicode paths will work on all platforms that support unicode, i.e. Windows
and any unix using a UTF-8 locale, with one small exception. Accented characters
on OS-X probably do not work well.
- Combined use of unicode on one platform is compatible with non-unicode locales
on other platforms as long as the characters in use are available in the local character
set.

A side note: invalid byte sequences on Unix, e.g. ISO-Latin-1 encoded file names, cannot
work in Java. Such files are inaccessible. JGit will allow you to rename them in the index,
but that is all.

-- robin

 .../src/org/spearce/jgit/lib/GitIndex.java         |   27 +++++++++++---------
 .../src/org/spearce/jgit/lib/Tree.java             |   11 +++----
 .../src/org/spearce/jgit/lib/TreeEntry.java        |   13 +++------
 3 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java b/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java
index 22935ab..3d37033 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java
@@ -63,6 +63,7 @@
 import org.spearce.jgit.errors.CorruptObjectException;
 import org.spearce.jgit.errors.NotSupportedException;
 import org.spearce.jgit.util.FS;
+import org.spearce.jgit.util.RawParseUtils;
 
 /**
  * A representation of the Git index.
@@ -178,8 +179,9 @@ public Entry add(File wd, File f) throws IOException {
 	 * @param f
 	 *            the file whose path shall be removed.
 	 * @return true if such a path was found (and thus removed)
+	 * @throws IOException 
 	 */
-	public boolean remove(File wd, File f) {
+	public boolean remove(File wd, File f) throws IOException {
 		byte[] key = makeKey(wd, f);
 		return entries.remove(key) != null;
 	}
@@ -300,11 +302,11 @@ static boolean File_hasExecute() {
 		return FS.INSTANCE.supportsExecute();
 	}
 
-	static byte[] makeKey(File wd, File f) {
+	static byte[] makeKey(File wd, File f) throws IOException {
 		if (!f.getPath().startsWith(wd.getPath()))
 			throw new Error("Path is not in working dir");
 		String relName = Repository.stripWorkDir(wd, f);
-		return relName.getBytes();
+		return relName.getBytes(Constants.CHARACTER_ENCODING);
 	}
 
 	Boolean filemode;
@@ -376,7 +378,7 @@ Entry(TreeEntry f, int stage)
 				size = -1;
 			}
 			sha1 = f.getId();
-			name = f.getFullName().getBytes("UTF-8");
+			name = f.getFullName().getBytes(Constants.CHARACTER_ENCODING);
 			flags = (short) ((stage << 12) | name.length); // TODO: fix flags
 		}
 
@@ -580,7 +582,7 @@ private File getFile(File wd) {
 		}
 
 		public String toString() {
-			return new String(name) + "/SHA-1(" + sha1.name() + ")/M:"
+			return getName() + "/SHA-1(" + sha1.name() + ")/M:"
 					+ new Date(ctime / 1000000L) + "/C:"
 					+ new Date(mtime / 1000000L) + "/d" + dev + "/i" + ino
 					+ "/m" + Integer.toString(mode, 8) + "/u" + uid + "/g"
@@ -591,7 +593,7 @@ public String toString() {
 		 * @return path name for this entry
 		 */
 		public String getName() {
-			return new String(name);
+			return RawParseUtils.decode(Constants.CHARSET, name, 0, name.length);
 		}
 
 		/**
@@ -731,7 +733,7 @@ void readTree(String prefix, Tree t) throws IOException {
 				readTree(name, (Tree) te);
 			} else {
 				Entry e = new Entry(te, 0);
-				entries.put(name.getBytes("UTF-8"), e);
+				entries.put(name.getBytes(Constants.CHARACTER_ENCODING), e);
 			}
 		}
 	}
@@ -743,7 +745,7 @@ void readTree(String prefix, Tree t) throws IOException {
 	 * @throws IOException
 	 */
 	public Entry addEntry(TreeEntry te) throws IOException {
-		byte[] key = te.getFullName().getBytes("UTF-8");
+		byte[] key = te.getFullName().getBytes(Constants.CHARACTER_ENCODING);
 		Entry e = new Entry(te, 0);
 		entries.put(key, e);
 		return e;
@@ -825,7 +827,7 @@ public ObjectId writeTree() throws IOException {
 			while (trees.size() < newName.length) {
 				if (!current.existsTree(newName[trees.size() - 1])) {
 					current = new Tree(current, newName[trees.size() - 1]
-							.getBytes());
+							.getBytes(Constants.CHARACTER_ENCODING));
 					current.getParent().addEntry(current);
 					trees.push(current);
 				} else {
@@ -835,7 +837,7 @@ public ObjectId writeTree() throws IOException {
 				}
 			}
 			FileTreeEntry ne = new FileTreeEntry(current, e.sha1,
-					newName[newName.length - 1].getBytes(),
+					newName[newName.length - 1].getBytes(Constants.CHARACTER_ENCODING),
 					(e.mode & FileMode.EXECUTABLE_FILE.getBits()) == FileMode.EXECUTABLE_FILE.getBits());
 			current.addEntry(ne);
 		}
@@ -880,7 +882,7 @@ int longestCommonPath(String[] a, String[] b) {
 	 * Small beware: Unaccounted for are unmerged entries. You may want
 	 * to abort if members with stage != 0 are found if you are doing
 	 * any updating operations. All stages will be found after one another
-	 * here later. Currently only one stage per name is returned.
+	 * here later. Currently only one stage per name is returned.	
 	 *
 	 * @return The index entries sorted
 	 */
@@ -896,7 +898,8 @@ int longestCommonPath(String[] a, String[] b) {
 	 * @throws UnsupportedEncodingException
 	 */
 	public Entry getEntry(String path) throws UnsupportedEncodingException {
-		return (Entry) entries.get(Repository.gitInternalSlash(path.getBytes("ISO-8859-1")));
+		return (Entry) entries.get(Repository.gitInternalSlash(path
+				.getBytes(Constants.CHARACTER_ENCODING)));
 	}
 
 	/**
diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java b/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java
index 25a9a71..3fd3d30 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java
@@ -44,6 +44,7 @@
 import org.spearce.jgit.errors.CorruptObjectException;
 import org.spearce.jgit.errors.EntryExistsException;
 import org.spearce.jgit.errors.MissingObjectException;
+import org.spearce.jgit.util.RawParseUtils;
 
 /**
  * A representation of a Git tree entry. A Tree is a directory in Git.
@@ -281,8 +282,7 @@ public FileTreeEntry addFile(final byte[] s, final int offset)
 
 		final byte[] newName = substring(s, offset, slash);
 		if (p >= 0)
-			throw new EntryExistsException(new String(newName,
-					Constants.CHARACTER_ENCODING));
+			throw new EntryExistsException(RawParseUtils.decode(Constants.CHARSET, newName, 0, newName.length));
 		else if (slash < s.length) {
 			final Tree t = new Tree(this, newName);
 			insertEntry(p, t);
@@ -332,8 +332,8 @@ public Tree addTree(final byte[] s, final int offset) throws IOException {
 
 		final byte[] newName = substring(s, offset, slash);
 		if (p >= 0)
-			throw new EntryExistsException(new String(newName,
-					Constants.CHARACTER_ENCODING));
+			throw new EntryExistsException(RawParseUtils.decode(
+					Constants.CHARSET, newName, 0, newName.length));
 
 		final Tree t = new Tree(this, newName);
 		insertEntry(p, t);
@@ -355,8 +355,7 @@ public void addEntry(final TreeEntry e) throws IOException {
 			e.attachParent(this);
 			insertEntry(p, e);
 		} else {
-			throw new EntryExistsException(new String(e.getNameUTF8(),
-					Constants.CHARACTER_ENCODING));
+			throw new EntryExistsException(e.getName());
 		}
 	}
 
diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java b/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java
index 85dda1d..7f58056 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java
@@ -39,9 +39,9 @@
 package org.spearce.jgit.lib;
 
 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
 
 import org.spearce.jgit.lib.GitIndex.Entry;
+import org.spearce.jgit.util.RawParseUtils;
 
 /**
  * This class represents an entry in a tree, like a blob or another tree.
@@ -126,13 +126,10 @@ public Repository getRepository() {
 	 * @return the name of this entry.
 	 */
 	public String getName() {
-		try {
-			return nameUTF8 != null ? new String(nameUTF8,
-					Constants.CHARACTER_ENCODING) : null;
-		} catch (UnsupportedEncodingException uee) {
-			throw new RuntimeException("JVM doesn't support "
-					+ Constants.CHARACTER_ENCODING, uee);
-		}
+		if (nameUTF8 != null)
+			return RawParseUtils.decode(Constants.CHARSET, nameUTF8, 0,
+					nameUTF8.length);
+		return null;
 	}
 
 	/**
-- 
1.6.0.2.308.gef4a

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux