The conversion from byte[] to String is performed one line at a time, in case the patch itself converts the file from one character encoding to another. For simplicity we still assume UTF-8 as the default encoding for all content, but eventually we should support the .gitattributes encoding property when performing this conversion. Signed-off-by: Shawn O. Pearce <spearce@xxxxxxxxxxx> --- .../src/org/spearce/jgit/patch/BinaryHunk.java | 8 ++ .../src/org/spearce/jgit/patch/FileHeader.java | 6 ++ .../src/org/spearce/jgit/patch/HunkHeader.java | 7 ++ .../src/org/spearce/jgit/patch/PatchUtil.java | 79 ++++++++++++++++++++ 4 files changed, 100 insertions(+), 0 deletions(-) create mode 100644 org.spearce.jgit/src/org/spearce/jgit/patch/PatchUtil.java diff --git a/org.spearce.jgit/src/org/spearce/jgit/patch/BinaryHunk.java b/org.spearce.jgit/src/org/spearce/jgit/patch/BinaryHunk.java index f43a1b9..f4e2ee3 100644 --- a/org.spearce.jgit/src/org/spearce/jgit/patch/BinaryHunk.java +++ b/org.spearce.jgit/src/org/spearce/jgit/patch/BinaryHunk.java @@ -42,6 +42,8 @@ import static org.spearce.jgit.util.RawParseUtils.nextLF; import static org.spearce.jgit.util.RawParseUtils.parseBase10; +import org.spearce.jgit.lib.Constants; + /** Part of a "GIT binary patch" to describe the pre-image or post-image */ public class BinaryHunk { private static final byte[] LITERAL = encodeASCII("literal "); @@ -96,6 +98,12 @@ public int getEndOffset() { return endOffset; } + /** @return text of this patch file's script; best-effort decoded */ + public String getHunkText() { + return PatchUtil.decode(Constants.CHARSET, getBuffer(), + getStartOffset(), getEndOffset()); + } + /** @return type of this binary hunk */ public Type getType() { return type; diff --git a/org.spearce.jgit/src/org/spearce/jgit/patch/FileHeader.java b/org.spearce.jgit/src/org/spearce/jgit/patch/FileHeader.java index 7c3a45a..0110f4a 100644 --- a/org.spearce.jgit/src/org/spearce/jgit/patch/FileHeader.java +++ 
b/org.spearce.jgit/src/org/spearce/jgit/patch/FileHeader.java @@ -188,6 +188,12 @@ public int getEndOffset() { return endOffset; } + /** @return text of this patch file's script; best-effort decoded */ + public String getScriptText() { + return PatchUtil.decode(Constants.CHARSET, getBuffer(), + getStartOffset(), getEndOffset()); + } + /** * Get the old name associated with this file. * <p> diff --git a/org.spearce.jgit/src/org/spearce/jgit/patch/HunkHeader.java b/org.spearce.jgit/src/org/spearce/jgit/patch/HunkHeader.java index 12c670d..5a3b590 100644 --- a/org.spearce.jgit/src/org/spearce/jgit/patch/HunkHeader.java +++ b/org.spearce.jgit/src/org/spearce/jgit/patch/HunkHeader.java @@ -42,6 +42,7 @@ import static org.spearce.jgit.util.RawParseUtils.parseBase10; import org.spearce.jgit.lib.AbbreviatedObjectId; +import org.spearce.jgit.lib.Constants; import org.spearce.jgit.util.MutableInteger; /** Hunk header describing the layout of a single block of lines */ @@ -138,6 +139,12 @@ public int getEndOffset() { return endOffset; } + /** @return text of this patch file's script; best-effort decoded */ + public String getHunkText() { + return PatchUtil.decode(Constants.CHARSET, getBuffer(), + getStartOffset(), getEndOffset()); + } + /** @return information about the old image mentioned in this hunk. */ public OldImage getOldImage() { return old; diff --git a/org.spearce.jgit/src/org/spearce/jgit/patch/PatchUtil.java b/org.spearce.jgit/src/org/spearce/jgit/patch/PatchUtil.java new file mode 100644 index 0000000..89136c0 --- /dev/null +++ b/org.spearce.jgit/src/org/spearce/jgit/patch/PatchUtil.java @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2008, Google Inc. + * + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Git Development Community nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.spearce.jgit.patch; + +import java.nio.charset.Charset; + +import org.spearce.jgit.util.RawParseUtils; + +/** Patch related utility functions. */ +public class PatchUtil { + /** + * Decode a region of a buffer one line at a time. + * <p> + * Unlike {@link RawParseUtils#decode(Charset, byte[], int, int)} this + * method reads the input one line at a time and decodes each line + * individually. 
This permits a decoding of a file converting from + * ISO-8859-1 to UTF-8 encoding (for example), as each line in the patch + * script will be in one encoding or the other. + * + * @param cs + * preferred character set to use when decoding the buffer. + * @param buf + * buffer to pull the raw bytes from. + * @param ptr + * first position to read. + * @param end + * one position past the last position to read. + * @return a string representation of the region, decoded per-line. + */ + public static String decode(final Charset cs, final byte[] buf, int ptr, + final int end) { + final StringBuilder r = new StringBuilder(end - ptr); + while (ptr < end) { + final int eol = Math.min(end, RawParseUtils.nextLF(buf, ptr)); + r.append(RawParseUtils.decode(cs, buf, ptr, eol)); + ptr = eol; + } + return r.toString(); + } + + private PatchUtil() { + // No instances + } +} -- 1.6.1.rc2.306.ge5d5e -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html