[JGIT PATCH 04/15] Add lineMap computer to RawParseUtils to index locations of line starts

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Some algorithms like a diff or patch require efficient lookup of lines
within a source file, using a 1 based line number.  lineMap produces a
list of offsets within the file where each line starts, providing O(1)
lookup time to locate those positions.

Signed-off-by: Shawn O. Pearce <spearce@xxxxxxxxxxx>
---
 .../tst/org/spearce/jgit/util/IntListTest.java     |   24 ++++++
 .../jgit/util/RawParseUtils_LineMapTest.java       |   88 ++++++++++++++++++++
 .../src/org/spearce/jgit/util/IntList.java         |   15 ++++
 .../src/org/spearce/jgit/util/RawParseUtils.java   |   31 +++++++
 4 files changed, 158 insertions(+), 0 deletions(-)
 create mode 100644 org.spearce.jgit.test/tst/org/spearce/jgit/util/RawParseUtils_LineMapTest.java

diff --git a/org.spearce.jgit.test/tst/org/spearce/jgit/util/IntListTest.java b/org.spearce.jgit.test/tst/org/spearce/jgit/util/IntListTest.java
index f943075..c470d55 100644
--- a/org.spearce.jgit.test/tst/org/spearce/jgit/util/IntListTest.java
+++ b/org.spearce.jgit.test/tst/org/spearce/jgit/util/IntListTest.java
@@ -103,6 +103,30 @@ public void testAdd_LargeGroup() {
 		}
 	}
 
+	public void testFillTo0() {
+		final IntList i = new IntList();
+		i.fillTo(0, Integer.MIN_VALUE);
+		assertEquals(0, i.size());
+	}
+
+	public void testFillTo1() {
+		final IntList i = new IntList();
+		i.fillTo(1, Integer.MIN_VALUE);
+		assertEquals(1, i.size());
+		i.add(0);
+		assertEquals(Integer.MIN_VALUE, i.get(0));
+		assertEquals(0, i.get(1));
+	}
+
+	public void testFillTo100() {
+		final IntList i = new IntList();
+		i.fillTo(100, Integer.MIN_VALUE);
+		assertEquals(100, i.size());
+		i.add(3);
+		assertEquals(Integer.MIN_VALUE, i.get(99));
+		assertEquals(3, i.get(100));
+	}
+
 	public void testClear() {
 		final IntList i = new IntList();
 		final int n = 5;
diff --git a/org.spearce.jgit.test/tst/org/spearce/jgit/util/RawParseUtils_LineMapTest.java b/org.spearce.jgit.test/tst/org/spearce/jgit/util/RawParseUtils_LineMapTest.java
new file mode 100644
index 0000000..3f562a4
--- /dev/null
+++ b/org.spearce.jgit.test/tst/org/spearce/jgit/util/RawParseUtils_LineMapTest.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2008, Google Inc.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials provided
+ *   with the distribution.
+ *
+ * - Neither the name of the Git Development Community nor the
+ *   names of its contributors may be used to endorse or promote
+ *   products derived from this software without specific prior
+ *   written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.spearce.jgit.util;
+
+import java.io.UnsupportedEncodingException;
+
+import junit.framework.TestCase;
+
+public class RawParseUtils_LineMapTest extends TestCase {
+	public void testEmpty() {
+		final IntList map = RawParseUtils.lineMap(new byte[] {}, 0, 0);
+		assertNotNull(map);
+		assertEquals(1, map.size());
+		assertEquals(Integer.MIN_VALUE, map.get(0));
+	}
+
+	public void testOneBlankLine() {
+		final IntList map = RawParseUtils.lineMap(new byte[] { '\n' }, 0, 1);
+		assertEquals(2, map.size());
+		assertEquals(Integer.MIN_VALUE, map.get(0));
+		assertEquals(0, map.get(1));
+	}
+
+	public void testTwoLineFooBar() throws UnsupportedEncodingException {
+		final byte[] buf = "foo\nbar\n".getBytes("ISO-8859-1");
+		final IntList map = RawParseUtils.lineMap(buf, 0, buf.length);
+		assertEquals(3, map.size());
+		assertEquals(Integer.MIN_VALUE, map.get(0));
+		assertEquals(0, map.get(1));
+		assertEquals(4, map.get(2));
+	}
+
+	public void testTwoLineNoLF() throws UnsupportedEncodingException {
+		final byte[] buf = "foo\nbar".getBytes("ISO-8859-1");
+		final IntList map = RawParseUtils.lineMap(buf, 0, buf.length);
+		assertEquals(3, map.size());
+		assertEquals(Integer.MIN_VALUE, map.get(0));
+		assertEquals(0, map.get(1));
+		assertEquals(4, map.get(2));
+	}
+
+	public void testFourLineBlanks() throws UnsupportedEncodingException {
+		final byte[] buf = "foo\n\n\nbar\n".getBytes("ISO-8859-1");
+		final IntList map = RawParseUtils.lineMap(buf, 0, buf.length);
+		assertEquals(5, map.size());
+		assertEquals(Integer.MIN_VALUE, map.get(0));
+		assertEquals(0, map.get(1));
+		assertEquals(4, map.get(2));
+		assertEquals(5, map.get(3));
+		assertEquals(6, map.get(4));
+	}
+
+}
diff --git a/org.spearce.jgit/src/org/spearce/jgit/util/IntList.java b/org.spearce.jgit/src/org/spearce/jgit/util/IntList.java
index 1445f88..2824074 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/util/IntList.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/util/IntList.java
@@ -93,6 +93,21 @@ public void add(final int n) {
 		entries[count++] = n;
 	}
 
+	/**
+	 * Pad the list with entries.
+	 * 
+	 * @param toIndex
+	 *            index position to stop filling at. 0 inserts no filler. 1
+	 *            ensures the list has a size of 1, adding <code>val</code> if
+	 *            the list is currently empty.
+	 * @param val
+	 *            value to insert into padded positions.
+	 */
+	public void fillTo(int toIndex, final int val) {
+		while (count < toIndex)
+			add(val);
+	}
+
 	private void grow() {
 		final int[] n = new int[(entries.length + 16) * 3 / 2];
 		System.arraycopy(entries, 0, n, 0, count);
diff --git a/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java b/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java
index 8896d38..6143188 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java
@@ -266,6 +266,37 @@ public static final int nextLF(final byte[] b, int ptr, final char chrA) {
 	}
 
 	/**
+	 * Index the region between <code>[ptr, end)</code> to find line starts.
+	 * <p>
+	 * The returned list is 1 indexed. Index 0 contains
+	 * {@link Integer#MIN_VALUE} to pad the list out.
+	 * <p>
+	 * Using a 1 indexed list means that line numbers can be directly accessed
+	 * from the list, so <code>list.get(1)</code> (aka get line 1) returns
+	 * <code>ptr</code>.
+	 * 
+	 * @param buf
+	 *            buffer to scan.
+	 * @param ptr
+	 *            position within the buffer corresponding to the first byte of
+	 *            line 1.
+	 * @param end
+	 *            1 past the end of the content within <code>buf</code>.
+	 * @return a line map indexing the start position of each line.
+	 */
+	public static final IntList lineMap(final byte[] buf, int ptr, int end) {
+		// Experimentally derived from multiple source repositories
+		// the average number of bytes/line is 36. Its a rough guess
+		// to initially size our map close to the target.
+		//
+		final IntList map = new IntList((end - ptr) / 36);
+		map.fillTo(1, Integer.MIN_VALUE);
+		for (; ptr < end; ptr = nextLF(buf, ptr))
+			map.add(ptr);
+		return map;
+	}
+
+	/**
 	 * Locate the "author " header line data.
 	 * 
 	 * @param b
-- 
1.6.1.rc2.306.ge5d5e

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux