[EGIT PATCH 05/20] Reverse pack index implementation: PackReverseIndex

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Let us quickly find ObjectId or next object for specified offset in a
pack, in O(log n) time.

Current implementation uses one level (reverse) indexing by an offset.
However, it tries to take small space as possible, by using 2 distinct
arrays for offsets with value < 2^31 (int[]) and those with value > 2^31
(long[]). Offsets are stored ordered in these 2 arrays. Binary search is
performed during requests.

Reverse index is constructed from an existing PackIndex instance, by
lazy initialization in a PackFile instance.

Signed-off-by: Marek Zawirski <marek.zawirski@xxxxxxxxx>
---
 .../src/org/spearce/jgit/lib/PackFile.java         |   20 +++
 .../src/org/spearce/jgit/lib/PackReverseIndex.java |  179 ++++++++++++++++++++
 2 files changed, 199 insertions(+), 0 deletions(-)
 create mode 100644 org.spearce.jgit/src/org/spearce/jgit/lib/PackReverseIndex.java

diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/PackFile.java b/org.spearce.jgit/src/org/spearce/jgit/lib/PackFile.java
index 1b2c167..3880966 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/lib/PackFile.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/lib/PackFile.java
@@ -55,6 +55,8 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
 
 	private final PackIndex idx;
 
+	private PackReverseIndex reverseIdx;
+
 	/**
 	 * Construct a reader for an existing, pre-indexed packfile.
 	 * 
@@ -172,6 +174,18 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
 		return idx.getObjectCount();
 	}
 
+	/**
+	 * Search for object id with the specified start offset in associated pack
+	 * (reverse) index.
+	 * 
+	 * @param offset
+	 *            start offset of object to find
+	 * @return object id for this offset, or null if no object was found
+	 */
+	ObjectId findObjectForOffset(final long offset) {
+		return getReverseIdx().findObject(offset);
+	}
+
 	final UnpackedObjectCache.Entry readCache(final long position) {
 		return UnpackedObjectCache.get(pack, position);
 	}
@@ -264,4 +278,10 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
 			throw new IOException("Unknown object type " + typeCode + ".");
 		}
 	}
+
+	private PackReverseIndex getReverseIdx() {
+		if (reverseIdx == null)
+			reverseIdx = new PackReverseIndex(idx);
+		return reverseIdx;
+	}
 }
diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/PackReverseIndex.java b/org.spearce.jgit/src/org/spearce/jgit/lib/PackReverseIndex.java
new file mode 100644
index 0000000..3dede88
--- /dev/null
+++ b/org.spearce.jgit/src/org/spearce/jgit/lib/PackReverseIndex.java
@@ -0,0 +1,179 @@
+/*
+ * Copyright (C) 2008, Marek Zawirski <marek.zawirski@xxxxxxxxx>
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials provided
+ *   with the distribution.
+ *
+ * - Neither the name of the Git Development Community nor the
+ *   names of its contributors may be used to endorse or promote
+ *   products derived from this software without specific prior
+ *   written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.spearce.jgit.lib;
+
+import java.util.Arrays;
+import java.util.Comparator;
+
+import org.spearce.jgit.errors.CorruptObjectException;
+import org.spearce.jgit.lib.PackIndex.MutableEntry;
+
+/**
+ * <p>
+ * Reverse index for forward pack index. Provides operations based on offset
+ * instead of object id. Such offset-based reverse lookups are performed in
+ * O(log n) time.
+ * </p>
+ * 
+ * @see PackIndex
+ * @see PackFile
+ */
+class PackReverseIndex {
+	/**
+	 * (offset31, truly) Offsets accommodating in 31 bits.
+	 */
+	private final int offsets32[];
+
+	/**
+	 * Offsets not accommodating in 31 bits.
+	 */
+	private final long offsets64[];
+
+	/**
+	 * Object ids corresponding to {@link #offsets32} and {@link #offsets64}.
+	 */
+	private final int names[];
+
+	/**
+	 * Create reverse index from straight/forward pack index, by indexing all
+	 * its entries.
+	 * 
+	 * @param index
+	 *            forward index - entries to (reverse) index.
+	 */
+	PackReverseIndex(final PackIndex index) {
+		final long count = index.getObjectCount();
+		if (count > Integer.MAX_VALUE)
+			throw new IllegalArgumentException(
+					"Huge indexes (> 2^31 entries) are not supported by jgit, yet");
+
+		final MutableEntry entries[] = new MutableEntry[(int) count];
+		int i = 0;
+		int count32 = 0;
+		for (MutableEntry me : index) {
+			entries[i++] = me.cloneEntry();
+			if (me.getOffset() <= Integer.MAX_VALUE)
+				count32++;
+		}
+		Arrays.sort(entries, new Comparator<MutableEntry>() {
+			public int compare(MutableEntry o1, MutableEntry o2) {
+				return Long.signum(o1.getOffset() - o2.getOffset());
+			}
+		});
+
+		names = new int[entries.length * Constants.OBJECT_ID_LENGTH / 4];
+		offsets32 = new int[count32];
+		offsets64 = new long[entries.length - count32];
+		for (int j = 0, j32 = 0; j < entries.length; j++) {
+			final long offset = entries[j].getOffset();
+			if (offset <= Integer.MAX_VALUE)
+				offsets32[j32++] = (int) offset;
+			else
+				offsets64[j - j32] = offset;
+			entries[j].copyRawTo(names, j * Constants.OBJECT_ID_LENGTH / 4);
+		}
+	}
+
+	/**
+	 * Search for object id with the specified start offset in this pack
+	 * (reverse) index.
+	 * 
+	 * @param offset
+	 *            start offset of object to find.
+	 * @return object id for this offset, or null if no object was found.
+	 */
+	ObjectId findObject(final long offset) {
+		if (offset <= Integer.MAX_VALUE) {
+			final int i32 = Arrays.binarySearch(offsets32, (int) offset);
+			if (i32 < 0)
+				return null;
+			final int iNames = i32 * Constants.OBJECT_ID_LENGTH / 4;
+			return ObjectId.fromRaw(names, iNames);
+		} else {
+			final int i64 = Arrays.binarySearch(offsets64, offset);
+			if (i64 < 0)
+				return null;
+			final int iNames = (i64 + offsets32.length)
+					* Constants.OBJECT_ID_LENGTH / 4;
+			return ObjectId.fromRaw(names, iNames);
+		}
+	}
+
+	/**
+	 * Search for the next offset to the specified offset in this pack (reverse)
+	 * index.
+	 * 
+	 * @param offset
+	 *            start offset of previous object (must be valid-existing
+	 *            offset).
+	 * @param maxOffset
+	 *            maximum offset in a pack (returned when there is no next
+	 *            offset).
+	 * @return offset of the next object in a pack or maxOffset if provided
+	 *         offset was the last one.
+	 * @throws CorruptObjectException
+	 *             when there is no object with the provided offset.
+	 */
+	long findNextOffset(final long offset, final long maxOffset)
+			throws CorruptObjectException {
+		if (offset <= Integer.MAX_VALUE) {
+			final int i32 = Arrays.binarySearch(offsets32, (int) offset);
+			if (i32 < 0)
+				throw new CorruptObjectException(
+						"Can't find object in (reverse) pack index for the specified offset "
+								+ offset);
+
+			if (i32 + 1 == offsets32.length) {
+				if (offsets64.length > 0)
+					return offsets64[0];
+				return maxOffset;
+			}
+			return offsets32[i32 + 1];
+		} else {
+			final int i64 = Arrays.binarySearch(offsets64, offset);
+			if (i64 < 0)
+				throw new CorruptObjectException(
+						"Can't find object in (reverse) pack index for the specified offset "
+								+ offset);
+
+			if (i64 + 1 == offsets64.length)
+				return maxOffset;
+			return offsets64[i64 + 1];
+		}
+	}
+}
-- 
1.5.5.1

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux