Let us quickly find ObjectId or next object for specified offset in a pack, in O(log n) time. Current implementation uses one level (reverse) indexing by an offset. However, it tries to take small space as possible, by using 2 distinct arrays for offsets with value < 2^31 (int[]) and those with value > 2^31 (long[]). Offsets are stored ordered in these 2 arrays. Binary search is performed during requests. Reverse index is constructed from an existing PackIndex instance, by lazy initialization in a PackFile instance. Signed-off-by: Marek Zawirski <marek.zawirski@xxxxxxxxx> --- .../src/org/spearce/jgit/lib/PackFile.java | 20 +++ .../src/org/spearce/jgit/lib/PackReverseIndex.java | 179 ++++++++++++++++++++ 2 files changed, 199 insertions(+), 0 deletions(-) create mode 100644 org.spearce.jgit/src/org/spearce/jgit/lib/PackReverseIndex.java diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/PackFile.java b/org.spearce.jgit/src/org/spearce/jgit/lib/PackFile.java index 1b2c167..3880966 100644 --- a/org.spearce.jgit/src/org/spearce/jgit/lib/PackFile.java +++ b/org.spearce.jgit/src/org/spearce/jgit/lib/PackFile.java @@ -55,6 +55,8 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> { private final PackIndex idx; + private PackReverseIndex reverseIdx; + /** * Construct a reader for an existing, pre-indexed packfile. * @@ -172,6 +174,18 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> { return idx.getObjectCount(); } + /** + * Search for object id with the specified start offset in associated pack + * (reverse) index. + * + * @param offset + * start offset of object to find + * @return object id for this offset, or null if no object was found + */ + ObjectId findObjectForOffset(final long offset) { + return getReverseIdx().findObject(offset); + } + final UnpackedObjectCache.Entry readCache(final long position) { return UnpackedObjectCache.get(pack, position); } @@ -264,4 +278,10 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> { throw new IOException("Unknown object type " + typeCode + "."); } } + + private PackReverseIndex getReverseIdx() { + if (reverseIdx == null) + reverseIdx = new PackReverseIndex(idx); + return reverseIdx; + } } diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/PackReverseIndex.java b/org.spearce.jgit/src/org/spearce/jgit/lib/PackReverseIndex.java new file mode 100644 index 0000000..3dede88 --- /dev/null +++ b/org.spearce.jgit/src/org/spearce/jgit/lib/PackReverseIndex.java @@ -0,0 +1,179 @@ +/* + * Copyright (C) 2008, Marek Zawirski <marek.zawirski@xxxxxxxxx> + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Git Development Community nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.spearce.jgit.lib; + +import java.util.Arrays; +import java.util.Comparator; + +import org.spearce.jgit.errors.CorruptObjectException; +import org.spearce.jgit.lib.PackIndex.MutableEntry; + +/** + * <p> + * Reverse index for forward pack index. Provides operations based on offset + * instead of object id. Such offset-based reverse lookups are performed in + * O(log n) time. + * </p> + * + * @see PackIndex + * @see PackFile + */ +class PackReverseIndex { + /** + * (offset31, truly) Offsets accommodating in 31 bits. + */ + private final int offsets32[]; + + /** + * Offsets not accommodating in 31 bits. + */ + private final long offsets64[]; + + /** + * Object ids corresponding to {@link #offsets32} and {@link #offsets64}. + */ + private final int names[]; + + /** + * Create reverse index from straight/forward pack index, by indexing all + * its entries. + * + * @param index + * forward index - entries to (reverse) index. + */ + PackReverseIndex(final PackIndex index) { + final long count = index.getObjectCount(); + if (count > Integer.MAX_VALUE) + throw new IllegalArgumentException( + "Huge indexes (> 2^31 entries) are not supported by jgit, yet"); + + final MutableEntry entries[] = new MutableEntry[(int) count]; + int i = 0; + int count32 = 0; + for (MutableEntry me : index) { + entries[i++] = me.cloneEntry(); + if (me.getOffset() <= Integer.MAX_VALUE) + count32++; + } + Arrays.sort(entries, new Comparator<MutableEntry>() { + public int compare(MutableEntry o1, MutableEntry o2) { + return Long.signum(o1.getOffset() - o2.getOffset()); + } + }); + + names = new int[entries.length * Constants.OBJECT_ID_LENGTH / 4]; + offsets32 = new int[count32]; + offsets64 = new long[entries.length - count32]; + for (int j = 0, j32 = 0; j < entries.length; j++) { + final long offset = entries[j].getOffset(); + if (offset <= Integer.MAX_VALUE) + offsets32[j32++] = (int) offset; + else + offsets64[j - j32] = offset; + entries[j].copyRawTo(names, j * Constants.OBJECT_ID_LENGTH / 4); + } + } + + /** + * Search for object id with the specified start offset in this pack + * (reverse) index. + * + * @param offset + * start offset of object to find. + * @return object id for this offset, or null if no object was found. + */ + ObjectId findObject(final long offset) { + if (offset <= Integer.MAX_VALUE) { + final int i32 = Arrays.binarySearch(offsets32, (int) offset); + if (i32 < 0) + return null; + final int iNames = i32 * Constants.OBJECT_ID_LENGTH / 4; + return ObjectId.fromRaw(names, iNames); + } else { + final int i64 = Arrays.binarySearch(offsets64, offset); + if (i64 < 0) + return null; + final int iNames = (i64 + offsets32.length) + * Constants.OBJECT_ID_LENGTH / 4; + return ObjectId.fromRaw(names, iNames); + } + } + + /** + * Search for the next offset to the specified offset in this pack (reverse) + * index. + * + * @param offset + * start offset of previous object (must be valid-existing + * offset). + * @param maxOffset + * maximum offset in a pack (returned when there is no next + * offset). + * @return offset of the next object in a pack or maxOffset if provided + * offset was the last one. + * @throws CorruptObjectException + * when there is no object with the provided offset. + */ + long findNextOffset(final long offset, final long maxOffset) + throws CorruptObjectException { + if (offset <= Integer.MAX_VALUE) { + final int i32 = Arrays.binarySearch(offsets32, (int) offset); + if (i32 < 0) + throw new CorruptObjectException( + "Can't find object in (reverse) pack index for the specified offset " + + offset); + + if (i32 + 1 == offsets32.length) { + if (offsets64.length > 0) + return offsets64[0]; + return maxOffset; + } + return offsets32[i32 + 1]; + } else { + final int i64 = Arrays.binarySearch(offsets64, offset); + if (i64 < 0) + throw new CorruptObjectException( + "Can't find object in (reverse) pack index for the specified offset " + + offset); + + if (i64 + 1 == offsets64.length) + return maxOffset; + return offsets64[i64 + 1]; + } + } +} -- 1.5.5.1 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html