Navigating the pack index for every lookup takes time, mostly because it takes resources from the memory mapping for getting the actual objects. Signed-off-by: Robin Rosenberg <robin.rosenberg@xxxxxxxxxx> --- .../jgit/lib/DeltaRefPackedObjectLoader.java | 3 .../src/org/spearce/jgit/lib/ObjectId.java | 17 ++ .../src/org/spearce/jgit/lib/ObjectIdMap.java | 145 ++++++++++++++++++++ .../src/org/spearce/jgit/lib/PackFile.java | 87 ++++++------ .../src/org/spearce/jgit/lib/Repository.java | 17 +- .../tst/org/spearce/jgit/lib/ObjectIdMapTest.java | 170 +++++++++++++++++++++++ .../tst/org/spearce/jgit/lib/T0004_PackReader.java | 2 7 files changed, 377 insertions(+), 64 deletions(-) diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/DeltaRefPackedObjectLoader.java b/org.spearce.jgit/src/org/spearce/jgit/lib/DeltaRefPackedObjectLoader.java index 2719738..90c01ea 100644 --- a/org.spearce.jgit/src/org/spearce/jgit/lib/DeltaRefPackedObjectLoader.java +++ b/org.spearce.jgit/src/org/spearce/jgit/lib/DeltaRefPackedObjectLoader.java @@ -15,8 +15,7 @@ class DeltaRefPackedObjectLoader extends DeltaPackedObjectLoader { } protected ObjectLoader getBaseLoader() throws IOException { - final ObjectLoader or = pack.get(deltaBase, - new byte[Constants.OBJECT_ID_LENGTH]); + final ObjectLoader or = pack.get(deltaBase); if (or == null) throw new MissingObjectException(deltaBase, "delta base"); return or; diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/ObjectId.java b/org.spearce.jgit/src/org/spearce/jgit/lib/ObjectId.java index fea0d91..f99c303 100644 --- a/org.spearce.jgit/src/org/spearce/jgit/lib/ObjectId.java +++ b/org.spearce.jgit/src/org/spearce/jgit/lib/ObjectId.java @@ -83,10 +83,20 @@ public class ObjectId implements Comparable { return new ObjectId(id); } - private static int compare(final byte[] a, final byte[] b) { - if (a==b) - return 0; + public int compareTo(byte[] b, long pos) { for (int k = 0; k < Constants.OBJECT_ID_LENGTH; k++) { + final int ak = id[k] & 0xff; + final int bk = b[k + (int)pos] & 0xff; + if (ak < bk) + return -1; + else if (ak > bk) + return 1; + } + return 0; + } + + private static int compare(final byte[] a, final byte[] b) { + for (int k = 0 ; k < Constants.OBJECT_ID_LENGTH; k++) { final int ak = a[k] & 0xff; final int bk = b[k] & 0xff; if (ak < bk) @@ -218,4 +228,5 @@ public class ObjectId implements Comparable { } return new String(s,0); } + } diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/ObjectIdMap.java b/org.spearce.jgit/src/org/spearce/jgit/lib/ObjectIdMap.java new file mode 100644 index 0000000..c397a0d --- /dev/null +++ b/org.spearce.jgit/src/org/spearce/jgit/lib/ObjectIdMap.java @@ -0,0 +1,145 @@ +/* + * Copyright (C) 2006 Robin Rosenberg <robin.rosenberg@xxxxxxxxxx> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License, version 2, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 + */ +package org.spearce.jgit.lib; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; + +/** Very much like a map, but specialized + * to partition the data on the first byte + * of the key. This is MUCH faster. See test + * class for how these numbers were derived. + * + * TreeMap= 2968 + * HashMap= 1689 + * Partitioned TreeMap=1499 + * Partitioned HashMap=1782 + * + * Inspiration from Git pack file format which uses this technique. + * + */ +public class ObjectIdMap implements Map { + + Map[] level0 = new Map[256]; + + public ObjectIdMap() { + this(new TreeMap()); + } + + public ObjectIdMap(Map sample) { + try { + Method m=sample.getClass().getMethod("clone", null); + for (int i=0; i<256; ++i) { + level0[i] = (Map)m.invoke(sample, null); + } + } catch (IllegalAccessException e) { + throw new IllegalArgumentException(e); + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException(e); + } catch (InvocationTargetException e) { + throw new IllegalArgumentException(e); + } catch (SecurityException e) { + throw new IllegalArgumentException(e); + } catch (NoSuchMethodException e) { + throw new IllegalArgumentException(e); + } + } + + public void clear() { + for (int i=0; i<256; ++i) + level0[i].clear(); + } + + public boolean containsKey(Object key) { + return submap(key).containsKey(key); + } + + private final Map submap(Object key) { + return level0[((ObjectId)key).getFirstByte()]; + } + + public boolean containsValue(Object value) { + for (int i=0; i<256; ++i) + if (level0[i].containsValue(value)) + return true; + return false; + } + + public Set entrySet() { + Set ret = new HashSet(); + for (int i=0; i<256; ++i) + ret.addAll(level0[i].entrySet()); + return ret; + } + + public Object get(Object key) { + return submap(key).get(key); + } + + public boolean isEmpty() { + for (int i=0; i<256; ++i) + if (!level0[i].isEmpty()) + return false; + return true; + } + + public Set keySet() { + Set ret = new HashSet(); + for (int i=0; i<256; ++i) + ret.addAll(level0[i].keySet()); + return ret; + } + + public Object put(Object key, Object value) { + return submap(key).put(key, value); + } + + public void putAll(Map arg0) { + for (Iterator i=arg0.keySet().iterator(); i.hasNext(); ) { + Object k=i.next(); + Object v=arg0.get(k); + put(k,v); + } + } + + public Object remove(Object key) { + return submap(key).remove(key); + } + + public int size() { + int ret=0; + for (int i=0; i<256; ++i) + ret += level0[i].size(); + return ret; + } + + public Collection values() { + List ret=new ArrayList(size()); + for (int i=0; i<256; ++i) + ret.addAll(level0[i].values()); + return ret; + } + +} diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/PackFile.java b/org.spearce.jgit/src/org/spearce/jgit/lib/PackFile.java index d33aa97..fa206fd 100644 --- a/org.spearce.jgit/src/org/spearce/jgit/lib/PackFile.java +++ b/org.spearce.jgit/src/org/spearce/jgit/lib/PackFile.java @@ -32,8 +32,7 @@ public class PackFile { private final WindowedFile pack; private final WindowedFile idx; - - private final long[] idxHeader; + private byte[][] idxdata; private long objectCnt; @@ -52,10 +51,9 @@ public class PackFile { .substring(0, dot) + ".idx"); // FIXME window size and mmap type should be configurable - idx = new WindowedFile(repo.getWindowCache(), idxFile, - 64 * 1024 * 1024, true); + idx = new WindowedFile(new WindowCache(8*1024*1024,1), idxFile, 8*1024*1024, true); try { - idxHeader = readIndexHeader(); + readIndexHeader(); } catch (IOException ioe) { try { idx.close(); @@ -75,7 +73,7 @@ public class PackFile { } ObjectLoader resolveBase(final long ofs) throws IOException { - return reader(ofs, new byte[Constants.OBJECT_ID_LENGTH]); + return reader(ofs); } /** @@ -87,19 +85,10 @@ public class PackFile { * * @param id * the object to look for. Must not be null. - * @param tmp - * a temporary buffer loaned to this pack for use during the - * search. This buffer must be at least - * {@link Constants#OBJECT_ID_LENGTH} bytes in size. The buffer - * will be overwritten during the search, but is unused upon - * return. * @return true if the object is in this pack; false otherwise. - * @throws IOException - * there was an error reading data from the pack's index file. */ - public boolean hasObject(final ObjectId id, final byte[] tmp) - throws IOException { - return findOffset(id, tmp) != -1; + public boolean hasObject(final ObjectId id) { + return findOffset(id) != -1; } /** @@ -116,25 +105,17 @@ public class PackFile { * * @param id * the object to obtain from the pack. Must not be null. - * @param tmp - * a temporary buffer loaned to this pack for use during the - * search, and given to the returned loader if the object is - * found. This buffer must be at least - * {@link Constants#OBJECT_ID_LENGTH} bytes in size. The buffer - * will be overwritten during the search. The buffer will be - * given to the loader if a loader is returned. If null is - * returned the caller may reuse the buffer. * @return the object loader for the requested object if it is contained in * this pack; null if the object was not found. * @throws IOException * the pack file or the index could not be read. */ - public PackedObjectLoader get(final ObjectId id, final byte[] tmp) + public PackedObjectLoader get(final ObjectId id) throws IOException { - final long offset = findOffset(id, tmp); + final long offset = findOffset(id); if (offset == -1) return null; - final PackedObjectLoader objReader = reader(offset, tmp); + final PackedObjectLoader objReader = reader(offset); objReader.setId(id); return objReader; } @@ -173,22 +154,35 @@ public class PackFile { objectCnt = pack.readUInt32(position, intbuf); } - private long[] readIndexHeader() throws CorruptObjectException, IOException { + private void readIndexHeader() throws CorruptObjectException, IOException { if (idx.length() != (IDX_HDR_LEN + (24 * objectCnt) + (2 * Constants.OBJECT_ID_LENGTH))) throw new CorruptObjectException("Invalid pack index"); - final long[] idxHeader = new long[256]; + final long[] idxHeader = new long[256]; // really unsigned 32-bit... final byte[] intbuf = new byte[4]; for (int k = 0; k < idxHeader.length; k++) idxHeader[k] = idx.readUInt32(k * 4, intbuf); - return idxHeader; + idxdata = new byte[idxHeader.length][]; + for (int k = 0; k < idxHeader.length; k++) { + int n; + if (k == 0) { + n = (int)(idxHeader[k]); + } else { + n = (int)(idxHeader[k]-idxHeader[k-1]); + } + if (n > 0) { + idxdata[k] = new byte[n * (Constants.OBJECT_ID_LENGTH + 4)]; + int off = (int) ((k == 0) ? 0 : idxHeader[k-1] * (Constants.OBJECT_ID_LENGTH + 4)); + idx.read(off + IDX_HDR_LEN, idxdata[k]); + } + } } - private PackedObjectLoader reader(final long objOffset, final byte[] ib) + private PackedObjectLoader reader(final long objOffset) throws IOException { long pos = objOffset; int p = 0; - + final byte[] ib = new byte[Constants.OBJECT_ID_LENGTH]; pack.readFully(pos, ib); int c = ib[p++] & 0xff; final int typeCode = (c >> 4) & 7; @@ -239,23 +233,26 @@ public class PackFile { return new WholePackedObjectLoader(this, pos, type, (int) size); } - private long findOffset(final ObjectId objId, final byte[] tmpid) - throws IOException { + private long findOffset(final ObjectId objId) { final int levelOne = objId.getFirstByte(); - long high = idxHeader[levelOne]; - long low = levelOne == 0 ? 0 : idxHeader[levelOne - 1]; - + byte[] data = idxdata[levelOne]; + if (data == null) + return -1; + long high = data.length / (4 + Constants.OBJECT_ID_LENGTH); + long low = 0; do { final long mid = (low + high) / 2; - final long pos = IDX_HDR_LEN - + ((4 + Constants.OBJECT_ID_LENGTH) * mid) + 4; - idx.readFully(pos, tmpid); - final int cmp = objId.compareTo(tmpid); + final long pos = ((4 + Constants.OBJECT_ID_LENGTH) * mid) + 4; + final int cmp = objId.compareTo(data, pos); if (cmp < 0) high = mid; - else if (cmp == 0) - return idx.readUInt32(pos - 4, tmpid); - else + else if (cmp == 0) { + int b0 = data[(int)pos-4] & 0xff; + int b1 = data[(int)pos-3] & 0xff; + int b2 = data[(int)pos-2] & 0xff; + int b3 = data[(int)pos-1] & 0xff; + return (((long)b0) << 24) | ( b1 << 16 ) | ( b2 << 8 ) | (b3); + } else low = mid + 1; } while (low < high); return -1; diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/Repository.java b/org.spearce.jgit/src/org/spearce/jgit/lib/Repository.java index 0f2a900..482f41d 100644 --- a/org.spearce.jgit/src/org/spearce/jgit/lib/Repository.java +++ b/org.spearce.jgit/src/org/spearce/jgit/lib/Repository.java @@ -152,17 +152,9 @@ public class Repository { public boolean hasObject(final ObjectId objectId) { int k = packs.length; if (k > 0) { - final byte[] tmp = new byte[Constants.OBJECT_ID_LENGTH]; do { - try { - if (packs[--k].hasObject(objectId, tmp)) - return true; - } catch (IOException ioe) { - // This shouldn't happen unless the pack was corrupted - // after we opened it. We'll ignore the error as though - // the object does not exist in this pack. - // - } + if (packs[--k].hasObject(objectId)) + return true; } while (k > 0); } return toFile(objectId).isFile(); @@ -171,10 +163,9 @@ public class Repository { public ObjectLoader openObject(final ObjectId id) throws IOException { int k = packs.length; if (k > 0) { - final byte[] tmp = new byte[Constants.OBJECT_ID_LENGTH]; do { try { - final ObjectLoader ol = packs[--k].get(id, tmp); + final ObjectLoader ol = packs[--k].get(id); if (ol != null) return ol; } catch (IOException ioe) { @@ -185,7 +176,7 @@ public class Repository { // time to collect and try once more. try { System.gc(); - final ObjectLoader ol = packs[k].get(id, tmp); + final ObjectLoader ol = packs[k].get(id); if (ol != null) return ol; } catch (IOException ioe2) { diff --git a/org.spearce.jgit/tst/org/spearce/jgit/lib/ObjectIdMapTest.java b/org.spearce.jgit/tst/org/spearce/jgit/lib/ObjectIdMapTest.java new file mode 100644 index 0000000..f98c6f7 --- /dev/null +++ b/org.spearce.jgit/tst/org/spearce/jgit/lib/ObjectIdMapTest.java @@ -0,0 +1,170 @@ +/* + * Copyright (C) 2006 Robin Rosenberg <robin.rosenberg@xxxxxxxxxx> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License, version 2, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 + */ +package org.spearce.jgit.lib; + +import java.util.Comparator; +import java.util.HashMap; +import java.util.Map; +import java.util.TreeMap; + +import junit.framework.TestCase; +import junit.textui.TestRunner; + +public class ObjectIdMapTest extends TestCase { + + ObjectId[] ids = new ObjectId[1000000]; + + protected void setUp() throws Exception { + int b=0; + for (int i=0; i<ids.length; ++i) { + byte[] data = new byte[Constants.OBJECT_ID_LENGTH]; + for (int j=0; j<Constants.OBJECT_ID_LENGTH; ++j) + data[j] = (byte) (b++^0xEE); + ids[i] = new ObjectId(data); + } + } + + protected void tearDown() throws Exception { + ids = null; // avoid out of memory + } + + public void testBoth() { + long d1=0; + long d2=0; + long d3=0; + long d4=0; + long d5=0; + long d6=0; + + for (int j=0; j<64; ++j) { + int x = + ((j & 1)!=0 ? 1 : 0) | + ((j & 2)!=0 ? 2 : 0) | + ((j & 4)!=0 ? 16 : 0) | + ((j & 8)!=0 ? 32 : 0) | + ((j & 16)!=0 ? 4 : 0) | + ((j & 32)!=0 ? 8 : 0); + + if ((x&1) == 0) { + long t0 = System.currentTimeMillis(); + + Map treeMap = new TreeMap(); + for (int i=0; i<ids.length; ++i) + treeMap.put(ids[i],ids[i]); + + long t1 = System.currentTimeMillis(); + d1 += t1-t0; + } + if ((x&2) == 0) { + long t0 = System.currentTimeMillis(); + Map hashMap = new HashMap(); + for (int i=0; i<ids.length; ++i) + hashMap.put(ids[i],ids[i]); + long t1 = System.currentTimeMillis(); + d2 += t1-t0; + } + + if ((x&4) == 0) { + long t0= System.currentTimeMillis(); + + Map levelMapWithTree = new ObjectIdMap(new TreeMap()); + for (int i=0; i<ids.length; ++i) + levelMapWithTree.put(ids[i],ids[i]); + + long t1 = System.currentTimeMillis(); + d3 += t1-t0; + } + + if ((x&8) == 0) { + long t0 = System.currentTimeMillis(); + Map levelMapWithHash = new ObjectIdMap(new HashMap()); + for (int i=0; i<ids.length; ++i) + levelMapWithHash.put(ids[i],ids[i]); + + long t1 = System.currentTimeMillis(); + + d4 += t1-t0; + } + + if ((x&16) == 0) { + long t0= System.currentTimeMillis(); + + Map levelMapWithTreeAndSpecialCompare = new ObjectIdMap(new TreeMap(new Comparator() { + + public int compare(Object arg0, Object arg1) { + byte[] b0=((ObjectId)arg0).getBytes(); + byte[] b1=((ObjectId)arg1).getBytes(); + for (int i=1; i<Constants.OBJECT_ID_LENGTH; ++i) { + int a=b0[i]&0xff; + int b=b1[i]&0xff; + int c=a-b; + if (c!=0) + return c; + } + return 0; + } + + })); + for (int i=0; i<ids.length; ++i) + levelMapWithTreeAndSpecialCompare.put(ids[i],ids[i]); + + long t1 = System.currentTimeMillis(); + d5 += t1-t0; + } + + if ((j&32) == 0) { + long t0= System.currentTimeMillis(); + + Map levelMapWithTreeAndSpecialCompare = new ObjectIdMap(new TreeMap(new Comparator() { + + public int compare(Object arg0, Object arg1) { + return ((Comparable)arg0).compareTo(arg1); + } + + })); + for (int i=0; i<ids.length; ++i) + levelMapWithTreeAndSpecialCompare.put(ids[i],ids[i]); + + long t1 = System.currentTimeMillis(); + d6 += t1-t0; + } + } + + System.out.println("TreeMap ="+d1); + System.out.println("HashMap ="+d2); + System.out.println("Partitioned TreeMap ObjectId.compare ="+d3); + System.out.println("Partitioned HashMap ="+d4); + System.out.println("Partitioned TreeMap enhanced compare ="+d5); + System.out.println("Partitioned TreeMap dummy compare ="+d6); + assertEquals(d5*10/10000, d2*8/10000); // d5 is ~20% better + } + + public void testFunc() { + Map treeMap = new TreeMap(); + for (int i=0; i<ids.length/100; ++i) + treeMap.put(ids[i],ids[i]); + Map levelMapWithTree = new ObjectIdMap(new TreeMap()); + for (int i=0; i<ids.length/100; ++i) + levelMapWithTree.put(ids[i],ids[i]); + + assertEquals(treeMap, levelMapWithTree); + } + + public static void main(String[] args) { + TestRunner.run(ObjectIdMapTest.class); + } +} diff --git a/org.spearce.jgit/tst/org/spearce/jgit/lib/T0004_PackReader.java b/org.spearce.jgit/tst/org/spearce/jgit/lib/T0004_PackReader.java index 24f6b03..118415d 100644 --- a/org.spearce.jgit/tst/org/spearce/jgit/lib/T0004_PackReader.java +++ b/org.spearce.jgit/tst/org/spearce/jgit/lib/T0004_PackReader.java @@ -30,7 +30,7 @@ public class T0004_PackReader extends RepositoryTestCase { id = new ObjectId("902d5476fa249b7abc9d84c611577a81381f0327"); pr = new PackFile(db, TEST_PACK); - or = pr.get(id, new byte[Constants.OBJECT_ID_LENGTH]); + or = pr.get(id); assertNotNull(or); assertEquals(id, or.getId()); assertEquals(Constants.TYPE_TREE, or.getType()); - To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html