When indexing a pack file coming in from the network there may be an object contained in the pack that we already have. Its only safe to retain that object in our newly stored pack if it exactly matches the content we already have on disk. Storing a different content for the same object name runs the risk of allowing a malicious attacker to replace an object in our store. Data validation is performed the first time we have the object in memory, which occurs when we first compute its SHA-1. This is the earliest opportunity we have to validate that there is no collision, and it reduces the number of times we need to do a read for an object. However our memory usage when we process a whole object is now increased as the collision test is done by loading the entire object into memory. Future improvements may permit a streaming compare of the objects. Signed-off-by: Shawn O. Pearce <spearce@xxxxxxxxxxx> --- This is only the first in what will be a series to add basic fsck support to jgit. My day-job application is accepting untrusted input through IndexPack so I need to fully validate the objects are sane before I can allow them to enter the local repository. In the end we'll honor receive.fsckObjects. But right now this is enough to detect evil SHA-1 collisions, and is the same level of protection that git.git's index-pack has. I'm offering this up for comment because its done and ready for inclusion. The rest of the object fsck rules are going to take a bit more time, but I hope to have most of them done tomorrow or the day after. .../src/org/spearce/jgit/transport/IndexPack.java | 63 +++++++++++++++++--- 1 files changed, 54 insertions(+), 9 deletions(-) diff --git a/org.spearce.jgit/src/org/spearce/jgit/transport/IndexPack.java b/org.spearce.jgit/src/org/spearce/jgit/transport/IndexPack.java index bc52896..8a66ec9 100644 --- a/org.spearce.jgit/src/org/spearce/jgit/transport/IndexPack.java +++ b/org.spearce.jgit/src/org/spearce/jgit/transport/IndexPack.java @@ -375,6 +375,7 @@ private void resolveDeltas(final long pos, final int oldCRC, int type, objectDigest.update(data); tempObjectId.fromRaw(objectDigest.digest(), 0); + verifyNoCollision(type, data); oe = new PackedObjectInfo(pos, crc32, tempObjectId); entries[entryCount++] = oe; } @@ -622,7 +623,7 @@ private void indexOneObject() throws IOException { r = new ArrayList<UnresolvedDelta>(8); baseByPos.put(base, r); } - inflateFromInput(false); + skipInflateFromInput(sz); r.add(new UnresolvedDelta(pos, (int) crc.getValue())); deltaCount++; break; @@ -637,7 +638,7 @@ private void indexOneObject() throws IOException { r = new ArrayList<UnresolvedDelta>(8); baseById.put(base, r); } - inflateFromInput(false); + skipInflateFromInput(sz); r.add(new UnresolvedDelta(pos, (int) crc.getValue())); deltaCount++; break; @@ -649,17 +650,30 @@ private void indexOneObject() throws IOException { private void whole(final int type, final long pos, final long sz) throws IOException { + final byte[] data = inflateFromInput(sz); objectDigest.update(Constants.encodedTypeString(type)); objectDigest.update((byte) ' '); objectDigest.update(Constants.encodeASCII(sz)); objectDigest.update((byte) 0); - inflateFromInput(true); + objectDigest.update(data); tempObjectId.fromRaw(objectDigest.digest(), 0); + verifyNoCollision(type, data); final int crc32 = (int) crc.getValue(); entries[entryCount++] = new PackedObjectInfo(pos, crc32, tempObjectId); } + private void verifyNoCollision(final int type, final byte[] data) + throws IOException { + final ObjectLoader ldr = repo.openObject(tempObjectId); + if (ldr != null) { + final byte[] existingData = ldr.getCachedBytes(); + if (ldr.getType() != type || !Arrays.equals(data, existingData)) { + throw new IOException("collision in " + tempObjectId.name()); + } + } + } + // Current position of {@link #bOffset} within the entire file. private long position() { return bBase + bOffset; @@ -747,7 +761,7 @@ private void sync() throws IOException { bOffset = 0; } - private void inflateFromInput(final boolean digest) throws IOException { + private void skipInflateFromInput(long sz) throws IOException { final Inflater inf = inflater; try { final byte[] dst = objectData; @@ -765,21 +779,52 @@ private void inflateFromInput(final boolean digest) throws IOException { int free = dst.length - n; if (free < 8) { - if (digest) - objectDigest.update(dst, 0, n); + sz -= n; n = 0; free = dst.length; } - n += inf.inflate(dst, n, free); } - if (digest) - objectDigest.update(dst, 0, n); + if (n != sz) + throw new DataFormatException("wrong decompressed length"); + n = bAvail - inf.getRemaining(); + if (n > 0) { + crc.update(buf, p, n); + use(n); + } + } catch (DataFormatException dfe) { + throw corrupt(dfe); + } finally { + inf.reset(); + } + } + + private byte[] inflateFromInput(final long sz) throws IOException { + final byte[] dst = new byte[(int) sz]; + final Inflater inf = inflater; + try { + int n = 0; + int p = -1; + while (!inf.finished()) { + if (inf.needsInput()) { + if (p >= 0) { + crc.update(buf, p, bAvail); + use(bAvail); + } + p = fillFromInput(1); + inf.setInput(buf, p, bAvail); + } + + n += inf.inflate(dst, n, dst.length - n); + } + if (n != sz) + throw new DataFormatException("wrong decompressed length"); n = bAvail - inf.getRemaining(); if (n > 0) { crc.update(buf, p, n); use(n); } + return dst; } catch (DataFormatException dfe) { throw corrupt(dfe); } finally { -- 1.6.0.2.513.g6dbd -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html