Re: x86 SHA1: Faster than OpenSSL

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



> It would be better to have a more git-centric benchmark that actually 
> shows some real git load, rather than a sha1-only microbenchmark.
> 
> The thing that I'd prefer is simply
>
>	git fsck --full
>
> on the Linux kernel archive. For me (with a fast machine), it takes about 
> 4m30s with the OpenSSL SHA1, and takes 6m40s with the Mozilla SHA1 (ie 
> using a NO_OPENSSL=1 build).

The actual goal of this effort is to address the dynamic linker startup
time issues by removing the second-largest contributor after libcurl,
namely openssl.  Optimizing the assembly code is just the fun part. ;-)

Anyway, on the git repository:

[1273]$ time x/git-fsck --full			(New SHA1 code)
dangling tree 524973049a7e4593df4af41e0564912f678a41ac
dangling tree 7da7d73185a1df5c2a477d2ee5599ac8a58cad56

real    0m59.306s
user    0m58.760s
sys     0m0.550s
[1274]$ time ./git-fsck --full			(OpenSSL)
dangling tree 524973049a7e4593df4af41e0564912f678a41ac
dangling tree 7da7d73185a1df5c2a477d2ee5599ac8a58cad56

real    1m0.364s
user    0m59.970s
sys     0m0.400s

1.6% is a pretty minor difference, especially as the machine was running
a backup at the time (but it's a quad-core, and the backup uses near-zero
CPU; its load is all I/O).

On the full Linux repository, I repacked it first to make sure that
everything was in RAM, and I have the first result:

[517]$ time ~/git/x/git-fsck --full		(New SHA1 code)

real    10m12.702s
user    9m48.410s
sys     0m23.350s
[518]$ time ~/git/git-fsck --full		(OpenSSL)

real    10m26.083s
user    10m2.800s
sys     0m22.000s

Again, 2.2% is not a huge improvement.  But my only goal was not to be worse.

> So that's an example of a load that is actually very sensitive to SHA1 
> performance (more so than _most_ git loads, I suspect), and at the same 
> time is a real git load rather than some SHA1-only microbenchmark. It also 
> shows very clearly why we default to the OpenSSL version over the Mozilla 
> one.

I wasn't questioning *that*.  As I said, I was just doing the fun part
of importing a heavily-optimized OpenSSL-like SHA1 implementation into
the git source tree.

(The un-fun part is modifying the build process to detect the target
processor and include the right asm automatically.)

Anyway, if you want to test it, here's a crude x86_32-only patch to the
git tree.  "make NO_OPENSSL=1" to use the new code.

diff --git a/Makefile b/Makefile
index daf4296..8531c39 100644
--- a/Makefile
+++ b/Makefile
@@ -1176,8 +1176,10 @@ ifdef ARM_SHA1
 	LIB_OBJS += arm/sha1.o arm/sha1_arm.o
 else
 ifdef MOZILLA_SHA1
-	SHA1_HEADER = "mozilla-sha1/sha1.h"
-	LIB_OBJS += mozilla-sha1/sha1.o
+#	SHA1_HEADER = "mozilla-sha1/sha1.h"
+#	LIB_OBJS += mozilla-sha1/sha1.o
+	SHA1_HEADER = "x86/sha1.h"
+	LIB_OBJS += x86/sha1.o x86/sha1-x86.o
 else
 	SHA1_HEADER = <openssl/sha.h>
 	EXTLIBS += $(LIB_4_CRYPTO)
diff --git a/x86/sha1-x86.s b/x86/sha1-x86.s
new file mode 100644
index 0000000..96796d4
--- /dev/null
+++ b/x86/sha1-x86.s
@@ -0,0 +1,1372 @@
+.file	"sha1-586.s"
+.text
+.globl	sha1_block_data_order
+.type	sha1_block_data_order,@function
+.align	16
+sha1_block_data_order:	/* C side declares: (uint32_t hash[5], const unsigned char *p, unsigned int nblocks) */
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	20(%esp),%edi	/* arg 1: five-word hash state */
+	movl	24(%esp),%esi	/* arg 2: input data pointer */
+	movl	28(%esp),%eax	/* arg 3: number of 64-byte blocks */
+	subl	$64,%esp	/* 16-word message schedule lives at 0..60(%esp) */
+	shll	$6,%eax		/* blocks -> bytes */
+	addl	%esi,%eax	/* end-of-input pointer */
+	movl	%eax,92(%esp)	/* overwrite the arg 3 slot; compared against in the loop trailer */
+	movl	16(%edi),%ebp	/* e = hash[4] */
+	movl	12(%edi),%edx	/* d = hash[3] */
+.align	16
+.L000loop:	/* one pass per 64-byte block; the exit test is at the bottom, so the body always runs at least once */
+	movl	(%esi),%ecx	/* copy the 16 input words to the stack, bswap-ing each one on the way */
+	movl	4(%esi),%ebx
+	bswap	%ecx
+	movl	8(%esi),%eax
+	bswap	%ebx
+	movl	%ecx,(%esp)
+	movl	12(%esi),%ecx
+	bswap	%eax
+	movl	%ebx,4(%esp)
+	movl	16(%esi),%ebx
+	bswap	%ecx
+	movl	%eax,8(%esp)
+	movl	20(%esi),%eax
+	bswap	%ebx
+	movl	%ecx,12(%esp)
+	movl	24(%esi),%ecx
+	bswap	%eax
+	movl	%ebx,16(%esp)
+	movl	28(%esi),%ebx
+	bswap	%ecx
+	movl	%eax,20(%esp)
+	movl	32(%esi),%eax
+	bswap	%ebx
+	movl	%ecx,24(%esp)
+	movl	36(%esi),%ecx
+	bswap	%eax
+	movl	%ebx,28(%esp)
+	movl	40(%esi),%ebx
+	bswap	%ecx
+	movl	%eax,32(%esp)
+	movl	44(%esi),%eax
+	bswap	%ebx
+	movl	%ecx,36(%esp)
+	movl	48(%esi),%ecx
+	bswap	%eax
+	movl	%ebx,40(%esp)
+	movl	52(%esi),%ebx
+	bswap	%ecx
+	movl	%eax,44(%esp)
+	movl	56(%esi),%eax
+	bswap	%ebx
+	movl	%ecx,48(%esp)
+	movl	60(%esi),%ecx
+	bswap	%eax
+	movl	%ebx,52(%esp)
+	bswap	%ecx
+	movl	%eax,56(%esp)
+	movl	%ecx,60(%esp)
+	movl	%esi,88(%esp)	/* park the data pointer in the arg 2 slot: %esi and %edi are scratch during the rounds */
+	movl	8(%edi),%ecx	/* c = hash[2] */
+	movl	4(%edi),%ebx	/* b = hash[1] */
+	movl	(%edi),%eax	/* a = hash[0] */
+	/* 00_15 0: rounds 0-15 read W[t] straight from the stack copy; f = ((c ^ d) & b) ^ d, K = 1518500249 = 0x5A827999 */
+	movl	%edx,%edi
+	movl	(%esp),%esi
+	xorl	%ecx,%edi
+	andl	%ebx,%edi
+	rorl	$2,%ebx
+	leal	1518500249(%ebp,%esi,1),%ebp
+	movl	%eax,%esi
+	xorl	%edx,%edi
+	roll	$5,%esi
+	addl	%edi,%ebp
+	movl	%ecx,%edi
+	addl	%esi,%ebp
+	/* 00_15 1 */
+	movl	4(%esp),%esi
+	xorl	%ebx,%edi
+	andl	%eax,%edi
+	rorl	$2,%eax
+	leal	1518500249(%edx,%esi,1),%edx
+	movl	%ebp,%esi
+	xorl	%ecx,%edi
+	roll	$5,%esi
+	addl	%edi,%edx
+	movl	%ebx,%edi
+	addl	%esi,%edx
+	/* 00_15 2 */
+	movl	8(%esp),%esi
+	xorl	%eax,%edi
+	andl	%ebp,%edi
+	rorl	$2,%ebp
+	leal	1518500249(%ecx,%esi,1),%ecx
+	movl	%edx,%esi
+	xorl	%ebx,%edi
+	roll	$5,%esi
+	addl	%edi,%ecx
+	movl	%eax,%edi
+	addl	%esi,%ecx
+	/* 00_15 3 */
+	movl	12(%esp),%esi
+	xorl	%ebp,%edi
+	andl	%edx,%edi
+	rorl	$2,%edx
+	leal	1518500249(%ebx,%esi,1),%ebx
+	movl	%ecx,%esi
+	xorl	%eax,%edi
+	roll	$5,%esi
+	addl	%edi,%ebx
+	movl	%ebp,%edi
+	addl	%esi,%ebx
+	/* 00_15 4 */
+	movl	16(%esp),%esi
+	xorl	%edx,%edi
+	andl	%ecx,%edi
+	rorl	$2,%ecx
+	leal	1518500249(%eax,%esi,1),%eax
+	movl	%ebx,%esi
+	xorl	%ebp,%edi
+	roll	$5,%esi
+	addl	%edi,%eax
+	movl	%edx,%edi
+	addl	%esi,%eax
+	/* 00_15 5 */
+	movl	20(%esp),%esi
+	xorl	%ecx,%edi
+	andl	%ebx,%edi
+	rorl	$2,%ebx
+	leal	1518500249(%ebp,%esi,1),%ebp
+	movl	%eax,%esi
+	xorl	%edx,%edi
+	roll	$5,%esi
+	addl	%edi,%ebp
+	movl	%ecx,%edi
+	addl	%esi,%ebp
+	/* 00_15 6 */
+	movl	24(%esp),%esi
+	xorl	%ebx,%edi
+	andl	%eax,%edi
+	rorl	$2,%eax
+	leal	1518500249(%edx,%esi,1),%edx
+	movl	%ebp,%esi
+	xorl	%ecx,%edi
+	roll	$5,%esi
+	addl	%edi,%edx
+	movl	%ebx,%edi
+	addl	%esi,%edx
+	/* 00_15 7 */
+	movl	28(%esp),%esi
+	xorl	%eax,%edi
+	andl	%ebp,%edi
+	rorl	$2,%ebp
+	leal	1518500249(%ecx,%esi,1),%ecx
+	movl	%edx,%esi
+	xorl	%ebx,%edi
+	roll	$5,%esi
+	addl	%edi,%ecx
+	movl	%eax,%edi
+	addl	%esi,%ecx
+	/* 00_15 8 */
+	movl	32(%esp),%esi
+	xorl	%ebp,%edi
+	andl	%edx,%edi
+	rorl	$2,%edx
+	leal	1518500249(%ebx,%esi,1),%ebx
+	movl	%ecx,%esi
+	xorl	%eax,%edi
+	roll	$5,%esi
+	addl	%edi,%ebx
+	movl	%ebp,%edi
+	addl	%esi,%ebx
+	/* 00_15 9 */
+	movl	36(%esp),%esi
+	xorl	%edx,%edi
+	andl	%ecx,%edi
+	rorl	$2,%ecx
+	leal	1518500249(%eax,%esi,1),%eax
+	movl	%ebx,%esi
+	xorl	%ebp,%edi
+	roll	$5,%esi
+	addl	%edi,%eax
+	movl	%edx,%edi
+	addl	%esi,%eax
+	/* 00_15 10 */
+	movl	40(%esp),%esi
+	xorl	%ecx,%edi
+	andl	%ebx,%edi
+	rorl	$2,%ebx
+	leal	1518500249(%ebp,%esi,1),%ebp
+	movl	%eax,%esi
+	xorl	%edx,%edi
+	roll	$5,%esi
+	addl	%edi,%ebp
+	movl	%ecx,%edi
+	addl	%esi,%ebp
+	/* 00_15 11 */
+	movl	44(%esp),%esi
+	xorl	%ebx,%edi
+	andl	%eax,%edi
+	rorl	$2,%eax
+	leal	1518500249(%edx,%esi,1),%edx
+	movl	%ebp,%esi
+	xorl	%ecx,%edi
+	roll	$5,%esi
+	addl	%edi,%edx
+	movl	%ebx,%edi
+	addl	%esi,%edx
+	/* 00_15 12 */
+	movl	48(%esp),%esi
+	xorl	%eax,%edi
+	andl	%ebp,%edi
+	rorl	$2,%ebp
+	leal	1518500249(%ecx,%esi,1),%ecx
+	movl	%edx,%esi
+	xorl	%ebx,%edi
+	roll	$5,%esi
+	addl	%edi,%ecx
+	movl	%eax,%edi
+	addl	%esi,%ecx
+	/* 00_15 13 */
+	movl	52(%esp),%esi
+	xorl	%ebp,%edi
+	andl	%edx,%edi
+	rorl	$2,%edx
+	leal	1518500249(%ebx,%esi,1),%ebx
+	movl	%ecx,%esi
+	xorl	%eax,%edi
+	roll	$5,%esi
+	addl	%edi,%ebx
+	movl	%ebp,%edi
+	addl	%esi,%ebx
+	/* 00_15 14 */
+	movl	56(%esp),%esi
+	xorl	%edx,%edi
+	andl	%ecx,%edi
+	rorl	$2,%ecx
+	leal	1518500249(%eax,%esi,1),%eax
+	movl	%ebx,%esi
+	xorl	%ebp,%edi
+	roll	$5,%esi
+	addl	%edi,%eax
+	movl	%edx,%edi
+	addl	%esi,%eax
+	/* 00_15 15 */
+	movl	60(%esp),%esi
+	xorl	%ecx,%edi
+	andl	%ebx,%edi
+	rorl	$2,%ebx
+	leal	1518500249(%ebp,%esi,1),%ebp
+	xorl	%edx,%edi
+	movl	(%esp),%esi
+	addl	%edi,%ebp
+	movl	%eax,%edi
+	xorl	8(%esp),%esi
+	roll	$5,%edi
+	xorl	32(%esp),%esi
+	/* 16_19 16: same f and K as 00_15, but W[t] = rol(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1), stored back into the 16-word ring at (t mod 16)*4(%esp) */
+	xorl	52(%esp),%esi
+	addl	%edi,%ebp
+	movl	%ecx,%edi
+	roll	$1,%esi
+	xorl	%ebx,%edi
+	movl	%esi,(%esp)
+	andl	%eax,%edi
+	rorl	$2,%eax
+	leal	1518500249(%edx,%esi,1),%edx
+	movl	4(%esp),%esi
+	xorl	%ecx,%edi
+	xorl	12(%esp),%esi
+	addl	%edi,%edx
+	movl	%ebp,%edi
+	xorl	36(%esp),%esi
+	roll	$5,%edi
+	/* 16_19 17 */
+	xorl	56(%esp),%esi
+	addl	%edi,%edx
+	movl	%ebx,%edi
+	roll	$1,%esi
+	xorl	%eax,%edi
+	movl	%esi,4(%esp)
+	andl	%ebp,%edi
+	rorl	$2,%ebp
+	leal	1518500249(%ecx,%esi,1),%ecx
+	movl	8(%esp),%esi
+	xorl	%ebx,%edi
+	xorl	16(%esp),%esi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	xorl	40(%esp),%esi
+	roll	$5,%edi
+	/* 16_19 18 */
+	xorl	60(%esp),%esi
+	addl	%edi,%ecx
+	movl	%eax,%edi
+	roll	$1,%esi
+	xorl	%ebp,%edi
+	movl	%esi,8(%esp)
+	andl	%edx,%edi
+	rorl	$2,%edx
+	leal	1518500249(%ebx,%esi,1),%ebx
+	movl	12(%esp),%esi
+	xorl	%eax,%edi
+	xorl	20(%esp),%esi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	xorl	44(%esp),%esi
+	roll	$5,%edi
+	/* 16_19 19 */
+	xorl	(%esp),%esi
+	addl	%edi,%ebx
+	movl	%ebp,%edi
+	roll	$1,%esi
+	xorl	%edx,%edi
+	movl	%esi,12(%esp)
+	andl	%ecx,%edi
+	rorl	$2,%ecx
+	leal	1518500249(%eax,%esi,1),%eax
+	movl	16(%esp),%esi
+	xorl	%ebp,%edi
+	xorl	24(%esp),%esi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	xorl	48(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 20: rounds 20-39, f = b ^ c ^ d, K = 1859775393 = 0x6ED9EBA1 */
+	xorl	4(%esp),%esi
+	addl	%edi,%eax
+	roll	$1,%esi
+	movl	%edx,%edi
+	movl	%esi,16(%esp)
+	xorl	%ebx,%edi
+	rorl	$2,%ebx
+	leal	1859775393(%ebp,%esi,1),%ebp
+	movl	20(%esp),%esi
+	xorl	%ecx,%edi
+	xorl	28(%esp),%esi
+	addl	%edi,%ebp
+	movl	%eax,%edi
+	xorl	52(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 21 */
+	xorl	8(%esp),%esi
+	addl	%edi,%ebp
+	roll	$1,%esi
+	movl	%ecx,%edi
+	movl	%esi,20(%esp)
+	xorl	%eax,%edi
+	rorl	$2,%eax
+	leal	1859775393(%edx,%esi,1),%edx
+	movl	24(%esp),%esi
+	xorl	%ebx,%edi
+	xorl	32(%esp),%esi
+	addl	%edi,%edx
+	movl	%ebp,%edi
+	xorl	56(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 22 */
+	xorl	12(%esp),%esi
+	addl	%edi,%edx
+	roll	$1,%esi
+	movl	%ebx,%edi
+	movl	%esi,24(%esp)
+	xorl	%ebp,%edi
+	rorl	$2,%ebp
+	leal	1859775393(%ecx,%esi,1),%ecx
+	movl	28(%esp),%esi
+	xorl	%eax,%edi
+	xorl	36(%esp),%esi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	xorl	60(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 23 */
+	xorl	16(%esp),%esi
+	addl	%edi,%ecx
+	roll	$1,%esi
+	movl	%eax,%edi
+	movl	%esi,28(%esp)
+	xorl	%edx,%edi
+	rorl	$2,%edx
+	leal	1859775393(%ebx,%esi,1),%ebx
+	movl	32(%esp),%esi
+	xorl	%ebp,%edi
+	xorl	40(%esp),%esi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	xorl	(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 24 */
+	xorl	20(%esp),%esi
+	addl	%edi,%ebx
+	roll	$1,%esi
+	movl	%ebp,%edi
+	movl	%esi,32(%esp)
+	xorl	%ecx,%edi
+	rorl	$2,%ecx
+	leal	1859775393(%eax,%esi,1),%eax
+	movl	36(%esp),%esi
+	xorl	%edx,%edi
+	xorl	44(%esp),%esi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	xorl	4(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 25 */
+	xorl	24(%esp),%esi
+	addl	%edi,%eax
+	roll	$1,%esi
+	movl	%edx,%edi
+	movl	%esi,36(%esp)
+	xorl	%ebx,%edi
+	rorl	$2,%ebx
+	leal	1859775393(%ebp,%esi,1),%ebp
+	movl	40(%esp),%esi
+	xorl	%ecx,%edi
+	xorl	48(%esp),%esi
+	addl	%edi,%ebp
+	movl	%eax,%edi
+	xorl	8(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 26 */
+	xorl	28(%esp),%esi
+	addl	%edi,%ebp
+	roll	$1,%esi
+	movl	%ecx,%edi
+	movl	%esi,40(%esp)
+	xorl	%eax,%edi
+	rorl	$2,%eax
+	leal	1859775393(%edx,%esi,1),%edx
+	movl	44(%esp),%esi
+	xorl	%ebx,%edi
+	xorl	52(%esp),%esi
+	addl	%edi,%edx
+	movl	%ebp,%edi
+	xorl	12(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 27 */
+	xorl	32(%esp),%esi
+	addl	%edi,%edx
+	roll	$1,%esi
+	movl	%ebx,%edi
+	movl	%esi,44(%esp)
+	xorl	%ebp,%edi
+	rorl	$2,%ebp
+	leal	1859775393(%ecx,%esi,1),%ecx
+	movl	48(%esp),%esi
+	xorl	%eax,%edi
+	xorl	56(%esp),%esi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	xorl	16(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 28 */
+	xorl	36(%esp),%esi
+	addl	%edi,%ecx
+	roll	$1,%esi
+	movl	%eax,%edi
+	movl	%esi,48(%esp)
+	xorl	%edx,%edi
+	rorl	$2,%edx
+	leal	1859775393(%ebx,%esi,1),%ebx
+	movl	52(%esp),%esi
+	xorl	%ebp,%edi
+	xorl	60(%esp),%esi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	xorl	20(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 29 */
+	xorl	40(%esp),%esi
+	addl	%edi,%ebx
+	roll	$1,%esi
+	movl	%ebp,%edi
+	movl	%esi,52(%esp)
+	xorl	%ecx,%edi
+	rorl	$2,%ecx
+	leal	1859775393(%eax,%esi,1),%eax
+	movl	56(%esp),%esi
+	xorl	%edx,%edi
+	xorl	(%esp),%esi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	xorl	24(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 30 */
+	xorl	44(%esp),%esi
+	addl	%edi,%eax
+	roll	$1,%esi
+	movl	%edx,%edi
+	movl	%esi,56(%esp)
+	xorl	%ebx,%edi
+	rorl	$2,%ebx
+	leal	1859775393(%ebp,%esi,1),%ebp
+	movl	60(%esp),%esi
+	xorl	%ecx,%edi
+	xorl	4(%esp),%esi
+	addl	%edi,%ebp
+	movl	%eax,%edi
+	xorl	28(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 31 */
+	xorl	48(%esp),%esi
+	addl	%edi,%ebp
+	roll	$1,%esi
+	movl	%ecx,%edi
+	movl	%esi,60(%esp)
+	xorl	%eax,%edi
+	rorl	$2,%eax
+	leal	1859775393(%edx,%esi,1),%edx
+	movl	(%esp),%esi
+	xorl	%ebx,%edi
+	xorl	8(%esp),%esi
+	addl	%edi,%edx
+	movl	%ebp,%edi
+	xorl	32(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 32 */
+	xorl	52(%esp),%esi
+	addl	%edi,%edx
+	roll	$1,%esi
+	movl	%ebx,%edi
+	movl	%esi,(%esp)
+	xorl	%ebp,%edi
+	rorl	$2,%ebp
+	leal	1859775393(%ecx,%esi,1),%ecx
+	movl	4(%esp),%esi
+	xorl	%eax,%edi
+	xorl	12(%esp),%esi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	xorl	36(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 33 */
+	xorl	56(%esp),%esi
+	addl	%edi,%ecx
+	roll	$1,%esi
+	movl	%eax,%edi
+	movl	%esi,4(%esp)
+	xorl	%edx,%edi
+	rorl	$2,%edx
+	leal	1859775393(%ebx,%esi,1),%ebx
+	movl	8(%esp),%esi
+	xorl	%ebp,%edi
+	xorl	16(%esp),%esi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	xorl	40(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 34 */
+	xorl	60(%esp),%esi
+	addl	%edi,%ebx
+	roll	$1,%esi
+	movl	%ebp,%edi
+	movl	%esi,8(%esp)
+	xorl	%ecx,%edi
+	rorl	$2,%ecx
+	leal	1859775393(%eax,%esi,1),%eax
+	movl	12(%esp),%esi
+	xorl	%edx,%edi
+	xorl	20(%esp),%esi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	xorl	44(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 35 */
+	xorl	(%esp),%esi
+	addl	%edi,%eax
+	roll	$1,%esi
+	movl	%edx,%edi
+	movl	%esi,12(%esp)
+	xorl	%ebx,%edi
+	rorl	$2,%ebx
+	leal	1859775393(%ebp,%esi,1),%ebp
+	movl	16(%esp),%esi
+	xorl	%ecx,%edi
+	xorl	24(%esp),%esi
+	addl	%edi,%ebp
+	movl	%eax,%edi
+	xorl	48(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 36 */
+	xorl	4(%esp),%esi
+	addl	%edi,%ebp
+	roll	$1,%esi
+	movl	%ecx,%edi
+	movl	%esi,16(%esp)
+	xorl	%eax,%edi
+	rorl	$2,%eax
+	leal	1859775393(%edx,%esi,1),%edx
+	movl	20(%esp),%esi
+	xorl	%ebx,%edi
+	xorl	28(%esp),%esi
+	addl	%edi,%edx
+	movl	%ebp,%edi
+	xorl	52(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 37 */
+	xorl	8(%esp),%esi
+	addl	%edi,%edx
+	roll	$1,%esi
+	movl	%ebx,%edi
+	movl	%esi,20(%esp)
+	xorl	%ebp,%edi
+	rorl	$2,%ebp
+	leal	1859775393(%ecx,%esi,1),%ecx
+	movl	24(%esp),%esi
+	xorl	%eax,%edi
+	xorl	32(%esp),%esi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	xorl	56(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 38 */
+	xorl	12(%esp),%esi
+	addl	%edi,%ecx
+	roll	$1,%esi
+	movl	%eax,%edi
+	movl	%esi,24(%esp)
+	xorl	%edx,%edi
+	rorl	$2,%edx
+	leal	1859775393(%ebx,%esi,1),%ebx
+	movl	28(%esp),%esi
+	xorl	%ebp,%edi
+	xorl	36(%esp),%esi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	xorl	60(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 39 */
+	xorl	16(%esp),%esi
+	addl	%edi,%ebx
+	roll	$1,%esi
+	movl	%ebp,%edi
+	movl	%esi,28(%esp)
+	xorl	%ecx,%edi
+	rorl	$2,%ecx
+	leal	1859775393(%eax,%esi,1),%eax
+	movl	32(%esp),%esi
+	xorl	%edx,%edi
+	xorl	40(%esp),%esi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	xorl	(%esp),%esi
+	roll	$5,%edi
+	/* 40_59 40: rounds 40-59, f = Maj(b,c,d) accumulated as (c & d) + ((c ^ d) & b), K = 2400959708 = 0x8F1BBCDC */
+	addl	%edi,%eax
+	movl	%edx,%edi
+	xorl	20(%esp),%esi
+	andl	%ecx,%edi
+	roll	$1,%esi
+	addl	%edi,%ebp
+	movl	%edx,%edi
+	movl	%esi,32(%esp)
+	xorl	%ecx,%edi
+	leal	2400959708(%ebp,%esi,1),%ebp
+	andl	%ebx,%edi
+	rorl	$2,%ebx
+	movl	36(%esp),%esi
+	addl	%edi,%ebp
+	movl	%eax,%edi
+	xorl	44(%esp),%esi
+	roll	$5,%edi
+	xorl	4(%esp),%esi
+	/* 40_59 41 */
+	addl	%edi,%ebp
+	movl	%ecx,%edi
+	xorl	24(%esp),%esi
+	andl	%ebx,%edi
+	roll	$1,%esi
+	addl	%edi,%edx
+	movl	%ecx,%edi
+	movl	%esi,36(%esp)
+	xorl	%ebx,%edi
+	leal	2400959708(%edx,%esi,1),%edx
+	andl	%eax,%edi
+	rorl	$2,%eax
+	movl	40(%esp),%esi
+	addl	%edi,%edx
+	movl	%ebp,%edi
+	xorl	48(%esp),%esi
+	roll	$5,%edi
+	xorl	8(%esp),%esi
+	/* 40_59 42 */
+	addl	%edi,%edx
+	movl	%ebx,%edi
+	xorl	28(%esp),%esi
+	andl	%eax,%edi
+	roll	$1,%esi
+	addl	%edi,%ecx
+	movl	%ebx,%edi
+	movl	%esi,40(%esp)
+	xorl	%eax,%edi
+	leal	2400959708(%ecx,%esi,1),%ecx
+	andl	%ebp,%edi
+	rorl	$2,%ebp
+	movl	44(%esp),%esi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	xorl	52(%esp),%esi
+	roll	$5,%edi
+	xorl	12(%esp),%esi
+	/* 40_59 43 */
+	addl	%edi,%ecx
+	movl	%eax,%edi
+	xorl	32(%esp),%esi
+	andl	%ebp,%edi
+	roll	$1,%esi
+	addl	%edi,%ebx
+	movl	%eax,%edi
+	movl	%esi,44(%esp)
+	xorl	%ebp,%edi
+	leal	2400959708(%ebx,%esi,1),%ebx
+	andl	%edx,%edi
+	rorl	$2,%edx
+	movl	48(%esp),%esi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	xorl	56(%esp),%esi
+	roll	$5,%edi
+	xorl	16(%esp),%esi
+	/* 40_59 44 */
+	addl	%edi,%ebx
+	movl	%ebp,%edi
+	xorl	36(%esp),%esi
+	andl	%edx,%edi
+	roll	$1,%esi
+	addl	%edi,%eax
+	movl	%ebp,%edi
+	movl	%esi,48(%esp)
+	xorl	%edx,%edi
+	leal	2400959708(%eax,%esi,1),%eax
+	andl	%ecx,%edi
+	rorl	$2,%ecx
+	movl	52(%esp),%esi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	xorl	60(%esp),%esi
+	roll	$5,%edi
+	xorl	20(%esp),%esi
+	/* 40_59 45 */
+	addl	%edi,%eax
+	movl	%edx,%edi
+	xorl	40(%esp),%esi
+	andl	%ecx,%edi
+	roll	$1,%esi
+	addl	%edi,%ebp
+	movl	%edx,%edi
+	movl	%esi,52(%esp)
+	xorl	%ecx,%edi
+	leal	2400959708(%ebp,%esi,1),%ebp
+	andl	%ebx,%edi
+	rorl	$2,%ebx
+	movl	56(%esp),%esi
+	addl	%edi,%ebp
+	movl	%eax,%edi
+	xorl	(%esp),%esi
+	roll	$5,%edi
+	xorl	24(%esp),%esi
+	/* 40_59 46 */
+	addl	%edi,%ebp
+	movl	%ecx,%edi
+	xorl	44(%esp),%esi
+	andl	%ebx,%edi
+	roll	$1,%esi
+	addl	%edi,%edx
+	movl	%ecx,%edi
+	movl	%esi,56(%esp)
+	xorl	%ebx,%edi
+	leal	2400959708(%edx,%esi,1),%edx
+	andl	%eax,%edi
+	rorl	$2,%eax
+	movl	60(%esp),%esi
+	addl	%edi,%edx
+	movl	%ebp,%edi
+	xorl	4(%esp),%esi
+	roll	$5,%edi
+	xorl	28(%esp),%esi
+	/* 40_59 47 */
+	addl	%edi,%edx
+	movl	%ebx,%edi
+	xorl	48(%esp),%esi
+	andl	%eax,%edi
+	roll	$1,%esi
+	addl	%edi,%ecx
+	movl	%ebx,%edi
+	movl	%esi,60(%esp)
+	xorl	%eax,%edi
+	leal	2400959708(%ecx,%esi,1),%ecx
+	andl	%ebp,%edi
+	rorl	$2,%ebp
+	movl	(%esp),%esi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	xorl	8(%esp),%esi
+	roll	$5,%edi
+	xorl	32(%esp),%esi
+	/* 40_59 48 */
+	addl	%edi,%ecx
+	movl	%eax,%edi
+	xorl	52(%esp),%esi
+	andl	%ebp,%edi
+	roll	$1,%esi
+	addl	%edi,%ebx
+	movl	%eax,%edi
+	movl	%esi,(%esp)
+	xorl	%ebp,%edi
+	leal	2400959708(%ebx,%esi,1),%ebx
+	andl	%edx,%edi
+	rorl	$2,%edx
+	movl	4(%esp),%esi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	xorl	12(%esp),%esi
+	roll	$5,%edi
+	xorl	36(%esp),%esi
+	/* 40_59 49 */
+	addl	%edi,%ebx
+	movl	%ebp,%edi
+	xorl	56(%esp),%esi
+	andl	%edx,%edi
+	roll	$1,%esi
+	addl	%edi,%eax
+	movl	%ebp,%edi
+	movl	%esi,4(%esp)
+	xorl	%edx,%edi
+	leal	2400959708(%eax,%esi,1),%eax
+	andl	%ecx,%edi
+	rorl	$2,%ecx
+	movl	8(%esp),%esi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	xorl	16(%esp),%esi
+	roll	$5,%edi
+	xorl	40(%esp),%esi
+	/* 40_59 50 */
+	addl	%edi,%eax
+	movl	%edx,%edi
+	xorl	60(%esp),%esi
+	andl	%ecx,%edi
+	roll	$1,%esi
+	addl	%edi,%ebp
+	movl	%edx,%edi
+	movl	%esi,8(%esp)
+	xorl	%ecx,%edi
+	leal	2400959708(%ebp,%esi,1),%ebp
+	andl	%ebx,%edi
+	rorl	$2,%ebx
+	movl	12(%esp),%esi
+	addl	%edi,%ebp
+	movl	%eax,%edi
+	xorl	20(%esp),%esi
+	roll	$5,%edi
+	xorl	44(%esp),%esi
+	/* 40_59 51 */
+	addl	%edi,%ebp
+	movl	%ecx,%edi
+	xorl	(%esp),%esi
+	andl	%ebx,%edi
+	roll	$1,%esi
+	addl	%edi,%edx
+	movl	%ecx,%edi
+	movl	%esi,12(%esp)
+	xorl	%ebx,%edi
+	leal	2400959708(%edx,%esi,1),%edx
+	andl	%eax,%edi
+	rorl	$2,%eax
+	movl	16(%esp),%esi
+	addl	%edi,%edx
+	movl	%ebp,%edi
+	xorl	24(%esp),%esi
+	roll	$5,%edi
+	xorl	48(%esp),%esi
+	/* 40_59 52 */
+	addl	%edi,%edx
+	movl	%ebx,%edi
+	xorl	4(%esp),%esi
+	andl	%eax,%edi
+	roll	$1,%esi
+	addl	%edi,%ecx
+	movl	%ebx,%edi
+	movl	%esi,16(%esp)
+	xorl	%eax,%edi
+	leal	2400959708(%ecx,%esi,1),%ecx
+	andl	%ebp,%edi
+	rorl	$2,%ebp
+	movl	20(%esp),%esi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	xorl	28(%esp),%esi
+	roll	$5,%edi
+	xorl	52(%esp),%esi
+	/* 40_59 53 */
+	addl	%edi,%ecx
+	movl	%eax,%edi
+	xorl	8(%esp),%esi
+	andl	%ebp,%edi
+	roll	$1,%esi
+	addl	%edi,%ebx
+	movl	%eax,%edi
+	movl	%esi,20(%esp)
+	xorl	%ebp,%edi
+	leal	2400959708(%ebx,%esi,1),%ebx
+	andl	%edx,%edi
+	rorl	$2,%edx
+	movl	24(%esp),%esi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	xorl	32(%esp),%esi
+	roll	$5,%edi
+	xorl	56(%esp),%esi
+	/* 40_59 54 */
+	addl	%edi,%ebx
+	movl	%ebp,%edi
+	xorl	12(%esp),%esi
+	andl	%edx,%edi
+	roll	$1,%esi
+	addl	%edi,%eax
+	movl	%ebp,%edi
+	movl	%esi,24(%esp)
+	xorl	%edx,%edi
+	leal	2400959708(%eax,%esi,1),%eax
+	andl	%ecx,%edi
+	rorl	$2,%ecx
+	movl	28(%esp),%esi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	xorl	36(%esp),%esi
+	roll	$5,%edi
+	xorl	60(%esp),%esi
+	/* 40_59 55 */
+	addl	%edi,%eax
+	movl	%edx,%edi
+	xorl	16(%esp),%esi
+	andl	%ecx,%edi
+	roll	$1,%esi
+	addl	%edi,%ebp
+	movl	%edx,%edi
+	movl	%esi,28(%esp)
+	xorl	%ecx,%edi
+	leal	2400959708(%ebp,%esi,1),%ebp
+	andl	%ebx,%edi
+	rorl	$2,%ebx
+	movl	32(%esp),%esi
+	addl	%edi,%ebp
+	movl	%eax,%edi
+	xorl	40(%esp),%esi
+	roll	$5,%edi
+	xorl	(%esp),%esi
+	/* 40_59 56 */
+	addl	%edi,%ebp
+	movl	%ecx,%edi
+	xorl	20(%esp),%esi
+	andl	%ebx,%edi
+	roll	$1,%esi
+	addl	%edi,%edx
+	movl	%ecx,%edi
+	movl	%esi,32(%esp)
+	xorl	%ebx,%edi
+	leal	2400959708(%edx,%esi,1),%edx
+	andl	%eax,%edi
+	rorl	$2,%eax
+	movl	36(%esp),%esi
+	addl	%edi,%edx
+	movl	%ebp,%edi
+	xorl	44(%esp),%esi
+	roll	$5,%edi
+	xorl	4(%esp),%esi
+	/* 40_59 57 */
+	addl	%edi,%edx
+	movl	%ebx,%edi
+	xorl	24(%esp),%esi
+	andl	%eax,%edi
+	roll	$1,%esi
+	addl	%edi,%ecx
+	movl	%ebx,%edi
+	movl	%esi,36(%esp)
+	xorl	%eax,%edi
+	leal	2400959708(%ecx,%esi,1),%ecx
+	andl	%ebp,%edi
+	rorl	$2,%ebp
+	movl	40(%esp),%esi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	xorl	48(%esp),%esi
+	roll	$5,%edi
+	xorl	8(%esp),%esi
+	/* 40_59 58 */
+	addl	%edi,%ecx
+	movl	%eax,%edi
+	xorl	28(%esp),%esi
+	andl	%ebp,%edi
+	roll	$1,%esi
+	addl	%edi,%ebx
+	movl	%eax,%edi
+	movl	%esi,40(%esp)
+	xorl	%ebp,%edi
+	leal	2400959708(%ebx,%esi,1),%ebx
+	andl	%edx,%edi
+	rorl	$2,%edx
+	movl	44(%esp),%esi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	xorl	52(%esp),%esi
+	roll	$5,%edi
+	xorl	12(%esp),%esi
+	/* 40_59 59 */
+	addl	%edi,%ebx
+	movl	%ebp,%edi
+	xorl	32(%esp),%esi
+	andl	%edx,%edi
+	roll	$1,%esi
+	addl	%edi,%eax
+	movl	%ebp,%edi
+	movl	%esi,44(%esp)
+	xorl	%edx,%edi
+	leal	2400959708(%eax,%esi,1),%eax
+	andl	%ecx,%edi
+	rorl	$2,%ecx
+	movl	48(%esp),%esi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	xorl	56(%esp),%esi
+	roll	$5,%edi
+	xorl	16(%esp),%esi
+	/* 20_39 60: rounds 60-79 keep the "20_39" label because they use the same round body (f = b ^ c ^ d), here with K = 3395469782 = 0xCA62C1D6 */
+	xorl	36(%esp),%esi
+	addl	%edi,%eax
+	roll	$1,%esi
+	movl	%edx,%edi
+	movl	%esi,48(%esp)
+	xorl	%ebx,%edi
+	rorl	$2,%ebx
+	leal	3395469782(%ebp,%esi,1),%ebp
+	movl	52(%esp),%esi
+	xorl	%ecx,%edi
+	xorl	60(%esp),%esi
+	addl	%edi,%ebp
+	movl	%eax,%edi
+	xorl	20(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 61 */
+	xorl	40(%esp),%esi
+	addl	%edi,%ebp
+	roll	$1,%esi
+	movl	%ecx,%edi
+	movl	%esi,52(%esp)
+	xorl	%eax,%edi
+	rorl	$2,%eax
+	leal	3395469782(%edx,%esi,1),%edx
+	movl	56(%esp),%esi
+	xorl	%ebx,%edi
+	xorl	(%esp),%esi
+	addl	%edi,%edx
+	movl	%ebp,%edi
+	xorl	24(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 62 */
+	xorl	44(%esp),%esi
+	addl	%edi,%edx
+	roll	$1,%esi
+	movl	%ebx,%edi
+	movl	%esi,56(%esp)
+	xorl	%ebp,%edi
+	rorl	$2,%ebp
+	leal	3395469782(%ecx,%esi,1),%ecx
+	movl	60(%esp),%esi
+	xorl	%eax,%edi
+	xorl	4(%esp),%esi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	xorl	28(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 63 */
+	xorl	48(%esp),%esi
+	addl	%edi,%ecx
+	roll	$1,%esi
+	movl	%eax,%edi
+	movl	%esi,60(%esp)
+	xorl	%edx,%edi
+	rorl	$2,%edx
+	leal	3395469782(%ebx,%esi,1),%ebx
+	movl	(%esp),%esi
+	xorl	%ebp,%edi
+	xorl	8(%esp),%esi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	xorl	32(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 64 */
+	xorl	52(%esp),%esi
+	addl	%edi,%ebx
+	roll	$1,%esi
+	movl	%ebp,%edi
+	movl	%esi,(%esp)
+	xorl	%ecx,%edi
+	rorl	$2,%ecx
+	leal	3395469782(%eax,%esi,1),%eax
+	movl	4(%esp),%esi
+	xorl	%edx,%edi
+	xorl	12(%esp),%esi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	xorl	36(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 65 */
+	xorl	56(%esp),%esi
+	addl	%edi,%eax
+	roll	$1,%esi
+	movl	%edx,%edi
+	movl	%esi,4(%esp)
+	xorl	%ebx,%edi
+	rorl	$2,%ebx
+	leal	3395469782(%ebp,%esi,1),%ebp
+	movl	8(%esp),%esi
+	xorl	%ecx,%edi
+	xorl	16(%esp),%esi
+	addl	%edi,%ebp
+	movl	%eax,%edi
+	xorl	40(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 66 */
+	xorl	60(%esp),%esi
+	addl	%edi,%ebp
+	roll	$1,%esi
+	movl	%ecx,%edi
+	movl	%esi,8(%esp)
+	xorl	%eax,%edi
+	rorl	$2,%eax
+	leal	3395469782(%edx,%esi,1),%edx
+	movl	12(%esp),%esi
+	xorl	%ebx,%edi
+	xorl	20(%esp),%esi
+	addl	%edi,%edx
+	movl	%ebp,%edi
+	xorl	44(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 67 */
+	xorl	(%esp),%esi
+	addl	%edi,%edx
+	roll	$1,%esi
+	movl	%ebx,%edi
+	movl	%esi,12(%esp)
+	xorl	%ebp,%edi
+	rorl	$2,%ebp
+	leal	3395469782(%ecx,%esi,1),%ecx
+	movl	16(%esp),%esi
+	xorl	%eax,%edi
+	xorl	24(%esp),%esi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	xorl	48(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 68 */
+	xorl	4(%esp),%esi
+	addl	%edi,%ecx
+	roll	$1,%esi
+	movl	%eax,%edi
+	movl	%esi,16(%esp)
+	xorl	%edx,%edi
+	rorl	$2,%edx
+	leal	3395469782(%ebx,%esi,1),%ebx
+	movl	20(%esp),%esi
+	xorl	%ebp,%edi
+	xorl	28(%esp),%esi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	xorl	52(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 69 */
+	xorl	8(%esp),%esi
+	addl	%edi,%ebx
+	roll	$1,%esi
+	movl	%ebp,%edi
+	movl	%esi,20(%esp)
+	xorl	%ecx,%edi
+	rorl	$2,%ecx
+	leal	3395469782(%eax,%esi,1),%eax
+	movl	24(%esp),%esi
+	xorl	%edx,%edi
+	xorl	32(%esp),%esi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	xorl	56(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 70 */
+	xorl	12(%esp),%esi
+	addl	%edi,%eax
+	roll	$1,%esi
+	movl	%edx,%edi
+	movl	%esi,24(%esp)
+	xorl	%ebx,%edi
+	rorl	$2,%ebx
+	leal	3395469782(%ebp,%esi,1),%ebp
+	movl	28(%esp),%esi
+	xorl	%ecx,%edi
+	xorl	36(%esp),%esi
+	addl	%edi,%ebp
+	movl	%eax,%edi
+	xorl	60(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 71 */
+	xorl	16(%esp),%esi
+	addl	%edi,%ebp
+	roll	$1,%esi
+	movl	%ecx,%edi
+	movl	%esi,28(%esp)
+	xorl	%eax,%edi
+	rorl	$2,%eax
+	leal	3395469782(%edx,%esi,1),%edx
+	movl	32(%esp),%esi
+	xorl	%ebx,%edi
+	xorl	40(%esp),%esi
+	addl	%edi,%edx
+	movl	%ebp,%edi
+	xorl	(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 72 */
+	xorl	20(%esp),%esi
+	addl	%edi,%edx
+	roll	$1,%esi
+	movl	%ebx,%edi
+	movl	%esi,32(%esp)
+	xorl	%ebp,%edi
+	rorl	$2,%ebp
+	leal	3395469782(%ecx,%esi,1),%ecx
+	movl	36(%esp),%esi
+	xorl	%eax,%edi
+	xorl	44(%esp),%esi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	xorl	4(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 73 */
+	xorl	24(%esp),%esi
+	addl	%edi,%ecx
+	roll	$1,%esi
+	movl	%eax,%edi
+	movl	%esi,36(%esp)
+	xorl	%edx,%edi
+	rorl	$2,%edx
+	leal	3395469782(%ebx,%esi,1),%ebx
+	movl	40(%esp),%esi
+	xorl	%ebp,%edi
+	xorl	48(%esp),%esi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	xorl	8(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 74 */
+	xorl	28(%esp),%esi
+	addl	%edi,%ebx
+	roll	$1,%esi
+	movl	%ebp,%edi
+	movl	%esi,40(%esp)
+	xorl	%ecx,%edi
+	rorl	$2,%ecx
+	leal	3395469782(%eax,%esi,1),%eax
+	movl	44(%esp),%esi
+	xorl	%edx,%edi
+	xorl	52(%esp),%esi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	xorl	12(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 75 */
+	xorl	32(%esp),%esi
+	addl	%edi,%eax
+	roll	$1,%esi
+	movl	%edx,%edi
+	movl	%esi,44(%esp)
+	xorl	%ebx,%edi
+	rorl	$2,%ebx
+	leal	3395469782(%ebp,%esi,1),%ebp
+	movl	48(%esp),%esi
+	xorl	%ecx,%edi
+	xorl	56(%esp),%esi
+	addl	%edi,%ebp
+	movl	%eax,%edi
+	xorl	16(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 76 */
+	xorl	36(%esp),%esi
+	addl	%edi,%ebp
+	roll	$1,%esi
+	movl	%ecx,%edi
+	movl	%esi,48(%esp)
+	xorl	%eax,%edi
+	rorl	$2,%eax
+	leal	3395469782(%edx,%esi,1),%edx
+	movl	52(%esp),%esi
+	xorl	%ebx,%edi
+	xorl	60(%esp),%esi
+	addl	%edi,%edx
+	movl	%ebp,%edi
+	xorl	20(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 77: from here on W[t] is used but not stored back; no remaining round reads those slots */
+	xorl	40(%esp),%esi
+	addl	%edi,%edx
+	roll	$1,%esi
+	movl	%ebx,%edi
+	xorl	%ebp,%edi
+	rorl	$2,%ebp
+	leal	3395469782(%ecx,%esi,1),%ecx
+	movl	56(%esp),%esi
+	xorl	%eax,%edi
+	xorl	(%esp),%esi
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	xorl	24(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 78 */
+	xorl	44(%esp),%esi
+	addl	%edi,%ecx
+	roll	$1,%esi
+	movl	%eax,%edi
+	xorl	%edx,%edi
+	rorl	$2,%edx
+	leal	3395469782(%ebx,%esi,1),%ebx
+	movl	60(%esp),%esi
+	xorl	%ebp,%edi
+	xorl	4(%esp),%esi
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	xorl	28(%esp),%esi
+	roll	$5,%edi
+	/* 20_39 79 */
+	xorl	48(%esp),%esi
+	addl	%edi,%ebx
+	roll	$1,%esi
+	movl	%ebp,%edi
+	xorl	%ecx,%edi
+	rorl	$2,%ecx
+	leal	3395469782(%eax,%esi,1),%eax
+	xorl	%edx,%edi
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$5,%edi
+	addl	%edi,%eax
+	/* Loop trailer: fold the working variables back into the hash state, then advance to the next block */
+	movl	84(%esp),%edi	/* reload the hash state pointer (arg 1) */
+	movl	88(%esp),%esi	/* reload the data pointer saved at the top of the loop */
+	addl	16(%edi),%ebp	/* e and d stay in registers: the loop head does not reload them */
+	addl	12(%edi),%edx
+	addl	%ecx,8(%edi)
+	addl	%ebx,4(%edi)
+	addl	$64,%esi	/* step to the next 64-byte block */
+	addl	%eax,(%edi)
+	movl	%edx,12(%edi)
+	movl	%ebp,16(%edi)
+	cmpl	92(%esp),%esi	/* compare with the end pointer stored in the prologue */
+	jb	.L000loop
+	addl	$64,%esp	/* drop the message schedule */
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+.L_sha1_block_data_order_end:
+.size	sha1_block_data_order,.L_sha1_block_data_order_end-sha1_block_data_order
+.byte	83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	/* ASCII: "SHA1 block transform for x86, CRYPTOGAMS by <appro@openssl.org>" plus NUL */
diff --git a/x86/sha1.c b/x86/sha1.c
new file mode 100644
index 0000000..4c1a569
--- /dev/null
+++ b/x86/sha1.c
@@ -0,0 +1,101 @@
+/*
+ * SHA-1 implementation.
+ *
+ * Copyright (C) 2005 Paul Mackerras <paulus@xxxxxxxxx>
+ *
+ * This version wraps the x86 assembly routine sha1_block_data_order(),
+ * which consumes whole 64-byte blocks and byte-swaps the message words
+ * itself; the C code below only does the buffering and padding.
+ */
+#include <stdio.h>
+#include <string.h>
+#include <arpa/inet.h>	/* For htonl */
+#include "sha1.h"
+
+#define x86_sha1_core sha1_block_data_order
+extern void x86_sha1_core(uint32_t hash[5], const unsigned char *p,
+			  unsigned int nblocks);
+
+/* Reset the context to the SHA-1 initial state with no data hashed. */
+void x86_SHA1_Init(x86_SHA_CTX *c)
+{
+	c->hash[0] = 0x67452301;
+	c->hash[1] = 0xEFCDAB89;
+	c->hash[2] = 0x98BADCFE;
+	c->hash[3] = 0x10325476;
+	c->hash[4] = 0xC3D2E1F0;
+	c->len = 0;
+}
+
+/*
+ * Feed n bytes starting at p into the hash.  Whole 64-byte blocks go
+ * straight to the assembly core; any remainder is kept in c->buf until
+ * a later call (or x86_SHA1_Final) completes the block.
+ */
+void x86_SHA1_Update(x86_SHA_CTX *c, const void *p, unsigned long n)
+{
+	/* Byte pointer: arithmetic on "void *" is a GNU extension, not ISO C */
+	const unsigned char *data = p;
+	unsigned pos = (unsigned)c->len & 63;
+	unsigned long nb;
+
+	c->len += n;
+
+	/* Initial partial block */
+	if (pos) {
+		unsigned space = 64 - pos;
+		if (space > n)
+			goto end;
+		memcpy(c->buf + pos, data, space);
+		data += space;
+		n -= space;
+		x86_sha1_core(c->hash, c->buf, 1);
+	}
+
+	/* The big impressive middle */
+	nb = n >> 6;
+	if (nb) {
+		x86_sha1_core(c->hash, data, nb);
+		data += nb << 6;
+		n &= 63;
+	}
+	pos = 0;
+end:
+	/* Final partial block; skip the call when empty (p may be NULL) */
+	if (n)
+		memcpy(c->buf + pos, data, n);
+}
+
+/*
+ * Append the SHA-1 padding and the 64-bit message bit count, hash the
+ * last block(s), and write the 20-byte big-endian digest to hash.
+ */
+void x86_SHA1_Final(unsigned char *hash, x86_SHA_CTX *c)
+{
+	unsigned pos = (unsigned)c->len & 63;
+	uint32_t word;
+
+	c->buf[pos++] = 0x80;
+	if (pos > 56) {
+		/* No room left for the length field: pad out and flush */
+		memset(c->buf + pos, 0, 64 - pos);
+		x86_sha1_core(c->hash, c->buf, 1);
+		pos = 0;
+	}
+	memset(c->buf + pos, 0, 56 - pos);
+	/*
+	 * Last two words are 64-bit *bit* count.  Store via memcpy:
+	 * casting the byte buffers to uint32_t * breaks strict aliasing,
+	 * and "hash" need not be 4-byte aligned.
+	 */
+	word = htonl((uint32_t)(c->len >> 29));
+	memcpy(c->buf + 56, &word, sizeof word);
+	word = htonl((uint32_t)c->len << 3);
+	memcpy(c->buf + 60, &word, sizeof word);
+	x86_sha1_core(c->hash, c->buf, 1);
+
+	for (pos = 0; pos < 5; pos++) {
+		word = htonl(c->hash[pos]);
+		memcpy(hash + 4 * pos, &word, sizeof word);
+	}
+}
diff --git a/x86/sha1.h b/x86/sha1.h
new file mode 100644
index 0000000..8988da9
--- /dev/null
+++ b/x86/sha1.h
@@ -0,0 +1,27 @@
+/*
+ * SHA-1 implementation.
+ *
+ * Copyright (C) 2005 Paul Mackerras <paulus@xxxxxxxxx>
+ */
+#ifndef X86_SHA1_H
+#define X86_SHA1_H
+
+#include <stdint.h>
+
+/* Hashing state shared by the x86_SHA1_* functions. */
+typedef struct {
+	uint32_t hash[5];	/* Chaining value, passed to the asm core */
+	uint64_t len;		/* Total bytes fed in so far */
+	unsigned char buf[64];	/* Keep this aligned */
+} x86_SHA_CTX;
+
+void x86_SHA1_Init(x86_SHA_CTX *c);
+void x86_SHA1_Update(x86_SHA_CTX *c, const void *p, unsigned long n);
+void x86_SHA1_Final(unsigned char *hash, x86_SHA_CTX *c);
+
+#define git_SHA_CTX	x86_SHA_CTX
+#define git_SHA1_Init	x86_SHA1_Init
+#define git_SHA1_Update	x86_SHA1_Update
+#define git_SHA1_Final	x86_SHA1_Final
+
+#endif /* X86_SHA1_H */
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]