As long as I was hacking on PowerPC asm, I figured I might as well take a crack at the openssl dependency, too. This is a draft x86 SHA1 that is a little over 2x faster than the C version on a Pentium M. I haven't yet started competing with the OpenSSL code. This might be useful for the folks who are careful about licensing and don't like to get mixed up in the OpenSSL/GPL license tangle. Work in progress, but it functions. Public domain.
--- /dev/null 2006-04-13 05:29:14.000000000 -0400
+++ sha1x86.S 2006-06-23 09:14:21.000000000 -0400
@@ -0,0 +1,233 @@
+	.text
+#define K1 0x5a827999	/* additive constant for rounds 0-19 (used with F1) */
+#define K2 0x6ed9eba1	/* additive constant for rounds 20-39 (used with F2) */
+#define K3 0x8f1bbcdc	/* additive constant for rounds 40-59 (used with F3) */
+#define K4 0xca62c1d6	/* additive constant for rounds 60-79 (used with F2) */
+
+#define A %edi
+#define B %ebx
+#define C %ecx
+#define D %edx
+#define E %ebp
+
+#define T %eax	/* scratch register shared by every macro below */
+
+#define MIX(base) /* T = ROTL(W[i-16] ^ W[i-14] ^ W[i-8] ^ W[i-3], 1); W[] is on the stack, base = bytes of newest W[] words that were not pushed */ \
+	movl 60-base(%esp),T; \
+	xorl 52-base(%esp),T; \
+	xorl 28-base(%esp),T; \
+	xorl 8-base(%esp),T; \
+	roll $1,T
+
+/*
+ * In the choice functions below, C is the value most recently modified
+ * (it was the B that was rotated in the previous round), so schedule its
+ * use as late as possible.
+ */
+
+/* Choice function: bitwise b ?
c : d = ((d ^ c) & b) ^ d */
+#define F1(b,c,d,e) \
+	movl d,T; \
+	xorl c,T; \
+	andl b,T; \
+	roll $30,b; \
+	xorl d,T; \
+	addl T,e
+
+/* Parity function: b ^ c ^ d = (b ^ d) ^ c.  Like F1 and F3, it adds the result to e and rotates b left by 30. */
+#define F2(b,c,d,e) \
+	movl b,T; \
+	roll $30,b; \
+	xorl d,T; \
+	xorl c,T; \
+	addl T,e
+
+/* Majority function: (b&c) | (c&d) | (d&b) = (b&d) + ((b^d)&c).  The two terms never share a set bit, so each is simply added to e. */
+#define F3(b,c,d,e) \
+	movl b,T; \
+	andl d,T; \
+	addl T,e; \
+	movl b,T; \
+	roll $30,b; \
+	xorl d,T; \
+	andl c,T; \
+	addl T,e
+
+/*
+ * Register assignments:
+ * %eax - temp
+ * %esi - Pointer to input data
+ * %edi, %ebx, %ecx, %edx, %ebp - A..E
+ */
+
+	/*
+	 * The basic round (K is the stage constant; the F macros also do b = ROTL(b,30)):
+	 * e += ROTL(a,5) + F(b,c,d) + W[i] + K
+	 */
+
+/* This version fetches (and swaps) data from %esi: lodsl loads the next input word into %eax (T); the byte-swapped word is W[i] and is pushed for later MIX use */
+#define ROUND_LOAD(F,a,b,c,d,e,K) \
+	lodsl; \
+	addl $K,e; \
+	bswap T; \
+	addl T,e; \
+	pushl T; \
+	F(b,c,d,e); \
+	movl a,T; \
+	roll $5,T; \
+	addl T,e
+
+/* The standard round: compute the new W value (MIX(0): every earlier W has been pushed) and push it on the stack */
+#define ROUND_MIX(F,a,b,c,d,e,K) \
+	MIX(0); \
+	addl $K,e; \
+	addl T,e; \
+	pushl T; \
+	F(b,c,d,e); \
+	movl a,T; \
+	roll $5,T; \
+	addl T,e
+
+/* Mix the W[] value, but do NOT push it, as it's never used; base = 4 * (number of earlier W values left unpushed) keeps MIX's stack offsets correct */
+#define ROUND_LAST(F,a,b,c,d,e,K,base) \
+	MIX(base); \
+	addl $K,e; \
+	addl T,e; \
+	F(b,c,d,e); \
+	movl a,T; \
+	roll $5,T; \
+	addl T,e
+
+/* Args (on the stack) are context (A..E, then a W[] array), then input data; only the five A..E words of the context are read and updated here */
+.globl shaHashBlock
+	.type shaHashBlock, @function
+shaHashBlock:
+	pushl %ebp
+	pushl %ebx
+	pushl %esi
+	pushl %edi
+/* Args now start at 20(%esp): four saved registers plus the return address */
+
+	movl 20(%esp),T
+	movl 24(%esp),%esi
+
+	movl (T),A
+	movl 4(T),B
+	movl 8(T),C
+	movl 12(T),D
+	movl 16(T),E
+
+	ROUND_LOAD(F1, A,B,C,D,E, K1);
+	ROUND_LOAD(F1, E,A,B,C,D, K1);
+	ROUND_LOAD(F1, D,E,A,B,C, K1);
+	ROUND_LOAD(F1, C,D,E,A,B, K1);
+	ROUND_LOAD(F1, B,C,D,E,A, K1);
+
+	ROUND_LOAD(F1, A,B,C,D,E, K1);
+	ROUND_LOAD(F1, E,A,B,C,D, K1);
+	ROUND_LOAD(F1, D,E,A,B,C, K1);
+	ROUND_LOAD(F1, C,D,E,A,B, K1);
+	ROUND_LOAD(F1, B,C,D,E,A, K1);
+
+	ROUND_LOAD(F1, A,B,C,D,E, K1);
+	ROUND_LOAD(F1, E,A,B,C,D, K1);
+	ROUND_LOAD(F1, D,E,A,B,C, K1);
+	ROUND_LOAD(F1, C,D,E,A,B, K1);
+	ROUND_LOAD(F1, B,C,D,E,A, K1);
+
+	ROUND_LOAD(F1, A,B,C,D,E, K1);
+	ROUND_MIX(F1, E,A,B,C,D, K1);
+	ROUND_MIX(F1, D,E,A,B,C, K1);
+	ROUND_MIX(F1, C,D,E,A,B, K1);
+	ROUND_MIX(F1, B,C,D,E,A, K1);
+
+	ROUND_MIX(F2, A,B,C,D,E, K2);
+	ROUND_MIX(F2, E,A,B,C,D, K2);
+	ROUND_MIX(F2, D,E,A,B,C, K2);
+	ROUND_MIX(F2, C,D,E,A,B, K2);
+	ROUND_MIX(F2, B,C,D,E,A, K2);
+
+	ROUND_MIX(F2, A,B,C,D,E, K2);
+	ROUND_MIX(F2, E,A,B,C,D, K2);
+	ROUND_MIX(F2, D,E,A,B,C, K2);
+	ROUND_MIX(F2, C,D,E,A,B, K2);
+	ROUND_MIX(F2, B,C,D,E,A, K2);
+
+	ROUND_MIX(F2, A,B,C,D,E, K2);
+	ROUND_MIX(F2, E,A,B,C,D, K2);
+	ROUND_MIX(F2, D,E,A,B,C, K2);
+	ROUND_MIX(F2, C,D,E,A,B, K2);
+	ROUND_MIX(F2, B,C,D,E,A, K2);
+
+	ROUND_MIX(F2, A,B,C,D,E, K2);
+	ROUND_MIX(F2, E,A,B,C,D, K2);
+	ROUND_MIX(F2, D,E,A,B,C, K2);
+	ROUND_MIX(F2, C,D,E,A,B, K2);
+	ROUND_MIX(F2, B,C,D,E,A, K2);
+
+	ROUND_MIX(F3, A,B,C,D,E, K3);
+	ROUND_MIX(F3, E,A,B,C,D, K3);
+	ROUND_MIX(F3, D,E,A,B,C, K3);
+	ROUND_MIX(F3, C,D,E,A,B, K3);
+	ROUND_MIX(F3, B,C,D,E,A, K3);
+
+	ROUND_MIX(F3, A,B,C,D,E, K3);
+	ROUND_MIX(F3, E,A,B,C,D, K3);
+	ROUND_MIX(F3, D,E,A,B,C, K3);
+	ROUND_MIX(F3, C,D,E,A,B, K3);
+	ROUND_MIX(F3, B,C,D,E,A, K3);
+
+	ROUND_MIX(F3, A,B,C,D,E, K3);
+	ROUND_MIX(F3, E,A,B,C,D, K3);
+	ROUND_MIX(F3, D,E,A,B,C, K3);
+	ROUND_MIX(F3, C,D,E,A,B, K3);
+	ROUND_MIX(F3, B,C,D,E,A, K3);
+
+	ROUND_MIX(F3, A,B,C,D,E, K3);
+	ROUND_MIX(F3, E,A,B,C,D, K3);
+	ROUND_MIX(F3, D,E,A,B,C, K3);
+	ROUND_MIX(F3, C,D,E,A,B, K3);
+	ROUND_MIX(F3, B,C,D,E,A, K3);
+
+	ROUND_MIX(F2, A,B,C,D,E, K4);
+	ROUND_MIX(F2, E,A,B,C,D, K4);
+	ROUND_MIX(F2, D,E,A,B,C, K4);
+	ROUND_MIX(F2, C,D,E,A,B, K4);
+	ROUND_MIX(F2, B,C,D,E,A, K4);
+
+	ROUND_MIX(F2, A,B,C,D,E, K4);
+	ROUND_MIX(F2, E,A,B,C,D, K4);
+	ROUND_MIX(F2, D,E,A,B,C, K4);
+	ROUND_MIX(F2, C,D,E,A,B, K4);
+	ROUND_MIX(F2, B,C,D,E,A, K4);
+
+	ROUND_MIX(F2, A,B,C,D,E, K4);
+	ROUND_MIX(F2, E,A,B,C,D, K4);
+	ROUND_MIX(F2, D,E,A,B,C, K4);
+	ROUND_MIX(F2, C,D,E,A,B, K4);
+	ROUND_MIX(F2, B,C,D,E,A, K4);
+
+	ROUND_MIX(F2, A,B,C,D,E, K4);
+	ROUND_MIX(F2, E,A,B,C,D, K4);
+	ROUND_LAST(F2, D,E,A,B,C, K4, 0);
+	ROUND_LAST(F2, C,D,E,A,B, K4, 4);
+	ROUND_LAST(F2, B,C,D,E,A, K4, 8);
+
+	addl $77*4,%esp	/* discard the 77 W[] words pushed above (16 loaded + 61 mixed) */
+
+	movl 20(%esp),T	/* reload the context pointer; T was used as scratch by the rounds */
+
+	addl A, (T)
+	addl B, 4(T)
+	addl C, 8(T)
+	addl D,12(T)
+	addl E,16(T)
+
+	popl %edi
+	popl %esi
+	popl %ebx
+	popl %ebp
+
+	ret
+
+	.size shaHashBlock, .-shaHashBlock
--- /dev/null 2006-04-13 05:29:14.000000000 -0400
+++ sha1asm.h 2006-06-23 10:24:35.578683250 -0400
@@ -0,0 +1,12 @@
+#include <stdint.h>
+#include <stddef.h> /* For size_t */
+
+typedef struct sha_ctx {
+	uint32_t hash[5];	/* chaining state A..E, updated by shaHashBlock */
+	uint32_t data[16];	/* buffer for a partial 64-byte input block */
+	uint32_t sizeL, sizeH;	/* total bytes hashed so far: low and high 32 bits */
+} SHA_CTX;
+
+void SHA1_Init(SHA_CTX *ctx);
+void SHA1_Update(SHA_CTX *ctx, const void *dataIn, size_t len);
+void SHA1_Final(unsigned char hashout[20], SHA_CTX *ctx);
--- /dev/null 2006-04-13 05:29:14.000000000 -0400
+++ sha1asm.c 2006-06-23 10:25:26.798757250 -0400
@@ -0,0 +1,163 @@
+
+#include <string.h> /* For memcpy, memset */
+#include <arpa/inet.h> /* For htonl, ntohl */
+
+#include "sha1asm.h"
+
+#if ASM
+extern void shaHashBlock(SHA_CTX *ctx, uint32_t const *input);
+#else
+
+/*
+ * This chunk of this file are subject to the Mozilla Public
+ * License Version 1.1 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS
+ * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * rights and limitations under the License.
+ *
+ * The Original Code is SHA 180-1 Reference Implementation (Compact version)
+ *
+ * The Initial Developer of the Original Code is Paul Kocher of
+ * Cryptography Research.
Portions created by Paul Kocher are + * Copyright (C) 1995-9 by Cryptography Research, Inc. All + * Rights Reserved. + * + * Contributor(s): + * + * Paul Kocher + * + * Alternatively, this portion of this file may be used under the + * terms of the GNU General Public License Version 2 or later (the + * "GPL"), in which case the provisions of the GPL are applicable + * instead of those above. If you wish to allow use of your + * version of this file only under the terms of the GPL and not to + * allow others to use your version of this file under the MPL, + * indicate your decision by deleting the provisions above and + * replace them with the notice and other provisions required by + * the GPL. If you do not delete the provisions above, a recipient + * may use your version of this file under either the MPL or the + * GPL. + */ + +#define ROTL(X,n) (((X) << (n)) | ((X) >> (32-(n)))) + +static void +shaHashBlock(SHA_CTX *ctx, uint32_t const *input) +{ + int i; + uint32_t A,B,C,D,E,T; + uint32_t W[80]; + + for (i = 0; i < 16; i++) + W[i] = ntohl(input[i]); + for (i = 16; i < 80; i++) { + T = W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16]; + W[i] = ROTL(T, 1); + } + + A = ctx->hash[0]; + B = ctx->hash[1]; + C = ctx->hash[2]; + D = ctx->hash[3]; + E = ctx->hash[4]; + + for (i = 0; i < 20; i++) { + T = ROTL(A,5) + (((C^D)&B)^D) + E + W[i] + 0x5a827999; + E = D; D = C; C = ROTL(B, 30); B = A; A = T; + } + for (i = 20; i < 40; i++) { + T = ROTL(A,5) + (B^C^D) + E + W[i] + 0x6ed9eba1; + E = D; D = C; C = ROTL(B, 30); B = A; A = T; + } + for (i = 40; i < 60; i++) { + T = ROTL(A,5) + (B&C) + (D&(B^C)) + E + W[i] + 0x8f1bbcdc; + E = D; D = C; C = ROTL(B, 30); B = A; A = T; + } + for (i = 60; i < 80; i++) { + T = ROTL(A,5) + (B^C^D) + E + W[i] + 0xca62c1d6; + E = D; D = C; C = ROTL(B, 30); B = A; A = T; + } + + ctx->hash[0] += A; + ctx->hash[1] += B; + ctx->hash[2] += C; + ctx->hash[3] += D; + ctx->hash[4] += E; +} +#endif + +/* + * The following part of the file is NOT subject to the 
above license, and is + * instead placed in the public domain. + */ + +void +SHA1_Init(SHA_CTX *ctx) +{ + /* Initialize H with the magic constants (see FIPS180 for constants) + */ + ctx->hash[0] = 0x67452301; + ctx->hash[1] = 0xefcdab89; + ctx->hash[2] = 0x98badcfe; + ctx->hash[3] = 0x10325476; + ctx->hash[4] = 0xc3d2e1f0; + + ctx->sizeH = ctx->sizeL = 0; +} + +void +SHA1_Update(SHA_CTX *ctx, const void *data, size_t len) +{ + unsigned pos = ctx->sizeL % 64; + + ctx->sizeL += len; + ctx->sizeH += (ctx->sizeL < (uint32_t)len); + ctx->sizeH += len >> 16 >> 16; /* In case size_t is 64 bits */ + + /* Leading partial block */ + if (pos) { + unsigned avail = 64 - pos; + if (avail > len) + goto end; + memcpy((char *)ctx->data + pos, data, avail); + data = (char const *)data + avail; + len -= avail; + shaHashBlock(ctx, ctx->data); + } + /* Full blocks */ + while (len >= 64) { + shaHashBlock(ctx, data); + data = (char const *)data + 64; + len -= 64; + } + pos = 0; +end: + /* Buffer trailing partial block */ + memcpy((char *)ctx->data + pos, data, len); +} + +void +SHA1_Final(unsigned char hashout[20], SHA_CTX *ctx) +{ + static unsigned char const padding[64] = { 0x80, 0 /* more zeros */ }; + uint32_t sizeL = ctx->sizeL; + uint32_t sizeH = ctx->sizeH; + int i; + + /* Append final padding, leaving 8 bytes free */ + SHA1_Update(ctx, padding, 64 - ((sizeL + 8) % 64)); + ctx->data[14] = htonl(sizeH << 3 | sizeL >> 29); + ctx->data[15] = htonl(sizeL << 3); + + shaHashBlock(ctx, ctx->data); + + for (i = 0; i < 5; i++) + ((uint32_t *)hashout)[i] = htonl(ctx->hash[i]); + + memset(ctx, 0, sizeof *ctx); +} + - : send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html