I've noticed before that the coreutils hashing utilities were a little behind in performance, but I was prompted to look at this again when I noticed the recently updated SHA-1 implementation in git: http://git.kernel.org/?p=git/git.git;a=history;f=block-sha1;h=d3121f7;hb=pu Testing that with the attached program, which I wrote in a couple of minutes to try to match sha1sum's system calls, shows that it's around 33% faster, as shown below: $ gcc $(rpm -q --qf="%{OPTFLAGS}\n" coreutils) linus-sha1.c sha1.c -o linus-sha1 $ time ./linus-sha1 300MB_file df1e19e245fee4f53087b50ef953ca2c8d1644d7 300MB_file real 0m2.742s user 0m2.516s sys 0m0.206s $ time ~/git/coreutils/src/sha1sum 300MB_file df1e19e245fee4f53087b50ef953ca2c8d1644d7 300MB_file real 0m4.166s user 0m3.846s sys 0m0.298s So, could we use that code in coreutils? Think of all the dead fish it would save. I've also attached a trivial block-sha1 patch which doesn't affect performance, but does suppress a signed/unsigned comparison warning which occurs with -Wextra, for example. Cheers, Pádraig.
/* gcc -O2 -Wall linus-sha1.c sha1.c -o linus-sha1 */ #include <stdio.h> #include <stdlib.h> #include "sha1.h" int main(int argc, char** argv) { if (argc != 2) return 1; const char* filename = argv[1]; FILE *fp = fopen (filename, "r"); if (!fp) return 1; #define BS 4096 /* match coreutils */ blk_SHA_CTX ctx; blk_SHA1_Init(&ctx); size_t nr; char buf[BS]; while ((nr=fread_unlocked(buf, 1, sizeof(buf), fp))) blk_SHA1_Update(&ctx, buf, nr); unsigned char hash[20]; blk_SHA1_Final(hash, &ctx); int i; for (i=0; i<sizeof(hash); i++) printf("%02x",*(hash+i)); printf(" %s\n", filename); return 0; }
>From fa75e818836f763357ff9b7bbde3327e1aabbe47 Mon Sep 17 00:00:00 2001 From: =?utf-8?q?P=C3=A1draig=20Brady?= <P@xxxxxxxxxxxxxx> Date: Sat, 15 Aug 2009 00:17:30 +0100 Subject: [PATCH] block-sha1: suppress signed unsigned comparison warning * block-sha1/sha1.c: Use unsigned ints as the values will never go negative. --- block-sha1/sha1.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/block-sha1/sha1.c b/block-sha1/sha1.c index d3121f7..be763d8 100644 --- a/block-sha1/sha1.c +++ b/block-sha1/sha1.c @@ -231,13 +231,13 @@ void blk_SHA1_Init(blk_SHA_CTX *ctx) void blk_SHA1_Update(blk_SHA_CTX *ctx, const void *data, unsigned long len) { - int lenW = ctx->size & 63; + unsigned int lenW = ctx->size & 63; ctx->size += len; /* Read the data into W and process blocks as they get full */ if (lenW) { - int left = 64 - lenW; + unsigned int left = 64 - lenW; if (len < left) left = len; memcpy(lenW + (char *)ctx->W, data, left); -- 1.6.2.5