Re: Distribution of longest common hash prefixes

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



>>>>> "Linus" == Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> writes:

Linus> On Mon, 2 Apr 2007, Peter Eriksen wrote:

Linus> #include <stdio.h>
Linus> #include <string.h>


Linus> int h2d(char c)
Linus> {
Linus> 	if ('a' <= c && c <= 'f')
Linus> 		return c-'a'+10;
Linus> 	else
Linus> 		return c-'0';
Linus> }

Linus> int lcprefix(char *a, char *b)
Linus> {
Linus> 	int bits = 0;
Linus> 	unsigned n1, n2;

Linus> 	while (*a == *b) {
Linus> 		bits += 4;
Linus> 		a++;
Linus> 		b++;
Linus> 	}

Linus> 	n1 = h2d(*a);
Linus> 	n2 = h2d(*b);

Linus> 	/* Would make more sense to start from bit 0.. */
Linus> 	while ((n1 & 8) == (n2 & 8)) {
Linus> 		bits++;
Linus> 		n1 <<= 1;
Linus> 		n2 <<= 1;
Linus> 	}
	
Linus> 	return bits;
Linus> }

Linus> int main(int argc, char **argv)
Linus> {
Linus> 	FILE *fp;
Linus> 	char old[41];
Linus> 	char cur[41];
Linus> 	int lcp = 0;
Linus> 	int table[64];
Linus> 	int i;

Linus> 	memset(table, 0, 64*sizeof(int));
Linus> 	memset(old, '0', 40);
Linus> 	old[40] = '\0';

Linus> 	fp = fopen(argv[1], "r");
Linus> 	fscanf(fp, "%s\n", cur);

Linus> 	if (lcp < lcprefix(old, cur)) {
Linus> 		lcp = lcprefix(old, cur);
Linus> 	}

Linus> 	table[lcp]++;
Linus> 	while (fscanf(fp, "%s\n", cur) != EOF) {
Linus> 		int newlcp = lcprefix(old, cur);
Linus> 		table[newlcp]++;
Linus> 		if (lcp < newlcp) {
Linus> 			printf("%s\n%s\n", old, cur);
Linus> 			lcp = newlcp;
Linus> 			printf("lcprefix = %d\n", newlcp);
Linus> 		}
Linus> 		memcpy(old, cur, 40);
Linus> 	}

Linus> 	for(i = 0; i < 64; i++) {
Linus> 		printf("%2d: %2d\n", i, table[i]);
Linus> 	}
	
Linus> 	return 0;
Linus> }

I don't have access to the linux-2.6 kernel, but on git.git at
d8b6a1a10b93666246984a50d64a163e71163aeb I get this:

    $ git-rev-list --objects HEAD | sort | perl -lne '
      substr($_, 40) = "";
      ($p ^ $_) =~ /^(\0*)/;
      $count[length $1]++;
      $p = $_;
      END { print "$_: $count[$_]" for 0..$#count }
    '
    0: 16
    1: 240
    2: 3839
    3: 24458
    4: 8275
    5: 619
    6: 45
    7: 
    8: 1

Yeay Perl. :)

-- 
Randal L. Schwartz - Stonehenge Consulting Services, Inc. - +1 503 777 0095
<merlyn@xxxxxxxxxxxxxx> <URL:http://www.stonehenge.com/merlyn/>
Perl/Unix/security consulting, Technical writing, Comedy, etc. etc.
See PerlTraining.Stonehenge.com for onsite and open-enrollment Perl training!
-
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]