On Sat, May 10, 2008 at 01:31:37PM +1000, James Sadler wrote:

> Does anybody have a script that can take an existing repo, and create
> a new one with garbled-but-equivalent commits? i.e. file and
> directory structure is same with names changed, and there is a one-one
> relationship between lines of text in new repo and old one except the
> lines have been scrambled? It would be a useful tool for distributing
> private repositories for debugging reasons.

This is only lightly tested, but the script below should do the trick.
It works as an index filter which munges all content in such a way that
a particular line is always given the same replacement text. That means
that diffs will look approximately the same, but will add and remove
lines that say "Fake line XXX" instead of the actual content.

You can munge the commit messages themselves by just replacing them with
some unique text; in the example below, we just replace them with the
md5sum of the content. This will leave the original author, committer,
and date, which are presumably non-proprietary.

-- >8 --
#!/usr/bin/perl
#
# Obscure a repository while still maintaining the same history
# structure and diffs.
#
# Invoke as:
#   git filter-branch \
#     --msg-filter md5sum \
#     --index-filter /path/to/this/script
#
# For every entry in the index, the blob it points at is replaced by a
# blob of the same number of lines in which each distinct original line
# has been swapped for a stable "Fake line N" placeholder.  Two on-disk
# DB_File hashes in the current directory keep the mapping stable from
# one invocation of this script to the next:
#
#   blob-cache   original blob id -> id of the munged blob
#   line-cache   original line    -> replacement line (plus the counter)

use strict;
use warnings;
use IPC::Open2;
use DB_File;
use Fcntl;

# Index mode git uses for submodule (gitlink) entries.
my $GITLINK_MODE = '160000';

# Key in %line_cache holding the number for the next fake line.  Lines
# returned by readline contain a newline only as their final character,
# so a key that *starts* with a newline can never collide with a real
# content line (the bare word "CURRENT" could: as a last line without a
# trailing newline).
my $COUNTER_KEY = "\nCURRENT";

tie(my %blob_cache, 'DB_File', 'blob-cache', O_RDWR|O_CREAT, 0666)
    or die "unable to tie blob-cache: $!";
tie(my %line_cache, 'DB_File', 'line-cache', O_RDWR|O_CREAT, 0666)
    or die "unable to tie line-cache: $!";

open(my $lsfiles, '-|', qw(git ls-files --stage))
    or die "unable to open ls-files: $!";
open(my $update, '|-', qw(git update-index --index-info))
    or die "unable to open update-index: $!";

while (my $entry = <$lsfiles>) {
    # Format: "<mode> <object id> <stage>\t<path>".  Accept both SHA-1
    # (40 hex digits) and SHA-256 (64 hex digits) object ids.
    my ($mode, $hash, $path) =
        $entry =~ /^(\d+) ((?:[0-9a-f]{40}|[0-9a-f]{64})) \d\t(.*)/
        or die "bad ls-files line: $entry";

    # A gitlink names a commit of the submodule, not a blob of this
    # repository, so there is no content to munge; leave the entry as is.
    next if $mode eq $GITLINK_MODE;

    $blob_cache{$hash} = munge($hash) unless exists $blob_cache{$hash};
    print {$update} "$mode $blob_cache{$hash}\t$path\n"
        or die "unable to write to update-index: $!";
}

# A failure of either git command must reach git filter-branch as a
# non-zero exit.  "exit $?" cannot be used for that: $? is a raw wait
# status (256 for exit code 1), which is truncated to 0 by exit.
close_pipe($lsfiles, 'git ls-files');
close_pipe($update,  'git update-index');

# Flush both caches to disk before leaving.
untie %blob_cache;
untie %line_cache;
exit 0;

# close_pipe($fh, $what)
#
# Close a handle obtained from a piped open and die, naming the command
# $what, if closing fails or the command exited with a non-zero status.
sub close_pipe {
    my ($fh, $what) = @_;
    return if close($fh);
    # When the only problem is the child's exit status, $! is 0.
    die "$what failed: ", ($! ? "$!" : "wait status $?"), "\n";
}

# fake_line_for($line)
#
# Return the replacement text for $line, allocating the next
# "Fake line N\n" the first time a given line is seen.
sub fake_line_for {
    my ($line) = @_;
    unless (exists $line_cache{$line}) {
        my $n = $line_cache{$COUNTER_KEY} || 0;
        $line_cache{$COUNTER_KEY} = $n + 1;
        $line_cache{$line}        = "Fake line $n\n";
    }
    return $line_cache{$line};
}

# munge($blob_id)
#
# Read the blob $blob_id line by line, write a new blob in which every
# line is replaced by its fake counterpart, and return the id of the new
# blob as printed by "git hash-object -w".  Dies if either git command
# fails.
sub munge {
    my $h = shift;

    open(my $in, '-|', qw(git show), $h)
        or die "unable to open git show: $!";
    # First handle reads the child's stdout, second writes to its stdin.
    my $pid = open2(my $hash, my $out, qw(git hash-object -w --stdin));

    while (my $line = <$in>) {
        print {$out} fake_line_for($line)
            or die "unable to write to git hash-object: $!";
    }
    close_pipe($in, "git show $h");

    # hash-object prints the id only once its stdin reaches end of file.
    close($out) or die "unable to close pipe to git hash-object: $!";
    my $r = <$hash>;
    close($hash);

    # Reap the child; without this one zombie is left behind per blob.
    waitpid($pid, 0);
    die "git hash-object failed for $h (wait status $?)\n"
        if $? != 0 || !defined $r;

    chomp $r;
    return $r;
}

# Everything after this marker is the mailing-list footer, not script.
__END__
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html