On Sun, Aug 24, 2008 at 12:18:50PM +0300, Felipe Contreras wrote: > I developed a script that converts a monotone repository into a git > one (exact clone), I want to contribute it so everybody can use it. > > This is the gist of the script: > > mtn update --revision #{@id} --reallyquiet > git ls-files --modified --others --exclude-standard -z | git > update-index --add --remove -z --stdin > git write-tree > git write-raw < /tmp/commit.txt > git update-ref refs/mtn/#{@id} #{@git_id} > > branches.each do |e| > git update-ref refs/heads/#{e} #{@git_id} > end You definitely want to use fast-import, but you probably want to do something a lot closer to fast-export for monotone (read: use its automate stdio interface and avoid expensive calls). Here's a simple monotone to git converter I wrote. You'll need the Monotone::AutomateStdio perl module to use it (which I think I got it from monotone's net.venge.monotone.contrib.lib.automate-stdio branch). It is very fast; it can convert the OpenEmbedded repo in something like 5-10 minutes on my machine. Note that for monotone export to go fast you absolutely /must/ avoid the get_manifest operation. In my converter I use the revision information directly. Getting the renames right with this is a little tricky; IIRC, the ordering that works is: * Rename all renamed files, innermost files first, to temporary names. * Delete all deleted files, innermost first. * Rename all temporary names to permanent names, outermost first. * Add all new/modified files. Conveniently, all of the above can be done by directly emitting fast-import commands, so you don't have to keep track of trees directly. (With one exception, which I'll elaborate on in a different email.) -bcd
#!/usr/bin/perl # Copyright (C) 2007-2008 Brian Downing # This program is licensed under version 2 of the GNU GPL. use strict; use Monotone::AutomateStdio; use Date::Parse; my $D = 0; my $m = Monotone::AutomateStdio->new($ARGV[0]); my $revlist = []; $m->graph($revlist); my $sorted = []; for my $rev (@$revlist) { push(@$sorted, $rev->{revision_id}); } my $leaves = []; $m->leaves($leaves); $m->toposort($sorted, @$sorted); my $marks = {}; my $mark = 1; my $c = 0; sub quote_file { $_ = shift; return $_; s/\\/\\\\/g; s/\n/\\n/g; s/"/\\"/g; return qq("$_"); } sub lprint { my $fh = shift; print @_ if $D; print $fh @_; } sub lprintf { my $fh = shift; printf @_ if $D; printf $fh @_; } my $tmptag = "624d893e-ae1a-42d8-90a9-926a6ceffae8"; open my $fi, '|-', 'git-fast-import --export-marks=file'; for my $rev (@$sorted) { my ($time, $author, $msg) = ("0", "__UNKNOWN__", "__UNKNOWN__"); my @certs; my @branches; $m->certs(\@certs, $rev); for my $cert (@certs) { my ($n, $v) = ($cert->{name}, $cert->{value}); $author = $v if ($n eq 'author'); $time = $v if ($n eq 'date'); $msg = $v if ($n eq 'changelog'); push(@branches, $v) if ($n eq 'branch'); } my $email = $author; $msg .= "\nmtn-revision: $rev\n"; for my $b (sort @branches) { $msg .= "mtn-branch: $b\n"; } $time = str2time($time, 'UTC'); my $mfest = []; $m->get_revision($mfest, $rev); my $orcount = 0; my $add_files = {}; my $add_dirs = {}; my $delete_files = {}; my $from_tmpnames = {}; my $to_tmpnames = {}; my $curtmp = 0; my @parents; for my $e (@$mfest) { if ($e->{type} eq 'old_revision') { push(@parents, $e->{revision_id}); ++$orcount; } next if $orcount > 1; if ($e->{type} eq 'add_file' || $e->{type} eq 'patch') { my $id = $e->{file_id} || $e->{to_file_id}; $add_files->{$e->{name}} = $id; unless ($marks->{$id}) { my $data; $m->get_file(\$data, $id); print "new file $id\n" if $D; print $fi "blob\n"; my $len = length($data); print $fi "mark :$mark\n"; $marks->{$id} = $mark++; print $fi "data $len\n$data\n"; } } elsif ($e->{type} eq 'add_dir') { $add_dirs->{$e->{name}} = 1; } elsif ($e->{type} eq 'delete') { $delete_files->{$e->{name}} = 1; } elsif ($e->{type} eq 'rename') { $curtmp++; $from_tmpnames->{$e->{from_name}} = "__tmp_${tmptag}_$curtmp"; $to_tmpnames->{$e->{to_name}} = "__tmp_${tmptag}_$curtmp"; } } printf("rev $rev (%d/%d, %.2f%)\n", ++$c, scalar(@$sorted), 100*$c/scalar(@$sorted)); print $fi "reset refs/import\n" unless @parents; lprint $fi, "commit refs/import\n"; print $fi "mark :$mark\n"; $marks->{$rev} = $mark++; if ($author =~ m(\s*(.*?\S)\s*<(.*)>\s*)) { $author = $1; $email = $2; } $author =~ s/[<>]/_/g; $email =~ s/[<>]/_/g; $author =~ s/@.*//; print $fi "committer $author <$email> $time +0000\n"; my $len = length($msg); print $fi "data $len\n$msg\n"; my $from = "from"; for my $p (@parents) { lprint $fi, "$from :$marks->{$p}\n"; $from = "merge"; } for my $f (sort { length($b) <=> length ($a) } keys %$from_tmpnames) { lprintf($fi, "R %s %s\n", quote_file($f), quote_file($from_tmpnames->{$f})); } for my $f (sort { length($b) <=> length ($a) } keys %$delete_files) { lprintf($fi, "D %s\n", quote_file($f)); } for my $f (sort { length($a) <=> length ($b) } keys %$to_tmpnames) { lprintf($fi, "R %s %s\n", quote_file($to_tmpnames->{$f}), quote_file($f)); } for my $f (keys %$add_files) { lprintf($fi, "M 0644 :%s %s\n", $marks->{$add_files->{$f}}, quote_file($f)); } for my $f (keys %$add_dirs) { $f .= "/" if $f; lprintf($fi, "M 0644 inline %s\n", quote_file("$f.gitignore")); lprint($fi, "data 0\n\n"); } print $fi "\n"; } my $branches = {}; for my $rev (@$leaves) { my $branch; my @certs; $m->certs(\@certs, $rev); for my $cert (@certs) { my ($n, $v) = ($cert->{name}, $cert->{value}); $branch = $v if ($n eq 'branch'); } my $r = $branches->{$branch}; $branches->{$branch}--; if ($marks->{$rev}) { print $fi "reset refs/heads/$branch$r\n"; print $fi "from :$marks->{$rev}\n\n"; } } close $fi;