NOTE: This is an RFC patch because I'm pretty sure I should be using Git::SVN::Ra::can_do_switch() somewhere, and because I am very likely abusing git-svn internal methods in a way that will produce incorrect results in some cases. Dave -- When an svn repository has multiple related projects checked in as individual directories under trunk: trunk/project1/ trunk/project2/ trunk/project3/ and each project subdirectory is tagged individually instead of tagging trunk as a whole: [...] tags/project1-204 tags/project1-205 [...] tags/project2-395 tags/project2-396 [...] tags/project3-77 tags/project3-78 [...] then git-svn currently imports the entire history of each new tag beginning with r1. This happens because git-svn uses the name of the branch or tag when attempting to fast-forward svn history. For large svn repositories, the time required to import each additional tag therefore grows with the length of the repository history, because every new tag replays that history from r1. A better approach is to search through all known refs for a ref that has the same repository URL, but with a maximum revision that is less than or equal to the revision being imported. This ref can then be used to seed a new ref for the tag being imported, thus bypassing the majority of the work. This approach is implemented by changing find_by_url() to take an additional, optional parameter ($rev) that tells it to return the ref that is the closest match to the desired repo url while having a revision less than or equal to $rev. When a brand new ref is created in other_gs(), the new find_by_url() behavior is used to find the closest matching ref and use it as a seed. 
--- git-svn.perl | 45 +++++++++++++++++++++--- t/t9157-git-svn-subdir-import-perf.sh | 59 +++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 6 deletions(-) create mode 100755 t/t9157-git-svn-subdir-import-perf.sh diff --git a/git-svn.perl b/git-svn.perl index 9b046b6..af46f5f 100755 --- a/git-svn.perl +++ b/git-svn.perl @@ -1967,8 +1967,12 @@ sub init_remote_config { $self->{url} = $url; } -sub find_by_url { # repos_root and, path are optional - my ($class, $full_url, $repos_root, $path) = @_; +# Finds an exact match for a ref based on $full_url, $repos_root and +# $path. If no exact match is found and if $rev is specified, the +# closest match with the same url and a revision <= $rev is returned. +# Note that $repos_root, $path and $rev are optional. +sub find_by_url { + my ($class, $full_url, $repos_root, $path, $rev) = @_; return undef unless defined $full_url; remove_username($full_url); @@ -1978,6 +1982,7 @@ sub find_by_url { # repos_root and, path are optional $path = $full_url; $path =~ s#^\Q$repos_root\E(?:/|$)##; } + my ($closest_gs, $closest_max_rev); foreach my $repo_id (keys %$remotes) { my $u = $remotes->{$repo_id}->{url} or next; remove_username($u); @@ -2009,11 +2014,22 @@ sub find_by_url { # repos_root and, path are optional $p =~ s#^\Q$z\E(?:/|$)#$prefix# or next; } foreach my $f (keys %$fetch) { - next if $f ne $p; - return Git::SVN->new($fetch->{$f}, $repo_id, $f); + unless ($rev) { + next if $f ne $p; + return Git::SVN->new($fetch->{$f}, $repo_id, $f); + } + my $gs = Git::SVN->new($fetch->{$f}, $repo_id, $f); + my ($max_rev, $max_commit) = $gs->rev_map_max(1); + next if !$max_rev || !$max_commit; + my ($url) = ::cmt_metadata($max_commit); + next if $url ne $full_url || $max_rev > $rev; + if (!$closest_gs || $closest_max_rev < $max_rev) { + $closest_gs = $gs; + $closest_max_rev = $max_rev; + } } } - undef; + $closest_gs && $rev ? 
$closest_gs : undef; } sub init { @@ -2969,18 +2985,35 @@ sub other_gs { $u = $url; $repo_id = $self->{repo_id}; } + my $max_commit; while (1) { # It is possible to tag two different subdirectories at # the same revision. If the url for an existing ref # does not match, we must either find a ref with a # matching url or create a new ref by growing a tail. $gs = Git::SVN->init($u, $p, $repo_id, $ref_id, 1); - my (undef, $max_commit) = $gs->rev_map_max(1); + (undef, $max_commit) = $gs->rev_map_max(1); last if (!$max_commit); my ($url) = ::cmt_metadata($max_commit); last if ($url eq $gs->full_url); $ref_id .= '-'; } + unless ($max_commit) { + # If a brand new ref was created, try to find a matching + # ref with the same url and a smaller revision to use as + # as a seed. This avoids reloading the entire history + # of the repository when the same subdirectory is tagged + # frequently. + my $parent_gs = Git::SVN->find_by_url($new_url, $url, + $branch_from, $r); + if ($parent_gs) { + my ($parent_rev, $parent_commit) = + $parent_gs->rev_map_max(1); + $gs->rev_map_set($parent_rev, $parent_commit); + print STDERR "Using " . $parent_gs->{path} . + " as seed: $ref_id\n" unless $::_q > 1; + } + } print STDERR "Initializing parent: $ref_id\n" unless $::_q > 1; } $gs diff --git a/t/t9157-git-svn-subdir-import-perf.sh b/t/t9157-git-svn-subdir-import-perf.sh new file mode 100755 index 0000000..d28d0e0 --- /dev/null +++ b/t/t9157-git-svn-subdir-import-perf.sh @@ -0,0 +1,59 @@ +#!/bin/sh + +test_description='git svn import subdirectory performance' + +. 
./lib-git-svn.sh + +test_expect_success 'setup svn repo' ' + mkdir -p import/trunk/subdir && + mkdir -p import/branches && + mkdir -p import/tags && + echo "base" >import/trunk/subdir/file && + svn_cmd import -m "import for git svn" import "$svnrepo" && + rm -rf import && + + svn_cmd co "$svnrepo/trunk" svn_project && + j=4 && + (cd svn_project && + i=1 && + while [ $i -le $j ]; do + echo "$i" >>subdir/file && + svn_cmd ci -m "trunk change $i" subdir/file && + i=$(($i+1)) + done + ) && + + svn_cmd cp -m "create tag mytag1" "$svnrepo/trunk/subdir" "$svnrepo/tags/mytag1" && + + (cd svn_project && + i=$(($j+1)) && + echo "$i" >>subdir/file && + svn_cmd ci -m "trunk change $i" subdir/file + ) && + + svn_cmd cp -m "create tag mytag2" "$svnrepo/trunk/subdir" "$svnrepo/tags/mytag2" && + + (cd svn_project && + i=$(($j+2)) && + echo "$i" >>subdir/file && + svn_cmd ci -m "trunk change $i" subdir/file + ) && + + svn_cmd cp -m "create tag mytag3" "$svnrepo/trunk/subdir" "$svnrepo/tags/mytag3" +' + +test_expect_success 'import subdirectory performance' ' + git svn init --stdlayout "$svnrepo" git_project && + cd git_project && + git svn fetch | tee fetch.txt && + + grep "refs/remotes/tags/mytag2@7" fetch.txt >actual.txt && + grep "^r7" actual.txt >expected.txt && + diff -u expected.txt actual.txt && + + git diff --exit-code tags/mytag1..tags/mytag2^^ && + git diff --exit-code tags/mytag1..tags/mytag3^^^ && + git diff --exit-code tags/mytag2..tags/mytag3^^ +' + +test_done -- 1.7.2.1.158.gbd3a97 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html