For the case of multiple projects sharing a single SVN repository, it is common practice to create the standard SVN directory layout within a subdirectory for each project. In such setups, access control is often used to limit what projects a given user may access. This results in git-svn not being able to minimize the URL of the repository to the repository root URL, which triggered problems since path handling for multi-branch setups was not aware of the path prefix for the project directory relative to the repository root. Fix this deficiency by converting all paths received from get_log to a form relative to the URL that is configured for the SVN remote. Also drop the hardcoded URL minimization flag for the multi-branch case since it is not required anymore and can be requested via a command line parameter if desired. Signed-off-by: Mattias Nissler <mattias.nissler@xxxxxx> --- Hi, I've successfully tested this patch against our SVN repo at work, which is quite large in terms of number of files and disk usage. I hope I didn't break anything, I tried to minimize the chance of doing so by isolating the important changes to the handling of the results of the get_log call. Still, I'd appreciate if someone could review the patch to see whether I might have broken something I didn't think of. Please CC me on replies, I'm not on the list. Enjoy! Mattias git-svn.perl | 83 +++++++++++++++++++++++++-------------------------------- 1 files changed, 36 insertions(+), 47 deletions(-) diff --git a/git-svn.perl b/git-svn.perl index ef1d30d..ed95f05 100755 --- a/git-svn.perl +++ b/git-svn.perl @@ -798,10 +798,6 @@ sub cmd_multi_init { usage(1); } - # there are currently some bugs that prevent multi-init/multi-fetch - # setups from working well without this. - $Git::SVN::_minimize_url = 1; - $_prefix = '' unless defined $_prefix; if (defined $url) { $url =~ s#/+$##; @@ -1084,7 +1080,7 @@ sub complete_url_ls_init { "wanted to set to: $gs->{url}\n"; } command_oneline('config', $k, $gs->{url}) unless $orig_url; - my $remote_path = "$ra->{svn_path}/$repo_path"; + my $remote_path = "$gs->{path}/$repo_path"; $remote_path =~ s#/+#/#g; $remote_path =~ s#^/##g; $remote_path .= "/*" if $remote_path !~ /\*/; @@ -2066,16 +2062,6 @@ sub ra { $ra; } -sub rel_path { - my ($self) = @_; - my $repos_root = $self->ra->{repos_root}; - return $self->{path} if ($self->{url} eq $repos_root); - my $url = $self->{url} . - (length $self->{path} ? "/$self->{path}" : $self->{path}); - $url =~ s!^\Q$repos_root\E(?:/+|$)!!g; - $url; -} - # prop_walk(PATH, REV, SUB) # ------------------------- # Recursively traverse PATH at revision REV and invoke SUB for each @@ -2401,10 +2387,7 @@ sub match_paths { if (my $path = $paths->{"/$self->{path}"}) { return ($path->{action} eq 'D') ? 0 : 1; } - my $repos_root = $self->ra->{repos_root}; - my $extended_path = $self->{url} . '/' . $self->{path}; - $extended_path =~ s#^\Q$repos_root\E(/|$)##; - $self->{path_regex} ||= qr/^\/\Q$extended_path\E\//; + $self->{path_regex} ||= qr/^\/\Q$self->{path}\E\//; if (grep /$self->{path_regex}/, keys %$paths) { return 1; } @@ -2427,15 +2410,14 @@ sub find_parent_branch { unless (defined $paths) { my $err_handler = $SVN::Error::handler; $SVN::Error::handler = \&Git::SVN::Ra::skip_unknown_revs; - $self->ra->get_log([$self->{path}], $rev, $rev, 0, 1, 1, sub { - $paths = - Git::SVN::Ra::dup_changed_paths($_[0]) }); + $self->ra->get_log([$self->{path}], $rev, $rev, 0, 1, 1, + sub { $paths = $_[0] }); $SVN::Error::handler = $err_handler; } return undef unless defined $paths; # look for a parent from another branch: - my @b_path_components = split m#/#, $self->rel_path; + my @b_path_components = split m#/#, $self->{path}; my @a_path_components; my $i; while (@b_path_components) { @@ -2453,11 +2435,11 @@ sub find_parent_branch { my $r = $i->{copyfrom_rev}; my $repos_root = $self->ra->{repos_root}; my $url = $self->ra->{url}; - my $new_url = $repos_root . $branch_from; + my $new_url = $url . $branch_from; print STDERR "Found possible branch point: ", "$new_url => ", $self->full_url, ", $r\n"; $branch_from =~ s#^/##; - my $gs = $self->other_gs($new_url, $url, $repos_root, + my $gs = $self->other_gs($new_url, $url, $branch_from, $r, $self->{ref_id}); my ($r0, $parent) = $gs->find_rev_before($r, 1); { @@ -2642,9 +2624,9 @@ sub parse_svn_date { } sub other_gs { - my ($self, $new_url, $url, $repos_root, + my ($self, $new_url, $url, $branch_from, $r, $old_ref_id) = @_; - my $gs = Git::SVN->find_by_url($new_url, $repos_root, $branch_from); + my $gs = Git::SVN->find_by_url($new_url, $url, $branch_from); unless ($gs) { my $ref_id = $old_ref_id; $ref_id =~ s/\@\d+$//; @@ -4280,6 +4262,31 @@ sub get_log { my ($self, @args) = @_; my $pool = SVN::Pool->new; + # svn_log_changed_path_t objects passed to get_log are likely to be + # overwritten even if only the refs are copied to an external variable, + # so we should dup the structures in their entirety. Using an + # externally passed pool (instead of our temporary and quickly cleared + # pool in Git::SVN::Ra) does not help matters at all... + my $receiver = pop @args; + my $prefix = "/".$self->{svn_path}; + $prefix =~ s#/+($)##; + my $prefix_regex = qr#^\Q$prefix\E#; + push(@args, sub { + my ($paths) = $_[0]; + return &$receiver(@_) unless $paths; + $_[0] = (); + foreach my $p (keys %$paths) { + my $i = $paths->{$p}; + # Make path relative to our url, not repos_root + $p =~ s/$prefix_regex//; + my %s = map { $_ => $i->$_; } + qw/copyfrom_path copyfrom_rev action/; + $s{'copyfrom_path'} =~ s/$prefix_regex// if $s{'copyfrom_path'}; + $_[0]{$p} = \%s; + } + &$receiver(@_); + }); + # the limit parameter was not supported in SVN 1.1.x, so we # drop it. Therefore, the receiver callback passed to it # is made aware of this limitation by being wrapped if @@ -4291,6 +4298,7 @@ sub get_log { push(@args, sub { &$receiver(@_) if (--$limit >= 0) }); } } + my $ret = $self->SUPER::get_log(@args, $pool); $pool->clear; $ret; @@ -4448,8 +4456,7 @@ sub gs_fetch_loop_common { }; sub _cb { my ($paths, $r, $author, $date, $log) = @_; - [ dup_changed_paths($paths), - { author => $author, date => $date, log => $log } ]; + [ $paths, { author => $author, date => $date, log => $log } ]; } $self->get_log([$longest_path], $min, $max, 0, 1, 1, sub { $revs{$_[1]} = _cb(@_) }); @@ -4668,24 +4675,6 @@ sub skip_unknown_revs { die "Error from SVN, ($errno): ", $err->expanded_message,"\n"; } -# svn_log_changed_path_t objects passed to get_log are likely to be -# overwritten even if only the refs are copied to an external variable, -# so we should dup the structures in their entirety. Using an externally -# passed pool (instead of our temporary and quickly cleared pool in -# Git::SVN::Ra) does not help matters at all... -sub dup_changed_paths { - my ($paths) = @_; - return undef unless $paths; - my %ret; - foreach my $p (keys %$paths) { - my $i = $paths->{$p}; - my %s = map { $_ => $i->$_ } - qw/copyfrom_path copyfrom_rev action/; - $ret{$p} = \%s; - } - \%ret; -} - package Git::SVN::Log; use strict; use warnings; -- 1.6.2.2
Attachment:
signature.asc
Description: This is a digitally signed message part