[PATCH 9/9] git-svn: Make fetch ~1.7x faster

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



We were spending a lot of time forking/execing git-cat-file and
git-hash-object. We now maintain a global Git repository object in order to use
Git.pm's more efficient hash_and_insert_object and cat_blob methods.

Signed-off-by: Adam Roben <aroben@xxxxxxxxx>
---
Eric Wong wrote:
> > +sub hash_object {
> > +   my (undef, $fh) = @_;
> > +
> > +   my ($tmp_fh, $tmp_filename) = tempfile(UNLINK => 1);
> > +   while (my $line = <$fh>) {
> > +           print $tmp_fh $line;
> > +   }
> > +   close($tmp_fh);
> 
> Related to the above.  It's better to sysread()/syswrite() or
> read()/print() in a loop with a predefined buffer size rather than to
> use a readline() since you could be dealing with files with very long
> lines or binaries with no newline characters in them at all.

Fixed.

> > +   _open_hash_object_if_needed();
> > +   print $_hash_object_out $tmp_filename . "\n";
> 
> Minor, but
> 
>         print $_hash_object_out $tmp_filename, "\n";
> 
> avoids creating a new string.

Fixed.

 git-svn.perl |   40 ++++++++++++++++++----------------------
 1 files changed, 18 insertions(+), 22 deletions(-)

diff --git a/git-svn.perl b/git-svn.perl
index 22bb47b..fcb07f5 100755
--- a/git-svn.perl
+++ b/git-svn.perl
@@ -4,7 +4,7 @@
 use warnings;
 use strict;
 use vars qw/	$AUTHOR $VERSION
-		$sha1 $sha1_short $_revision
+		$sha1 $sha1_short $_revision $_repository
 		$_q $_authors %users/;
 $AUTHOR = 'Eric Wong <normalperson@xxxxxxxx>';
 $VERSION = '@@GIT_VERSION@@';
@@ -225,6 +225,7 @@ unless ($cmd =~ /(?:clone|init|multi-init)$/) {
 		}
 		$ENV{GIT_DIR} = $git_dir;
 	}
+	$_repository = Git->repository(Repository => $ENV{GIT_DIR});
 }
 unless ($cmd =~ /^(?:clone|init|multi-init|commit-diff)$/) {
 	Git::SVN::Migration::migration_check();
@@ -332,6 +333,7 @@ sub cmd_init {
 	                       "as a command-line argument\n";
 	init_subdir(@_);
 	do_git_init_db();
+	$_repository = Git->repository(Repository => $ENV{GIT_DIR});
 
 	Git::SVN->init($url);
 }
@@ -2541,6 +2543,7 @@ use vars qw/@ISA/;
 use strict;
 use warnings;
 use Carp qw/croak/;
+use File::Temp qw/tempfile/;
 use IO::File qw//;
 use Digest::MD5;
 
@@ -2683,14 +2686,8 @@ sub apply_textdelta {
 	my $base = IO::File->new_tmpfile;
 	$base->autoflush(1);
 	if ($fb->{blob}) {
-		defined (my $pid = fork) or croak $!;
-		if (!$pid) {
-			open STDOUT, '>&', $base or croak $!;
-			print STDOUT 'link ' if ($fb->{mode_a} == 120000);
-			exec qw/git-cat-file blob/, $fb->{blob} or croak $!;
-		}
-		waitpid $pid, 0;
-		croak $? if $?;
+		my $contents = $::_repository->cat_blob($fb->{blob});
+		print $base $contents;
 
 		if (defined $exp) {
 			seek $base, 0, 0 or croak $!;
@@ -2729,14 +2726,18 @@ sub close_file {
 			$buf eq 'link ' or die "$path has mode 120000",
 			                       "but is not a link\n";
 		}
-		defined(my $pid = open my $out,'-|') or die "Can't fork: $!\n";
-		if (!$pid) {
-			open STDIN, '<&', $fh or croak $!;
-			exec qw/git-hash-object -w --stdin/ or croak $!;
+
+		my ($tmp_fh, $tmp_filename) = File::Temp::tempfile(UNLINK => 1);
+		my $result;
+		while ($result = sysread($fh, my $string, 1024)) {
+			syswrite($tmp_fh, $string, $result);
 		}
-		chomp($hash = do { local $/; <$out> });
-		close $out or croak $!;
+		defined $result or croak $!;
+		close $tmp_fh or croak $!;
+
 		close $fh or croak $!;
+
+		$hash = $::_repository->hash_and_insert_object($tmp_filename);
 		$hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n";
 		close $fb->{base} or croak $!;
 	} else {
@@ -3063,13 +3064,8 @@ sub chg_file {
 	} elsif ($m->{mode_a} =~ /^120/ && $m->{mode_b} !~ /^120/) {
 		$self->change_file_prop($fbat,'svn:special',undef);
 	}
-	defined(my $pid = fork) or croak $!;
-	if (!$pid) {
-		open STDOUT, '>&', $fh or croak $!;
-		exec qw/git-cat-file blob/, $m->{sha1_b} or croak $!;
-	}
-	waitpid $pid, 0;
-	croak $? if $?;
+	my $blob = $::_repository->cat_blob($m->{sha1_b});
+	print $fh $blob;
 	$fh->flush == 0 or croak $!;
 	seek $fh, 0, 0 or croak $!;
 
-- 
1.5.3.4.1337.g8e67d-dirty

-
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux