[PATCH 2/3] Make filter-branch --blob-filter much faster by not calling 'cat'

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The main loop of munge_blobs() had to fork-exec "cat" every time through the
loop, even when a blob was already cached.  Let's use the sh builtin 'read'
instead for a huge speedup.

cd git
time git filter-branch --blob-filter 'tr a-z A-Z' HEAD~10..HEAD

(original --blob-filter)
real    3m58.569s
user    0m22.900s
sys     3m32.030s

(with 'cat' calls removed)
real	1m11.931s
user	0m8.520s
sys	1m2.900s

(with 'cat' calls removed and blob cache already filled)
real	0m19.660s
user	0m3.930s
sys	0m15.720s

Signed-off-by: Avery Pennarun <apenwarr@xxxxxxxxx>
---
 git-filter-branch.sh |   18 +++++++++++-------
 1 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/git-filter-branch.sh b/git-filter-branch.sh
index 980c431..37ac99d 100755
--- a/git-filter-branch.sh
+++ b/git-filter-branch.sh
@@ -57,16 +57,18 @@ eval "$functions"
 munge_blobs() {
 	while read mode sha1 stage path
 	do
-		if ! test -r "$workdir/../blob-cache/$sha1"
+		if ! test -r "$cachedir/$sha1"
 		then
-			new=`git cat-file blob $sha1 |
-			     eval "$filter_blob" |
-			     git hash-object -w --stdin`
-			printf $new >$workdir/../blob-cache/$sha1
+			new=$(git cat-file blob $sha1 |
+			      eval "$filter_blob" |
+			      git hash-object -w --stdin)
+			printf $new >$cachedir/$sha1
+		else
+			read new <"$cachedir/$sha1"
 		fi
 		printf "%s %s\t%s\n" \
 			"$mode" \
-			$(cat "$workdir/../blob-cache/$sha1") \
+			"$new" \
 			"$path"
 	done
 }
@@ -108,6 +110,7 @@ USAGE="[--env-filter <command>] [--tree-filter <command>] \
 [--index-filter <command>] [--parent-filter <command>] \
 [--msg-filter <command>] [--commit-filter <command>] \
 [--tag-name-filter <command>] [--subdirectory-filter <directory>] \
+[--blob-filter <command>] \
 [--original <namespace>] [-d <directory>] [-f | --force] \
 [<rev-list options>...]"
 
@@ -249,7 +252,8 @@ ret=0
 mkdir ../map || die "Could not create map/ directory"
 
 # cache rewritten blobs for blob filter
-mkdir ../blob-cache || die "Could not create blob-cache/ directory"
+cachedir="$workdir/../blob-cache"
+mkdir "$cachedir" || die "Could not create blob-cache/ directory"
 
 case "$filter_subdir" in
 "")
-- 
1.5.4.3

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux