[PATCH 2/2] repack: add --filter=<filter-spec> option

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: John Cai <johncai86@xxxxxxxxx>

Currently, repack does not work with partial clones. When repack is run
on a partially cloned repository, it grabs all missing objects from
promisor remotes. This also means that when gc is run for repository
maintenance on a partially cloned repository, it will end up fetching
the missing objects, which is not what we want.

In order to make repack work with partial clone, teach repack a new
option --filter, which takes a <filter-spec> argument. repack will skip
any objects that are matched by <filter-spec>, similar to how the clone
command will skip fetching certain objects.

The final goal of this feature is to be able to store objects on a
server other than the regular git server itself.

There are several scripts added so we can test the process of using a
remote helper to upload blobs to an http server:

- t/lib-httpd/list.sh lists blobs uploaded to the http server.
- t/lib-httpd/upload.sh uploads blobs to the http server.
- t/t0410/git-remote-testhttpgit is a remote helper that can access blobs
  on an http server. Copied over from t/t5801/git-remote-testhttpgit
  and modified to upload blobs to an http server.
- t/t0410/lib-http-promisor.sh provides convenience functions for
  uploading blobs.

Based-on-patch-by: Christian Couder <chriscool@xxxxxxxxxxxxx>
Signed-off-by: John Cai <johncai86@xxxxxxxxx>
---
 Documentation/git-repack.txt   |   5 +
 builtin/repack.c               |  10 ++
 t/lib-httpd.sh                 |   2 +
 t/lib-httpd/apache.conf        |   8 ++
 t/lib-httpd/list.sh            |  43 +++++++++
 t/lib-httpd/upload.sh          |  46 +++++++++
 t/t0410-partial-clone.sh       |  52 ++++++++++
 t/t0410/git-remote-testhttpgit | 170 +++++++++++++++++++++++++++++++++
 t/t7700-repack.sh              |  20 ++++
 9 files changed, 356 insertions(+)
 create mode 100644 t/lib-httpd/list.sh
 create mode 100644 t/lib-httpd/upload.sh
 create mode 100755 t/t0410/git-remote-testhttpgit

diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt
index ee30edc178a..e394ec52ab1 100644
--- a/Documentation/git-repack.txt
+++ b/Documentation/git-repack.txt
@@ -126,6 +126,11 @@ depth is 4095.
 	a larger and slower repository; see the discussion in
 	`pack.packSizeLimit`.
 
+--filter=<filter-spec>::
+	Omits certain objects (usually blobs) from the resulting
+	packfile. See linkgit:git-rev-list[1] for valid
+	`<filter-spec>` forms.
+
 -b::
 --write-bitmap-index::
 	Write a reachability bitmap index as part of the repack. This
diff --git a/builtin/repack.c b/builtin/repack.c
index da1e364a756..9c2e5bcfe3b 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -152,6 +152,7 @@ struct pack_objects_args {
 	const char *depth;
 	const char *threads;
 	const char *max_pack_size;
+	const char *filter;
 	int no_reuse_delta;
 	int no_reuse_object;
 	int quiet;
@@ -172,6 +173,8 @@ static void prepare_pack_objects(struct child_process *cmd,
 		strvec_pushf(&cmd->args, "--threads=%s", args->threads);
 	if (args->max_pack_size)
 		strvec_pushf(&cmd->args, "--max-pack-size=%s", args->max_pack_size);
+	if (args->filter)
+		strvec_pushf(&cmd->args, "--filter=%s", args->filter);
 	if (args->no_reuse_delta)
 		strvec_pushf(&cmd->args, "--no-reuse-delta");
 	if (args->no_reuse_object)
@@ -660,6 +663,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
 				N_("limits the maximum number of threads")),
 		OPT_STRING(0, "max-pack-size", &po_args.max_pack_size, N_("bytes"),
 				N_("maximum size of each packfile")),
+		OPT_STRING(0, "filter", &po_args.filter, N_("args"),
+				N_("object filtering")),
 		OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects,
 				N_("repack objects in packs marked with .keep")),
 		OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"),
@@ -819,6 +824,11 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
 		if (line.len != the_hash_algo->hexsz)
 			die(_("repack: Expecting full hex object ID lines only from pack-objects."));
 		string_list_append(&names, line.buf);
+		if (po_args.filter) {
+			char *promisor_name = mkpathdup("%s-%s.promisor", packtmp,
+							line.buf);
+			write_promisor_file(promisor_name, NULL, 0);
+		}
 	}
 	fclose(out);
 	ret = finish_command(&cmd);
diff --git a/t/lib-httpd.sh b/t/lib-httpd.sh
index 782891908d7..fc6587c6d39 100644
--- a/t/lib-httpd.sh
+++ b/t/lib-httpd.sh
@@ -136,6 +136,8 @@ prepare_httpd() {
 	install_script error-smart-http.sh
 	install_script error.sh
 	install_script apply-one-time-perl.sh
+	install_script upload.sh
+	install_script list.sh
 
 	ln -s "$LIB_HTTPD_MODULE_PATH" "$HTTPD_ROOT_PATH/modules"
 
diff --git a/t/lib-httpd/apache.conf b/t/lib-httpd/apache.conf
index 497b9b9d927..1ea382750f0 100644
--- a/t/lib-httpd/apache.conf
+++ b/t/lib-httpd/apache.conf
@@ -129,6 +129,8 @@ ScriptAlias /broken_smart/ broken-smart-http.sh/
 ScriptAlias /error_smart/ error-smart-http.sh/
 ScriptAlias /error/ error.sh/
 ScriptAliasMatch /one_time_perl/(.*) apply-one-time-perl.sh/$1
+ScriptAlias /upload/ upload.sh/
+ScriptAlias /list/ list.sh/
 <Directory ${GIT_EXEC_PATH}>
 	Options FollowSymlinks
 </Directory>
@@ -156,6 +158,12 @@ ScriptAliasMatch /one_time_perl/(.*) apply-one-time-perl.sh/$1
 <Files ${GIT_EXEC_PATH}/git-http-backend>
 	Options ExecCGI
 </Files>
+<Files upload.sh>
+  Options ExecCGI
+</Files>
+<Files list.sh>
+  Options ExecCGI
+</Files>
 
 RewriteEngine on
 RewriteRule ^/dumb-redir/(.*)$ /dumb/$1 [R=301]
diff --git a/t/lib-httpd/list.sh b/t/lib-httpd/list.sh
new file mode 100644
index 00000000000..e63406be3b2
--- /dev/null
+++ b/t/lib-httpd/list.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+
+# Used in the httpd test server; called by a remote helper to list objects.
+
+FILES_DIR="www/files"
+
+OLDIFS="$IFS"
+IFS='&'
+set -- $QUERY_STRING
+IFS="$OLDIFS"
+
+while test $# -gt 0
+do
+	key=${1%%=*}
+	val=${1#*=}
+
+	case "$key" in
+	"sha1") sha1="$val" ;;
+	*) echo >&2 "unknown key '$key'" ;;
+	esac
+
+	shift
+done
+
+if test -d "$FILES_DIR"
+then
+	if test -z "$sha1"
+	then
+		echo 'Status: 200 OK'
+		echo
+		ls "$FILES_DIR" | tr '-' ' '
+	else
+		if test -f "$FILES_DIR/$sha1"-*
+		then
+			echo 'Status: 200 OK'
+			echo
+			cat "$FILES_DIR/$sha1"-*
+		else
+			echo 'Status: 404 Not Found'
+			echo
+		fi
+	fi
+fi
diff --git a/t/lib-httpd/upload.sh b/t/lib-httpd/upload.sh
new file mode 100644
index 00000000000..202de63b2dc
--- /dev/null
+++ b/t/lib-httpd/upload.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+
+# In part from http://codereview.stackexchange.com/questions/79549/bash-cgi-upload-file
+# Used in the httpd test server; called by a remote helper to upload blobs.
+
+FILES_DIR="www/files"
+
+OLDIFS="$IFS"
+IFS='&'
+set -- $QUERY_STRING
+IFS="$OLDIFS"
+
+while test $# -gt 0
+do
+	key=${1%%=*}
+	val=${1#*=}
+
+	case "$key" in
+	"sha1") sha1="$val" ;;
+	"type") type="$val" ;;
+	"size") size="$val" ;;
+	"delete") delete=1 ;;
+	*) echo >&2 "unknown key '$key'" ;;
+	esac
+
+	shift
+done
+
+case "$REQUEST_METHOD" in
+POST)
+	if test "$delete" = "1"
+	then
+		rm -f "$FILES_DIR/$sha1-$size-$type"
+	else
+		mkdir -p "$FILES_DIR"
+		cat >"$FILES_DIR/$sha1-$size-$type"
+	fi
+
+	echo 'Status: 204 No Content'
+	echo
+	;;
+
+*)
+	echo 'Status: 405 Method Not Allowed'
+	echo
+esac
diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh
index f17abd298c8..731f6bebc64 100755
--- a/t/t0410-partial-clone.sh
+++ b/t/t0410-partial-clone.sh
@@ -30,6 +30,31 @@ promise_and_delete () {
 	delete_object repo "$HASH"
 }
 
+upload_blob() {
+	SERVER_REPO="$1"
+	HASH="$2"
+
+	test -n "$HASH" || die "Invalid argument '$HASH'"
+	HASH_SIZE=$(git -C "$SERVER_REPO" cat-file -s "$HASH") || {
+		echo >&2 "Cannot get blob size of '$HASH'"
+		return 1
+	}
+
+	UPLOAD_URL="http://127.0.0.1:$LIB_HTTPD_PORT/upload/?sha1=$HASH&size=$HASH_SIZE&type=blob";
+
+	git -C "$SERVER_REPO" cat-file blob "$HASH" >object &&
+	curl --data-binary @object --include "$UPLOAD_URL"
+}
+
+upload_blobs_from_stdin() {
+	SERVER_REPO="$1"
+	while read -r blob
+	do
+		echo "uploading $blob"
+		upload_blob "$SERVER_REPO" "$blob" || return
+	done
+}
+
 test_expect_success 'extensions.partialclone without filter' '
 	test_create_repo server &&
 	git clone --filter="blob:none" "file://$(pwd)/server" client &&
@@ -668,6 +693,33 @@ test_expect_success 'fetching of missing objects from an HTTP server' '
 	grep "$HASH" out
 '
 
+PATH="$TEST_DIRECTORY/t0410:$PATH"
+
+test_expect_success 'fetch of missing objects through remote helper' '
+	rm -rf origin server &&
+	test_create_repo origin &&
+	dd if=/dev/zero of=origin/file1 bs=801k count=1 &&
+	git -C origin add file1 &&
+	git -C origin commit -m "large blob" &&
+	sha="$(git -C origin rev-parse :file1)" &&
+	expected="?$(git -C origin rev-parse :file1)" &&
+	git clone --bare --no-local origin server &&
+	git -C server remote add httpremote "testhttpgit::${PWD}/server" &&
+	git -C server config remote.httpremote.promisor true &&
+	git -C server config --remove-section remote.origin &&
+	git -C server rev-list --all --objects --filter-print-omitted \
+		--filter=blob:limit=800k | perl -ne "print if s/^[~]//" \
+		>large_blobs.txt &&
+	upload_blobs_from_stdin server <large_blobs.txt &&
+	git -C server -c repack.writebitmaps=false repack -a -d \
+		--filter=blob:limit=800k &&
+	git -C server rev-list --objects --all --missing=print >objects &&
+	grep "$expected" objects &&
+	HTTPD_URL=$HTTPD_URL git -C server show $sha &&
+	git -C server rev-list --objects --all --missing=print >objects &&
+	grep "$sha" objects
+'
+
 # DO NOT add non-httpd-specific tests here, because the last part of this
 # test script is only executed when httpd is available and enabled.
 
diff --git a/t/t0410/git-remote-testhttpgit b/t/t0410/git-remote-testhttpgit
new file mode 100755
index 00000000000..e5e187243ed
--- /dev/null
+++ b/t/t0410/git-remote-testhttpgit
@@ -0,0 +1,170 @@
+#!/bin/sh
+# Copyright (c) 2012 Felipe Contreras
+# Copyright (c) 2020 Christian Couder
+
+# This is a git remote helper that can be used to store blobs on an http server
+
+# The first argument can be a url when the fetch/push command was a url
+# instead of a configured remote. In this case, use a generic alias.
+if test "$1" = "testhttpgit::$2"; then
+	alias=_
+else
+	alias=$1
+fi
+url=$2
+
+unset GIT_DIR
+
+h_refspec="refs/heads/*:refs/testhttpgit/$alias/heads/*"
+t_refspec="refs/tags/*:refs/testhttpgit/$alias/tags/*"
+
+if test -n "$GIT_REMOTE_TESTHTTPGIT_NOREFSPEC"
+then
+	h_refspec=""
+	t_refspec=""
+fi
+
+die () {
+	echo >&2 "fatal: $*"
+	echo "fatal: $*" >>/tmp/t0430.txt
+	echo >>/tmp/t0430.txt
+	exit 1
+}
+
+force=
+
+mark_count_tmp=$(mktemp -t git-remote-http-mark-count_XXXXXX) || die "Failed to create temp file"
+echo "1" >"$mark_count_tmp"
+
+get_mark_count() {
+	mark=$(cat "$mark_count_tmp")
+	echo "$mark"
+	mark=$((mark+1))
+	echo "$mark" >"$mark_count_tmp"	
+}
+
+export_blob_from_file() {
+	file="$1"
+	echo "blob"
+	echo "mark :$(get_mark_count)"
+	size=$(wc -c <"$file") || return
+	echo "data $size"
+	cat "$file" || return
+	echo
+}
+
+while read line
+do
+	case $line in
+	capabilities)
+		echo 'import'
+		echo 'export'
+		test -n "$h_refspec" && echo "refspec $h_refspec"
+		test -n "$t_refspec" && echo "refspec $t_refspec"
+		test -n "$GIT_REMOTE_TESTHTTPGIT_SIGNED_TAGS" && echo "signed-tags"
+		test -n "$GIT_REMOTE_TESTHTTPGIT_NO_PRIVATE_UPDATE" && echo "no-private-update"
+		echo 'option'
+		echo
+		;;
+	list)
+		git -C "$url" for-each-ref --format='? %(refname)' 'refs/heads/' 'refs/tags/'
+		head=$(git -C "$url" symbolic-ref HEAD)
+		echo "@$head HEAD"
+		echo
+		;;
+	import*)
+		# read all import lines
+		while true
+		do
+			ref="${line#* }"
+			refs="$refs $ref"
+			read line
+			test "${line%% *}" != "import" && break
+		done
+
+		echo "refs: $refs" >>/tmp/t0430.txt
+
+		if test -n "$GIT_REMOTE_TESTHTTPGIT_FAILURE"
+		then
+			echo "feature done"
+			exit 1
+		fi
+
+		echo "feature done"
+
+		tmpdir=$(mktemp -d -t git-remote-http-import_XXXXXX) || die "Failed to create temp directory"
+
+		for ref in $refs
+		do
+			get_url="$HTTPD_URL/list/?sha1=$ref"
+			echo "curl url: $get_url" >>/tmp/t0430.txt
+			echo "curl output: $tmpdir/$ref" >>/tmp/t0430.txt
+			curl -s -o "$tmpdir/$ref" "$get_url" ||
+				die "curl '$get_url' failed"
+			echo "exporting from: $tmpdir/$ref" >>/tmp/t0430.txt
+			export_blob_from_file "$tmpdir/$ref" ||
+				die "failed to export blob from '$tmpdir/$ref'"
+			echo "done exporting" >>/tmp/t0430.txt
+		done
+
+		echo "done"
+		;;
+	export)
+		if test -n "$GIT_REMOTE_TESTHTTPGIT_FAILURE"
+		then
+			# consume input so fast-export doesn't get SIGPIPE;
+			# git would also notice that case, but we want
+			# to make sure we are exercising the later
+			# error checks
+			while read line; do
+				test "done" = "$line" && break
+			done
+			exit 1
+		fi
+
+		before=$(git -C "$url" for-each-ref --format=' %(refname) %(objectname) ')
+
+		git -C "$url" fast-import \
+			${force:+--force} \
+			${testhttpgitmarks:+"--import-marks=$testhttpgitmarks"} \
+			${testhttpgitmarks:+"--export-marks=$testhttpgitmarks"} \
+			--quiet
+
+		# figure out which refs were updated
+		git -C "$url" for-each-ref --format='%(refname) %(objectname)' |
+		while read ref a
+		do
+			case "$before" in
+			*" $ref $a "*)
+				continue ;;	# unchanged
+			esac
+			if test -z "$GIT_REMOTE_TESTHTTPGIT_PUSH_ERROR"
+			then
+				echo "ok $ref"
+			else
+				echo "error $ref $GIT_REMOTE_TESTHTTPGIT_PUSH_ERROR"
+			fi
+		done
+
+		echo
+		;;
+	option\ *)
+		read cmd opt val <<-EOF
+		$line
+		EOF
+		case $opt in
+		force)
+			test $val = "true" && force="true" || force=
+			echo "ok"
+			;;
+		*)
+			echo "unsupported"
+			;;
+		esac
+		;;
+	'')
+		exit
+		;;
+	esac
+done
+
diff --git a/t/t7700-repack.sh b/t/t7700-repack.sh
index e489869dd94..78cc1858cb6 100755
--- a/t/t7700-repack.sh
+++ b/t/t7700-repack.sh
@@ -237,6 +237,26 @@ test_expect_success 'auto-bitmaps do not complain if unavailable' '
 	test_must_be_empty actual
 '
 
+test_expect_success 'repack with filter does not fetch from remote' '
+	rm -rf server client &&
+	test_create_repo server &&
+	git -C server config uploadpack.allowFilter true &&
+	git -C server config uploadpack.allowAnySHA1InWant true &&
+	echo content1 >server/file1 &&
+	git -C server add file1 &&
+	git -C server commit -m initial_commit &&
+	expected="?$(git -C server rev-parse :file1)" &&
+	git clone --bare --no-local server client &&
+	git -C client config remote.origin.promisor true &&
+	git -C client -c repack.writebitmaps=false repack -a -d --filter=blob:none &&
+	git -C client rev-list --objects --all --missing=print >objects &&
+	grep "$expected" objects &&
+	git -C client repack -a -d &&
+	expected="$(git -C server rev-parse :file1)" &&
+	git -C client rev-list --objects --all --missing=print >objects &&
+	grep "$expected" objects
+'
+
 objdir=.git/objects
 midx=$objdir/pack/multi-pack-index
 
-- 
gitgitgadget



[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux