[PATCH v3 3/3] object-info: add option for retrieving object info

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Sometimes it is useful to get information about an object without
having to download it completely. The server logic has already been
implemented as “a2ba162cda (object-info: support for retrieving object
info, 2021-04-20)”. This patch implements the client option for
it. Currently, only 'size' is supported, however future patches can
implement additional options.

Add ‘--object-info’ option to fetch. This option allows the client to
make an object-info command request to a server that supports protocol
v2. If the server is v2, but does not allow for the object-info
command request, fetch the objects as if it were a standard fetch
(however without changing any refs). Therefore, hook `fetch
object-info` into transport_fetch_refs() to easily fallback if needed.

A previous patch added the `transfer.advertiseObjectInfo` config
option, of which this patch utilizes to test the fallback.

---
 Documentation/fetch-options.txt |   5 ++
 Documentation/git-fetch.txt     |   1 +
 builtin/fetch.c                 |  36 ++++++++-
 fetch-pack.c                    |  42 +++++++++-
 fetch-pack.h                    |   9 +++
 t/t5583-fetch-object-info.sh    | 138 ++++++++++++++++++++++++++++++++
 transport-helper.c              |   8 +-
 transport-internal.h            |   1 +
 transport.c                     |  75 ++++++++++++++++-
 transport.h                     |   9 +++
 10 files changed, 315 insertions(+), 9 deletions(-)
 create mode 100755 t/t5583-fetch-object-info.sh

diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
index f903683189..f1ca95c32d 100644
--- a/Documentation/fetch-options.txt
+++ b/Documentation/fetch-options.txt
@@ -299,3 +299,8 @@ endif::git-pull[]
 -6::
 --ipv6::
 	Use IPv6 addresses only, ignoring IPv4 addresses.
+
+--object-info=[<arguments>]::
+	Fetches only the relevant information passed in arguments from a
+	specific remote and set of object ids. Object 'size' is currently
+	the only supported argument.
diff --git a/Documentation/git-fetch.txt b/Documentation/git-fetch.txt
index 550c16ca61..a13d2ba7ad 100644
--- a/Documentation/git-fetch.txt
+++ b/Documentation/git-fetch.txt
@@ -13,6 +13,7 @@ SYNOPSIS
 'git fetch' [<options>] <group>
 'git fetch' --multiple [<options>] [(<repository> | <group>)...]
 'git fetch' --all [<options>]
+'git fetch' --object-info=[<arguments>] <remote> [<object-ids>]
 
 
 DESCRIPTION
diff --git a/builtin/fetch.c b/builtin/fetch.c
index 4d12c2fd4d..0b21bebfca 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -29,6 +29,9 @@
 #include "commit-graph.h"
 #include "shallow.h"
 #include "worktree.h"
+#include "protocol.h"
+#include "pkt-line.h"
+#include "connect.h"
 
 #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000)
 
@@ -37,6 +40,7 @@ static const char * const builtin_fetch_usage[] = {
 	N_("git fetch [<options>] <group>"),
 	N_("git fetch --multiple [<options>] [(<repository> | <group>)...]"),
 	N_("git fetch --all [<options>]"),
+	N_("git fetch --object-info=[<arguments>] <remote> [<object-ids>]"),
 	NULL
 };
 
@@ -86,6 +90,7 @@ static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP;
 static int fetch_write_commit_graph = -1;
 static int stdin_refspecs = 0;
 static int negotiate_only;
+static struct string_list object_info_options = STRING_LIST_INIT_NODUP;
 
 static int git_fetch_config(const char *k, const char *v, void *cb)
 {
@@ -221,6 +226,8 @@ static struct option builtin_fetch_options[] = {
 		 N_("write the commit-graph after fetching")),
 	OPT_BOOL(0, "stdin", &stdin_refspecs,
 		 N_("accept refspecs from stdin")),
+	OPT_STRING_LIST(0, "object-info", &object_info_options, N_("option"),
+		 N_("command request arguments")),
 	OPT_END()
 };
 
@@ -2087,6 +2094,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
 	struct remote *remote = NULL;
 	int result = 0;
 	int prune_tags_ok = 1;
+	struct oid_array object_info_oids = OID_ARRAY_INIT;
 
 	packet_trace_identity("fetch");
 
@@ -2168,7 +2176,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
 	if (dry_run)
 		write_fetch_head = 0;
 
-	if (all) {
+	if (object_info_options.nr > 0) {
+		if (argc == 0 || argc == 1) {
+			die(_("must supply remote and object ids when using --object-info"));
+		} else {
+			struct object_id oid;
+			remote = remote_get(argv[0]);
+			for (i = 1; i < argc; i++) {
+				if (get_oid(argv[i], &oid))
+					return error(_("malformed object id '%s'"), argv[i]);
+				oid_array_append(&object_info_oids, &oid);
+			}
+		}
+	} else if (all) {
 		if (argc == 1)
 			die(_("fetch --all does not take a repository argument"));
 		else if (argc > 1)
@@ -2199,7 +2219,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
 		}
 	}
 
-	if (negotiate_only) {
+	if (object_info_options.nr > 0) {
+		if (!remote)
+			die(_("must supply remote when using --object-info"));
+		gtransport = prepare_transport(remote, 1);
+		if (gtransport->smart_options) {
+			gtransport->smart_options->object_info = 1;
+			gtransport->smart_options->object_info_oids = &object_info_oids;
+			gtransport->smart_options->object_info_options = &object_info_options;
+		}
+		if (server_options.nr)
+			gtransport->server_options = &server_options;
+		result = transport_fetch_refs(gtransport, NULL);
+	} else if (negotiate_only) {
 		struct oidset acked_commits = OIDSET_INIT;
 		struct oidset_iter iter;
 		const struct object_id *oid;
diff --git a/fetch-pack.c b/fetch-pack.c
index b709a61baf..36e3d1c5d0 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1269,6 +1269,27 @@ static void write_command_and_capabilities(struct strbuf *req_buf,
 	packet_buf_delim(req_buf);
 }
 
+void send_object_info_request(int fd_out, struct object_info_args *args)
+{
+	struct strbuf req_buf = STRBUF_INIT;
+	int i;
+
+	write_command_and_capabilities(&req_buf, args->server_options, "object-info");
+
+	if (string_list_has_string(args->object_info_options, "size"))
+		packet_buf_write(&req_buf, "size");
+
+	for (i = 0; i < args->oids->nr; i++) {
+		packet_buf_write(&req_buf, "oid %s\n", oid_to_hex(&args->oids->oid[i]));
+	}
+
+	packet_buf_flush(&req_buf);
+	if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0)
+		die_errno(_("unable to write request to remote"));
+
+	strbuf_release(&req_buf);
+}
+
 static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
 			      struct fetch_pack_args *args,
 			      const struct ref *wants, struct oidset *common,
@@ -1604,6 +1625,10 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 	if (args->depth > 0 || args->deepen_since || args->deepen_not)
 		args->deepen = 1;
 
+	if (args->object_info) {
+		state = FETCH_SEND_REQUEST;
+	}
+
 	while (state != FETCH_DONE) {
 		switch (state) {
 		case FETCH_CHECK_LOCAL:
@@ -1613,7 +1638,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 			/* Filter 'ref' by 'sought' and those that aren't local */
 			mark_complete_and_common_ref(negotiator, args, &ref);
 			filter_refs(args, &ref, sought, nr_sought);
-			if (everything_local(args, &ref))
+			if (!args->object_info && everything_local(args, &ref))
 				state = FETCH_DONE;
 			else
 				state = FETCH_SEND_REQUEST;
@@ -1999,8 +2024,19 @@ struct ref *fetch_pack(struct fetch_pack_args *args,
 		}
 		args->connectivity_checked = 1;
 	}
-
-	update_shallow(args, sought, nr_sought, &si);
+	
+	if (args->object_info) {
+		struct ref *ref_cpy_reader = ref_cpy;
+		unsigned long size = 0;
+		while (ref_cpy_reader) {
+			oid_object_info(the_repository, &(ref_cpy_reader->old_oid), &size);
+			printf("%s %li\n", oid_to_hex(&(ref_cpy_reader->old_oid)), size);
+			ref_cpy_reader = ref_cpy_reader->next;
+		}
+	}
+	else {
+		update_shallow(args, sought, nr_sought, &si);
+	}
 cleanup:
 	clear_shallow_info(&si);
 	oid_array_clear(&shallows_scratch);
diff --git a/fetch-pack.h b/fetch-pack.h
index 7f94a2a583..23e69bb218 100644
--- a/fetch-pack.h
+++ b/fetch-pack.h
@@ -42,6 +42,7 @@ struct fetch_pack_args {
 	unsigned update_shallow:1;
 	unsigned reject_shallow_remote:1;
 	unsigned deepen:1;
+	unsigned object_info:1;
 
 	/*
 	 * Indicate that the remote of this request is a promisor remote. The
@@ -68,6 +69,12 @@ struct fetch_pack_args {
 	unsigned connectivity_checked:1;
 };
 
+struct object_info_args {
+	const struct string_list *object_info_options;
+	const struct string_list *server_options;
+	const struct oid_array *oids;
+};
+
 /*
  * sought represents remote references that should be updated from.
  * On return, the names that were found on the remote will have been
@@ -101,4 +108,6 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips,
  */
 int report_unmatched_refs(struct ref **sought, int nr_sought);
 
+void send_object_info_request(int fd_out, struct object_info_args *args);
+
 #endif
diff --git a/t/t5583-fetch-object-info.sh b/t/t5583-fetch-object-info.sh
new file mode 100755
index 0000000000..e5db6d953c
--- /dev/null
+++ b/t/t5583-fetch-object-info.sh
@@ -0,0 +1,138 @@
+#!/bin/sh
+
+test_description='test git fetch object-info version 2'
+
+TEST_NO_CREATE_REPO=1
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+
+. ./test-lib.sh
+
+# Test fetch object-info with 'git://' transport
+
+. "$TEST_DIRECTORY"/lib-git-daemon.sh
+start_git_daemon --export-all --enable=receive-pack
+daemon_parent=$GIT_DAEMON_DOCUMENT_ROOT_PATH/parent
+
+
+test_expect_success 'create repo to be served by git-daemon' '
+	git init "$daemon_parent" &&
+	test_commit -C "$daemon_parent" message1 a.txt
+'
+
+test_expect_success 'fetch object-info with git:// using protocol v2' '
+	(
+		cd "$daemon_parent" &&
+		object_id=$(git rev-parse message1:a.txt) &&
+		length=$(wc -c <a.txt) &&
+
+		printf "%s %d\n" "$object_id" "$length" >expect &&
+		git -c protocol.version=2 fetch --object-info=size "$GIT_DAEMON_URL/parent" "$object_id" >actual &&
+		test_cmp expect actual
+	)
+'
+stop_git_daemon
+
+# Test protocol v2 with 'http://' transport
+
+. "$TEST_DIRECTORY"/lib-httpd.sh
+start_httpd
+
+test_expect_success 'create repo to be served by http:// transport' '
+	git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+	git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config http.receivepack true &&
+	test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" message1 a.txt &&
+	test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" message2 b.txt
+'
+
+test_expect_success 'fetch object-info with http:// using protocol v2' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		object_id=$(git rev-parse message1:a.txt) &&
+		length=$(wc -c <a.txt) &&
+
+		printf "%s %d\n" "$object_id" "$length" >expect &&
+		git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" >actual &&
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'fetch object-info for multiple objects' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		object_id1=$(git rev-parse message1:a.txt) &&
+		object_id2=$(git rev-parse message2:b.txt) &&
+		length1=$(wc -c <a.txt) &&
+		length2=$(wc -c <b.txt) &&
+
+		printf "%s %d\n" "$object_id1" "$length1" >expect &&
+		printf "%s %d\n" "$object_id2" "$length2" >>expect &&
+		git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id1" "$object_id2" >actual &&
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'fetch object-info fallbacks to standard fetch if object-info is not supported by the server' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		object_id=$(git rev-parse message1:a.txt) &&
+		length=$(wc -c <a.txt) &&
+
+		printf "%s %d\n" "$object_id" "$length" >expect &&
+		git config objectinfo.advertise false &&
+		git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" >actual &&
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'fetch object-info fails on server with legacy protocol' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		object_id=$(git rev-parse message1:a.txt) &&
+		test_must_fail git -c protocol.version=0 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" 2>err &&
+		test_i18ngrep "object-info requires protocol v2" err
+	)
+'
+
+test_expect_success 'fetch object-info fails on malformed OID' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" &&
+		malformed_object_id="this_id_is_not_valid" &&
+		test_must_fail git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$malformed_object_id" 2>err &&
+		test_i18ngrep "malformed object id '$malformed_object_id'" err
+	)
+'
+
+test_expect_success 'fetch object-info fails on missing OID' '
+	git clone "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" missing_oid_repo &&
+	test_commit -C missing_oid_repo message3 c.txt &&
+	(
+		cd missing_oid_repo &&
+		object_id=$(git rev-parse message3:c.txt) &&
+		test_must_fail env GIT_TRACE_PACKET=1 git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" 2>err &&
+		test_i18ngrep "fatal: remote error: upload-pack: not our ref $object_id" err
+	)
+'
+
+# Test fetch object-info with 'file://' transport
+
+test_expect_success 'create repo to be served by file:// transport' '
+	git init server &&
+	test_commit -C server message1 a.txt &&
+	git -C server config protocol.version 2
+'
+
+test_expect_success 'fetch object-info with file:// using protocol v2' '
+	(
+		cd server &&
+		object_id=$(git rev-parse message1:a.txt) &&
+		length=$(wc -c <a.txt) &&
+
+		printf "%s %d\n" "$object_id" "$length" >expect &&
+		git -c protocol.version=2 fetch --object-info=size "file://$(pwd)" "$object_id" >actual &&
+		test_cmp expect actual
+	)
+'
+
+test_done
diff --git a/transport-helper.c b/transport-helper.c
index a0297b0986..64c175e904 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -686,13 +686,17 @@ static int fetch_refs(struct transport *transport,
 
 	/*
 	 * If we reach here, then the server, the client, and/or the transport
-	 * helper does not support protocol v2. --negotiate-only requires
-	 * protocol v2.
+	 * helper does not support protocol v2. --negotiate-only and --object-info
+	 * require protocol v2.
 	 */
 	if (data->transport_options.acked_commits) {
 		warning(_("--negotiate-only requires protocol v2"));
 		return -1;
 	}
+	if (transport->smart_options->object_info) {
+		warning(_("--object-info requires protocol v2"));
+		return -1;
+	}
 
 	if (!data->get_refs_list_called)
 		get_refs_list_using_list(transport, 0);
diff --git a/transport-internal.h b/transport-internal.h
index c4ca0b733a..04fa015011 100644
--- a/transport-internal.h
+++ b/transport-internal.h
@@ -59,6 +59,7 @@ struct transport_vtable {
 	 * use. disconnect() releases these resources.
 	 **/
 	int (*disconnect)(struct transport *connection);
+	int (*fetch_object_info)(struct transport *transport, struct oid_array *oids);
 };
 
 #endif
diff --git a/transport.c b/transport.c
index 70e9840a90..65a1b1fdb3 100644
--- a/transport.c
+++ b/transport.c
@@ -350,6 +350,67 @@ static struct ref *handshake(struct transport *transport, int for_push,
 	return refs;
 }
 
+/*
+ * Fetches object info if server supports object-info command
+ * Make sure to fallback to normal fetch if this fails
+ */
+static int fetch_object_info(struct transport *transport)
+{
+	struct git_transport_data *data = transport->data;
+	struct object_info_args args;
+	struct packet_reader reader;
+
+	memset(&args, 0, sizeof(args));
+	args.server_options = transport->server_options;
+	args.object_info_options = transport->smart_options->object_info_options;
+	args.oids = transport->smart_options->object_info_oids;
+
+	connect_setup(transport, 0);
+	packet_reader_init(&reader, data->fd[0], NULL, 0,
+			PACKET_READ_CHOMP_NEWLINE |
+			PACKET_READ_DIE_ON_ERR_PACKET);
+	data->version = discover_version(&reader);
+
+	transport->hash_algo = reader.hash_algo;
+
+	switch (data->version) {
+	case protocol_v2:
+		if (!server_supports_v2("object-info", 0))
+			return 0;
+		send_object_info_request(data->fd[1], &args);
+		break;
+	case protocol_v1:
+	case protocol_v0:
+		die(_("wrong protocol version. expected v2"));
+	case protocol_unknown_version:
+		BUG("unknown protocol version");
+	}
+
+	if (packet_reader_read(&reader) != PACKET_READ_NORMAL) {
+		die(_("error reading object info header"));
+	}
+	if (strcmp(reader.line, "size")) {
+		die(_("expected 'size', received '%s'"), reader.line);
+	}
+	while (packet_reader_read(&reader) == PACKET_READ_NORMAL) {
+		printf("%s\n", reader.line);
+	}
+	check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected");
+
+	return 1;
+}
+
+static int append_oid_to_ref(const struct object_id *oid, void *data)
+{
+	struct ref *ref = data;
+	struct ref *temp_ref = xcalloc(1, sizeof (struct ref));
+	temp_ref->old_oid = *oid;
+	temp_ref->exact_oid = 1;
+	ref->next = temp_ref;
+	ref = ref->next;
+	return 0;
+}
+
 static struct ref *get_refs_via_connect(struct transport *transport, int for_push,
 					struct transport_ls_refs_options *options)
 {
@@ -364,6 +425,7 @@ static int fetch_refs_via_pack(struct transport *transport,
 	struct ref *refs = NULL;
 	struct fetch_pack_args args;
 	struct ref *refs_tmp = NULL;
+	struct ref *wanted_refs = xcalloc(1, sizeof (struct ref));
 
 	memset(&args, 0, sizeof(args));
 	args.uploadpack = data->options.uploadpack;
@@ -388,8 +450,17 @@ static int fetch_refs_via_pack(struct transport *transport,
 	args.server_options = transport->server_options;
 	args.negotiation_tips = data->options.negotiation_tips;
 	args.reject_shallow_remote = transport->smart_options->reject_shallow;
+	args.object_info = transport->smart_options->object_info;
 
-	if (!data->got_remote_heads) {
+	if (transport->smart_options && transport->smart_options->object_info) {
+		struct ref *temp_ref = wanted_refs;
+		if (fetch_object_info(transport)) {
+			goto cleanup;
+		}
+		oid_array_for_each(transport->smart_options->object_info_oids, append_oid_to_ref, temp_ref);
+		wanted_refs = wanted_refs->next;
+		transport->remote_refs = wanted_refs;
+	} else if (!data->got_remote_heads) {
 		int i;
 		int must_list_refs = 0;
 		for (i = 0; i < nr_heads; i++) {
@@ -406,7 +477,7 @@ static int fetch_refs_via_pack(struct transport *transport,
 	else if (data->version <= protocol_v1)
 		die_if_server_options(transport);
 
-	if (data->options.acked_commits) {
+	if (data->options.acked_commits && !transport->smart_options->object_info) {
 		if (data->version < protocol_v2) {
 			warning(_("--negotiate-only requires protocol v2"));
 			ret = -1;
diff --git a/transport.h b/transport.h
index a0bc6a1e9e..3bba16f86e 100644
--- a/transport.h
+++ b/transport.h
@@ -30,6 +30,12 @@ struct git_transport_options {
 	 */
 	unsigned connectivity_checked:1;
 
+	/*
+	 * Transport will attempt to pull only object-info. Fallbacks
+	 * to pulling entire object if object-info is not supported
+	 */
+	unsigned object_info : 1;
+
 	int depth;
 	const char *deepen_since;
 	const struct string_list *deepen_not;
@@ -53,6 +59,9 @@ struct git_transport_options {
 	 * common commits to this oidset instead of fetching any packfiles.
 	 */
 	struct oidset *acked_commits;
+
+	struct oid_array *object_info_oids;
+	struct string_list *object_info_options;
 };
 
 enum transport_family {
-- 
2.33.0.664.g0785eb7698





[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux