(Just a small nit before I respond. If you could reply with only the code you're referring to, that would be much appreciated). > Would it be useful to check for the existence of the object to prove it got fetched? The object is not saved into the filesystem after fetching for only object-info so checking for its existence wouldn't prove anything. > Since the server is being used as the client, we wouldn't be able to confirm that > the object got fetched I agree that this test case can be written better to confirm that the object was fetched through the standard channel. I think I could use GIT_TRACE_PACKET=1 with the output piped to a log file and then grepping that log file for packet information resembling a standard fetch to confirm. (See t5702-protocol-v2.sh as a reference) > But, as a larger question for the list, I'm wondering if it's best practice to separate client from > server in tests, or if it's an accepted style to just use one repository in cases > when it doesn't really matter? I am as well! On Wed, Mar 30, 2022 at 3:06 PM John Cai <johncai86@xxxxxxxxx> wrote: > > > > On 28 Mar 2022, at 15:11, Calvin Wan wrote: > > > Sometimes it is useful to get information about an object without > > having to download it completely. The server logic has already been > > implemented as “a2ba162cda (object-info: support for retrieving object > > info, 2021-04-20)”. This patch implements the client option for > > it. Currently, only 'size' is supported, however future patches can > > implement additional options. > > > > Add ‘--object-info’ option to fetch. This option allows the client to > > make an object-info command request to a server that supports protocol > > v2. If the server is v2, but does not allow for the object-info > > command request, fetch the objects as if it were a standard fetch > > (however without changing any refs). Therefore, hook `fetch > > object-info` into transport_fetch_refs() to easily fallback if needed. > > > > A previous patch added the `transfer.advertiseObjectInfo` config > > option, of which this patch utilizes to test the fallback. > > > > --- > > Documentation/fetch-options.txt | 5 ++ > > Documentation/git-fetch.txt | 1 + > > builtin/fetch.c | 36 ++++++++- > > fetch-pack.c | 42 +++++++++- > > fetch-pack.h | 9 +++ > > t/t5583-fetch-object-info.sh | 138 ++++++++++++++++++++++++++++++++ > > transport-helper.c | 8 +- > > transport-internal.h | 1 + > > transport.c | 75 ++++++++++++++++- > > transport.h | 9 +++ > > 10 files changed, 315 insertions(+), 9 deletions(-) > > create mode 100755 t/t5583-fetch-object-info.sh > > > > diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt > > index f903683189..f1ca95c32d 100644 > > --- a/Documentation/fetch-options.txt > > +++ b/Documentation/fetch-options.txt > > @@ -299,3 +299,8 @@ endif::git-pull[] > > -6:: > > --ipv6:: > > Use IPv6 addresses only, ignoring IPv4 addresses. > > + > > +--object-info=[<arguments>]:: > > + Fetches only the relevant information passed in arguments from a > > + specific remote and set of object ids. Object 'size' is currently > > + the only supported argument. > > diff --git a/Documentation/git-fetch.txt b/Documentation/git-fetch.txt > > index 550c16ca61..a13d2ba7ad 100644 > > --- a/Documentation/git-fetch.txt > > +++ b/Documentation/git-fetch.txt > > @@ -13,6 +13,7 @@ SYNOPSIS > > 'git fetch' [<options>] <group> > > 'git fetch' --multiple [<options>] [(<repository> | <group>)...] > > 'git fetch' --all [<options>] > > +'git fetch' --object-info=[<arguments>] <remote> [<object-ids>] > > > > > > DESCRIPTION > > diff --git a/builtin/fetch.c b/builtin/fetch.c > > index 4d12c2fd4d..0b21bebfca 100644 > > --- a/builtin/fetch.c > > +++ b/builtin/fetch.c > > @@ -29,6 +29,9 @@ > > #include "commit-graph.h" > > #include "shallow.h" > > #include "worktree.h" > > +#include "protocol.h" > > +#include "pkt-line.h" > > +#include "connect.h" > > > > #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000) > > > > @@ -37,6 +40,7 @@ static const char * const builtin_fetch_usage[] = { > > N_("git fetch [<options>] <group>"), > > N_("git fetch --multiple [<options>] [(<repository> | <group>)...]"), > > N_("git fetch --all [<options>]"), > > + N_("git fetch --object-info=[<arguments>] <remote> [<object-ids>]"), > > NULL > > }; > > > > @@ -86,6 +90,7 @@ static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP; > > static int fetch_write_commit_graph = -1; > > static int stdin_refspecs = 0; > > static int negotiate_only; > > +static struct string_list object_info_options = STRING_LIST_INIT_NODUP; > > > > static int git_fetch_config(const char *k, const char *v, void *cb) > > { > > @@ -221,6 +226,8 @@ static struct option builtin_fetch_options[] = { > > N_("write the commit-graph after fetching")), > > OPT_BOOL(0, "stdin", &stdin_refspecs, > > N_("accept refspecs from stdin")), > > + OPT_STRING_LIST(0, "object-info", &object_info_options, N_("option"), > > + N_("command request arguments")), > > OPT_END() > > }; > > > > @@ -2087,6 +2094,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) > > struct remote *remote = NULL; > > int result = 0; > > int prune_tags_ok = 1; > > + struct oid_array object_info_oids = OID_ARRAY_INIT; > > > > packet_trace_identity("fetch"); > > > > @@ -2168,7 +2176,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) > > if (dry_run) > > write_fetch_head = 0; > > > > - if (all) { > > + if (object_info_options.nr > 0) { > > + if (argc == 0 || argc == 1) { > > + die(_("must supply remote and object ids when using --object-info")); > > + } else { > > + struct object_id oid; > > + remote = remote_get(argv[0]); > > + for (i = 1; i < argc; i++) { > > + if (get_oid(argv[i], &oid)) > > + return error(_("malformed object id '%s'"), argv[i]); > > + oid_array_append(&object_info_oids, &oid); > > + } > > + } > > + } else if (all) { > > if (argc == 1) > > die(_("fetch --all does not take a repository argument")); > > else if (argc > 1) > > @@ -2199,7 +2219,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) > > } > > } > > > > - if (negotiate_only) { > > + if (object_info_options.nr > 0) { > > + if (!remote) > > + die(_("must supply remote when using --object-info")); > > + gtransport = prepare_transport(remote, 1); > > + if (gtransport->smart_options) { > > + gtransport->smart_options->object_info = 1; > > + gtransport->smart_options->object_info_oids = &object_info_oids; > > + gtransport->smart_options->object_info_options = &object_info_options; > > + } > > + if (server_options.nr) > > + gtransport->server_options = &server_options; > > + result = transport_fetch_refs(gtransport, NULL); > > + } else if (negotiate_only) { > > struct oidset acked_commits = OIDSET_INIT; > > struct oidset_iter iter; > > const struct object_id *oid; > > diff --git a/fetch-pack.c b/fetch-pack.c > > index b709a61baf..36e3d1c5d0 100644 > > --- a/fetch-pack.c > > +++ b/fetch-pack.c > > @@ -1269,6 +1269,27 @@ static void write_command_and_capabilities(struct strbuf *req_buf, > > packet_buf_delim(req_buf); > > } > > > > +void send_object_info_request(int fd_out, struct object_info_args *args) > > +{ > > + struct strbuf req_buf = STRBUF_INIT; > > + int i; > > + > > + write_command_and_capabilities(&req_buf, args->server_options, "object-info"); > > + > > + if (string_list_has_string(args->object_info_options, "size")) > > + packet_buf_write(&req_buf, "size"); > > + > > + for (i = 0; i < args->oids->nr; i++) { > > + packet_buf_write(&req_buf, "oid %s\n", oid_to_hex(&args->oids->oid[i])); > > + } > > + > > + packet_buf_flush(&req_buf); > > + if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0) > > + die_errno(_("unable to write request to remote")); > > + > > + strbuf_release(&req_buf); > > +} > > + > > static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, > > struct fetch_pack_args *args, > > const struct ref *wants, struct oidset *common, > > @@ -1604,6 +1625,10 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, > > if (args->depth > 0 || args->deepen_since || args->deepen_not) > > args->deepen = 1; > > > > + if (args->object_info) { > > + state = FETCH_SEND_REQUEST; > > + } > > + > > while (state != FETCH_DONE) { > > switch (state) { > > case FETCH_CHECK_LOCAL: > > @@ -1613,7 +1638,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, > > /* Filter 'ref' by 'sought' and those that aren't local */ > > mark_complete_and_common_ref(negotiator, args, &ref); > > filter_refs(args, &ref, sought, nr_sought); > > - if (everything_local(args, &ref)) > > + if (!args->object_info && everything_local(args, &ref)) > > state = FETCH_DONE; > > else > > state = FETCH_SEND_REQUEST; > > @@ -1999,8 +2024,19 @@ struct ref *fetch_pack(struct fetch_pack_args *args, > > } > > args->connectivity_checked = 1; > > } > > - > > - update_shallow(args, sought, nr_sought, &si); > > + > > + if (args->object_info) { > > + struct ref *ref_cpy_reader = ref_cpy; > > + unsigned long size = 0; > > + while (ref_cpy_reader) { > > + oid_object_info(the_repository, &(ref_cpy_reader->old_oid), &size); > > + printf("%s %li\n", oid_to_hex(&(ref_cpy_reader->old_oid)), size); > > + ref_cpy_reader = ref_cpy_reader->next; > > + } > > + } > > + else { > > + update_shallow(args, sought, nr_sought, &si); > > + } > > cleanup: > > clear_shallow_info(&si); > > oid_array_clear(&shallows_scratch); > > diff --git a/fetch-pack.h b/fetch-pack.h > > index 7f94a2a583..23e69bb218 100644 > > --- a/fetch-pack.h > > +++ b/fetch-pack.h > > @@ -42,6 +42,7 @@ struct fetch_pack_args { > > unsigned update_shallow:1; > > unsigned reject_shallow_remote:1; > > unsigned deepen:1; > > + unsigned object_info:1; > > > > /* > > * Indicate that the remote of this request is a promisor remote. The > > @@ -68,6 +69,12 @@ struct fetch_pack_args { > > unsigned connectivity_checked:1; > > }; > > > > +struct object_info_args { > > + const struct string_list *object_info_options; > > + const struct string_list *server_options; > > + const struct oid_array *oids; > > +}; > > + > > /* > > * sought represents remote references that should be updated from. > > * On return, the names that were found on the remote will have been > > @@ -101,4 +108,6 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips, > > */ > > int report_unmatched_refs(struct ref **sought, int nr_sought); > > > > +void send_object_info_request(int fd_out, struct object_info_args *args); > > + > > #endif > > diff --git a/t/t5583-fetch-object-info.sh b/t/t5583-fetch-object-info.sh > > new file mode 100755 > > index 0000000000..e5db6d953c > > --- /dev/null > > +++ b/t/t5583-fetch-object-info.sh > > @@ -0,0 +1,138 @@ > > +#!/bin/sh > > + > > +test_description='test git fetch object-info version 2' > > + > > +TEST_NO_CREATE_REPO=1 > > + > > +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main > > +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME > > + > > +. ./test-lib.sh > > + > > +# Test fetch object-info with 'git://' transport > > + > > +. "$TEST_DIRECTORY"/lib-git-daemon.sh > > +start_git_daemon --export-all --enable=receive-pack > > +daemon_parent=$GIT_DAEMON_DOCUMENT_ROOT_PATH/parent > > + > > + > > +test_expect_success 'create repo to be served by git-daemon' ' > > + git init "$daemon_parent" && > > + test_commit -C "$daemon_parent" message1 a.txt > > +' > > + > > +test_expect_success 'fetch object-info with git:// using protocol v2' ' > > + ( > > + cd "$daemon_parent" && > > + object_id=$(git rev-parse message1:a.txt) && > > + length=$(wc -c <a.txt) && > > + > > + printf "%s %d\n" "$object_id" "$length" >expect && > > + git -c protocol.version=2 fetch --object-info=size "$GIT_DAEMON_URL/parent" "$object_id" >actual && > > + test_cmp expect actual > > + ) > > +' > > +stop_git_daemon > > + > > +# Test protocol v2 with 'http://' transport > > + > > +. "$TEST_DIRECTORY"/lib-httpd.sh > > +start_httpd > > + > > +test_expect_success 'create repo to be served by http:// transport' ' > > + git init "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && > > + git -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" config http.receivepack true && > > + test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" message1 a.txt && > > + test_commit -C "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" message2 b.txt > > +' > > + > > +test_expect_success 'fetch object-info with http:// using protocol v2' ' > > + ( > > + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && > > + object_id=$(git rev-parse message1:a.txt) && > > + length=$(wc -c <a.txt) && > > + > > + printf "%s %d\n" "$object_id" "$length" >expect && > > + git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" >actual && > > + test_cmp expect actual > > + ) > > +' > > + > > +test_expect_success 'fetch object-info for multiple objects' ' > > + ( > > + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && > > + object_id1=$(git rev-parse message1:a.txt) && > > + object_id2=$(git rev-parse message2:b.txt) && > > + length1=$(wc -c <a.txt) && > > + length2=$(wc -c <b.txt) && > > + > > + printf "%s %d\n" "$object_id1" "$length1" >expect && > > + printf "%s %d\n" "$object_id2" "$length2" >>expect && > > + git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id1" "$object_id2" >actual && > > + test_cmp expect actual > > + ) > > +' > > + > > +test_expect_success 'fetch object-info fallbacks to standard fetch if object-info is not supported by the server' ' > > + ( > > + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && > > + object_id=$(git rev-parse message1:a.txt) && > > + length=$(wc -c <a.txt) && > > + > > + printf "%s %d\n" "$object_id" "$length" >expect && > > + git config objectinfo.advertise false && > > + git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" >actual && > > + test_cmp expect actual > > + ) > > I noticed this test doesn't confirm that standard fetch was actually used. Would > it be useful to check for the existence of the object to prove it got fetched? > > Also, is seems the way this test is set up, it's fetching from itself. > This is logically fine in most cases that client == server, but it was a little > confusing at first because it looked like the invocation of git config was on > the client rather than the server, until I realized they are the same. > > Since the server is being used as the client, we wouldn't be able to confirm that > the object got fetched. I realize for most of these tests, it doesn't really matter > to conflate client with server. I think this is the one exception. > > But, as a larger question for the list, I'm wondering if it's best practice to separate client from > server in tests, or if it's an accepted style to just use one repository in cases > when it doesn't really matter? > > Thanks > John > > > +' > > + > > +test_expect_success 'fetch object-info fails on server with legacy protocol' ' > > + ( > > + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && > > + object_id=$(git rev-parse message1:a.txt) && > > + test_must_fail git -c protocol.version=0 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" 2>err && > > + test_i18ngrep "object-info requires protocol v2" err > > + ) > > +' > > + > > +test_expect_success 'fetch object-info fails on malformed OID' ' > > + ( > > + cd "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" && > > + malformed_object_id="this_id_is_not_valid" && > > + test_must_fail git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$malformed_object_id" 2>err && > > + test_i18ngrep "malformed object id '$malformed_object_id'" err > > + ) > > +' > > + > > +test_expect_success 'fetch object-info fails on missing OID' ' > > + git clone "$HTTPD_DOCUMENT_ROOT_PATH/http_parent" missing_oid_repo && > > + test_commit -C missing_oid_repo message3 c.txt && > > + ( > > + cd missing_oid_repo && > > + object_id=$(git rev-parse message3:c.txt) && > > + test_must_fail env GIT_TRACE_PACKET=1 git -c protocol.version=2 fetch --object-info=size "$HTTPD_URL/smart/http_parent" "$object_id" 2>err && > > + test_i18ngrep "fatal: remote error: upload-pack: not our ref $object_id" err > > + ) > > +' > > + > > +# Test fetch object-info with 'file://' transport > > + > > +test_expect_success 'create repo to be served by file:// transport' ' > > + git init server && > > + test_commit -C server message1 a.txt && > > + git -C server config protocol.version 2 > > +' > > + > > +test_expect_success 'fetch object-info with file:// using protocol v2' ' > > + ( > > + cd server && > > + object_id=$(git rev-parse message1:a.txt) && > > + length=$(wc -c <a.txt) && > > + > > + printf "%s %d\n" "$object_id" "$length" >expect && > > + git -c protocol.version=2 fetch --object-info=size "file://$(pwd)" "$object_id" >actual && > > + test_cmp expect actual > > + ) > > +' > > + > > +test_done > > diff --git a/transport-helper.c b/transport-helper.c > > index a0297b0986..64c175e904 100644 > > --- a/transport-helper.c > > +++ b/transport-helper.c > > @@ -686,13 +686,17 @@ static int fetch_refs(struct transport *transport, > > > > /* > > * If we reach here, then the server, the client, and/or the transport > > - * helper does not support protocol v2. --negotiate-only requires > > - * protocol v2. > > + * helper does not support protocol v2. --negotiate-only and --object-info > > + * require protocol v2. > > */ > > if (data->transport_options.acked_commits) { > > warning(_("--negotiate-only requires protocol v2")); > > return -1; > > } > > + if (transport->smart_options->object_info) { > > + warning(_("--object-info requires protocol v2")); > > + return -1; > > + } > > > > if (!data->get_refs_list_called) > > get_refs_list_using_list(transport, 0); > > diff --git a/transport-internal.h b/transport-internal.h > > index c4ca0b733a..04fa015011 100644 > > --- a/transport-internal.h > > +++ b/transport-internal.h > > @@ -59,6 +59,7 @@ struct transport_vtable { > > * use. disconnect() releases these resources. > > **/ > > int (*disconnect)(struct transport *connection); > > + int (*fetch_object_info)(struct transport *transport, struct oid_array *oids); > > }; > > > > #endif > > diff --git a/transport.c b/transport.c > > index 70e9840a90..65a1b1fdb3 100644 > > --- a/transport.c > > +++ b/transport.c > > @@ -350,6 +350,67 @@ static struct ref *handshake(struct transport *transport, int for_push, > > return refs; > > } > > > > +/* > > + * Fetches object info if server supports object-info command > > + * Make sure to fallback to normal fetch if this fails > > + */ > > +static int fetch_object_info(struct transport *transport) > > +{ > > + struct git_transport_data *data = transport->data; > > + struct object_info_args args; > > + struct packet_reader reader; > > + > > + memset(&args, 0, sizeof(args)); > > + args.server_options = transport->server_options; > > + args.object_info_options = transport->smart_options->object_info_options; > > + args.oids = transport->smart_options->object_info_oids; > > + > > + connect_setup(transport, 0); > > + packet_reader_init(&reader, data->fd[0], NULL, 0, > > + PACKET_READ_CHOMP_NEWLINE | > > + PACKET_READ_DIE_ON_ERR_PACKET); > > + data->version = discover_version(&reader); > > + > > + transport->hash_algo = reader.hash_algo; > > + > > + switch (data->version) { > > + case protocol_v2: > > + if (!server_supports_v2("object-info", 0)) > > + return 0; > > + send_object_info_request(data->fd[1], &args); > > + break; > > + case protocol_v1: > > + case protocol_v0: > > + die(_("wrong protocol version. expected v2")); > > + case protocol_unknown_version: > > + BUG("unknown protocol version"); > > + } > > + > > + if (packet_reader_read(&reader) != PACKET_READ_NORMAL) { > > + die(_("error reading object info header")); > > + } > > + if (strcmp(reader.line, "size")) { > > + die(_("expected 'size', received '%s'"), reader.line); > > + } > > + while (packet_reader_read(&reader) == PACKET_READ_NORMAL) { > > + printf("%s\n", reader.line); > > + } > > + check_stateless_delimiter(transport->stateless_rpc, &reader, "stateless delimiter expected"); > > + > > + return 1; > > +} > > + > > +static int append_oid_to_ref(const struct object_id *oid, void *data) > > +{ > > + struct ref *ref = data; > > + struct ref *temp_ref = xcalloc(1, sizeof (struct ref)); > > + temp_ref->old_oid = *oid; > > + temp_ref->exact_oid = 1; > > + ref->next = temp_ref; > > + ref = ref->next; > > + return 0; > > +} > > + > > static struct ref *get_refs_via_connect(struct transport *transport, int for_push, > > struct transport_ls_refs_options *options) > > { > > @@ -364,6 +425,7 @@ static int fetch_refs_via_pack(struct transport *transport, > > struct ref *refs = NULL; > > struct fetch_pack_args args; > > struct ref *refs_tmp = NULL; > > + struct ref *wanted_refs = xcalloc(1, sizeof (struct ref)); > > > > memset(&args, 0, sizeof(args)); > > args.uploadpack = data->options.uploadpack; > > @@ -388,8 +450,17 @@ static int fetch_refs_via_pack(struct transport *transport, > > args.server_options = transport->server_options; > > args.negotiation_tips = data->options.negotiation_tips; > > args.reject_shallow_remote = transport->smart_options->reject_shallow; > > + args.object_info = transport->smart_options->object_info; > > > > - if (!data->got_remote_heads) { > > + if (transport->smart_options && transport->smart_options->object_info) { > > + struct ref *temp_ref = wanted_refs; > > + if (fetch_object_info(transport)) { > > + goto cleanup; > > + } > > + oid_array_for_each(transport->smart_options->object_info_oids, append_oid_to_ref, temp_ref); > > + wanted_refs = wanted_refs->next; > > + transport->remote_refs = wanted_refs; > > + } else if (!data->got_remote_heads) { > > int i; > > int must_list_refs = 0; > > for (i = 0; i < nr_heads; i++) { > > @@ -406,7 +477,7 @@ static int fetch_refs_via_pack(struct transport *transport, > > else if (data->version <= protocol_v1) > > die_if_server_options(transport); > > > > - if (data->options.acked_commits) { > > + if (data->options.acked_commits && !transport->smart_options->object_info) { > > if (data->version < protocol_v2) { > > warning(_("--negotiate-only requires protocol v2")); > > ret = -1; > > diff --git a/transport.h b/transport.h > > index a0bc6a1e9e..3bba16f86e 100644 > > --- a/transport.h > > +++ b/transport.h > > @@ -30,6 +30,12 @@ struct git_transport_options { > > */ > > unsigned connectivity_checked:1; > > > > + /* > > + * Transport will attempt to pull only object-info. Fallbacks > > + * to pulling entire object if object-info is not supported > > + */ > > + unsigned object_info : 1; > > + > > int depth; > > const char *deepen_since; > > const struct string_list *deepen_not; > > @@ -53,6 +59,9 @@ struct git_transport_options { > > * common commits to this oidset instead of fetching any packfiles. > > */ > > struct oidset *acked_commits; > > + > > + struct oid_array *object_info_oids; > > + struct string_list *object_info_options; > > }; > > > > enum transport_family { > > -- > > 2.33.0.664.g0785eb7698