On Mon, Nov 25, 2024 at 12:36:15AM -0500, Eric Ju wrote: > diff --git a/fetch-object-info.c b/fetch-object-info.c > new file mode 100644 > index 0000000000..2aa9f2b70d > --- /dev/null > +++ b/fetch-object-info.c > @@ -0,0 +1,92 @@ > +#include "git-compat-util.h" > +#include "gettext.h" > +#include "hex.h" > +#include "pkt-line.h" > +#include "connect.h" > +#include "oid-array.h" > +#include "object-store-ll.h" > +#include "fetch-object-info.h" > +#include "string-list.h" > + > +/** > + * send_object_info_request sends git-cat-file object-info command and its > + * arguments into the request buffer. > + */ > +static void send_object_info_request(const int fd_out, struct object_info_args *args) > +{ > + struct strbuf req_buf = STRBUF_INIT; > + > + write_command_and_capabilities(&req_buf, "object-info", args->server_options); > + > + if (unsorted_string_list_has_string(args->object_info_options, "size")) > + packet_buf_write(&req_buf, "size"); Do we have a document somewhere that spells out the wire format that client- and server-side talk with each other? If so it would be nice to point it out in the commit message so that I know where to look, and otherwise we should document it. Without such a doc it's hard to figure out whether this is correct. > + if (args->oids) { > + for (size_t i = 0; i < args->oids->nr; i++) > + packet_buf_write(&req_buf, "oid %s", oid_to_hex(&args->oids->oid[i])); > + } Nit: needless curly braces. > + packet_buf_flush(&req_buf); > + if (write_in_full(fd_out, req_buf.buf, req_buf.len) < 0) > + die_errno(_("unable to write request to remote")); So we write the whole request into `req_buf` first before sending it to the remote. Isn't that quite inefficient memory wise? In other words, couldn't we instead stream the request line by line or at least in batches to the file descriptor? > + strbuf_release(&req_buf); > +} > + > +/** Nit: s|/**|/*| > + * fetch_object_info sends git-cat-file object-info command into the request buf > + * and read the results from packets. > + */ > +int fetch_object_info(const enum protocol_version version, struct object_info_args *args, > + struct packet_reader *reader, struct object_info *object_info_data, > + const int stateless_rpc, const int fd_out) > +{ > + int size_index = -1; > + > + switch (version) { > + case protocol_v2: > + if (!server_supports_v2("object-info")) > + die(_("object-info capability is not enabled on the server")); > + send_object_info_request(fd_out, args); > + break; > + case protocol_v1: > + case protocol_v0: > + die(_("wrong protocol version. expected v2")); > + case protocol_unknown_version: > + BUG("unknown protocol version"); > + } > + > + for (size_t i = 0; i < args->object_info_options->nr; i++) { > + if (packet_reader_read(reader) != PACKET_READ_NORMAL) { > + check_stateless_delimiter(stateless_rpc, reader, "stateless delimiter expected"); > + return -1; > + } > + if (unsorted_string_list_has_string(args->object_info_options, reader->line)) { Hum. Does this result in quadratic runtime behaviour? > + if (!strcmp(reader->line, "size")) { > + size_index = i; > + for (size_t j = 0; j < args->oids->nr; j++) > + object_info_data[j].sizep = xcalloc(1, sizeof(long)); This might be a bit more future proof in case the `sizep` type were ever to change: object_info_data[j].sizep = xcalloc(1, sizeof(*object_info_data[j].sizep)); It also allows you to skip double-checking whether you picked the correct type. In fact, the type is actually an `unsigned long`, which is confusing but ultimately does not make much of a difference because it should have the same size. > + } > + continue; > + } > + return -1; > + } > + > + for (size_t i = 0; packet_reader_read(reader) == PACKET_READ_NORMAL && i < args->oids->nr; i++){ > + struct string_list object_info_values = STRING_LIST_INIT_DUP; > + > + string_list_split(&object_info_values, reader->line, ' ', -1); > + if (0 <= size_index) { > + if (!strcmp(object_info_values.items[1 + size_index].string, "")) > + die("object-info: not our ref %s", > + object_info_values.items[0].string); > + > + *object_info_data[i].sizep = strtoul(object_info_values.items[1 + size_index].string, NULL, 10); We're completely missing error handling for strtoul(3p) here. That function is also discouraged nowadays because error handling is hard to do correct. We have `strtoul_ui()` and friends, but don't have a variant yet that know to return an `unsigned long`. We might backfill that omission and then use it instead. > diff --git a/transport-helper.c b/transport-helper.c > index bc27653cde..bf0a1877c7 100644 > --- a/transport-helper.c > +++ b/transport-helper.c > @@ -728,6 +728,13 @@ static int fetch_refs(struct transport *transport, > free_refs(dummy); > } > > + /* fail the command explicitly to avoid further commands input. */ > + if (transport->smart_options->object_info) > + die(_("remote-object-info requires protocol v2")); The code path that checks for for protocol v2 with "--negotiate-only" knows to warn and return an error. Should we do the same here? > diff --git a/transport.c b/transport.c > index 47fda6a773..746ec19ddc 100644 > --- a/transport.c > +++ b/transport.c > @@ -9,6 +9,7 @@ > #include "hook.h" > #include "pkt-line.h" > #include "fetch-pack.h" > +#include "fetch-object-info.h" > #include "remote.h" > #include "connect.h" > #include "send-pack.h" > @@ -444,8 +445,33 @@ static int fetch_refs_via_pack(struct transport *transport, > args.server_options = transport->server_options; > args.negotiation_tips = data->options.negotiation_tips; > args.reject_shallow_remote = transport->smart_options->reject_shallow; > + args.object_info = transport->smart_options->object_info; > + > + if (transport->smart_options > + && transport->smart_options->object_info > + && transport->smart_options->object_info_oids->nr > 0) { Formatting is wrong here: if (transport->smart_options && transport->smart_options->object_info && transport->smart_options->object_info_oids->nr > 0) { Patrick