On 24/06/28 03:05PM, Eric Ju wrote: > From: Calvin Wan <calvinwan@xxxxxxxxxx> > > Since the `info` command in cat-file --batch-command prints object info > for a given object, it is natural to add another command in cat-file > --batch-command to print object info for a given object from a remote. > Add `remote-object-info` to cat-file --batch-command. > > While `info` takes object ids one at a time, this creates overhead when > making requests to a server so `remote-object-info` instead can take > multiple object ids at once. > > cat-file --batch-command is generally implemented in the following > manner: > > - Receive and parse input from user > - Call respective function attached to command > - Set batch mode state, get object info, print object info > > In --buffer mode, this changes to: > > - Receive and parse input from user > - Store respective function attached to command in a queue > - After flush, loop through commands in queue > - Call respective function attached to command > - Set batch mode state, get object info, print object info So the problem is that there is overhead associated with getting object info from the remote. Therefore, remote-object-info also supports batching objects together. This seems reasonable. > > Notice how the getting and printing of object info is accomplished one > at a time. As described above, this creates a problem for making > requests to a server. Therefore, `remote-object-info` is implemented in > the following manner: > > - Receive and parse input from user > If command is `remote-object-info`: > - Get object info from remote > - Loop through object info > - Call respective function attached to `info` > - Set batch mode state, use passed in object info, print object > info > Else: > - Call respective function attached to command > - Parse input, get object info, print object info > > And finally for --buffer mode `remote-object-info`: > - Receive and parse input from user > - Store respective function attached to command in a queue > - After flush, loop through commands in queue: > If command is `remote-object-info`: > - Get object info from remote > - Loop through object info > - Call respective function attached to `info` > - Set batch mode state, use passed in object info, print > object info > Else: > - Call respective function attached to command > - Set batch mode state, get object info, print object info > > To summarize, `remote-object-info` gets object info from the remote and > then generates multiple `info` commands with the object info passed in. > > In order for remote-object-info to avoid remote communication overhead > in the non-buffer mode, the objects are passed in as such: Even in non-buffer mode, having separate remote-object-info commands would result in additional overhead correct? From my understanding each command is executed sequently, so multiples of remote-object-info would always result in additional overhead. > > remote-object-info <remote> <oid> <oid> ... <oid> > > rather than > > remote-object-info <remote> <oid> > remote-object-info <remote> <oid> > ... > remote-object-info <remote> <oid> > > Signed-off-by: Calvin Wan <calvinwan@xxxxxxxxxx> > Signed-off-by: Eric Ju <eric.peijian@xxxxxxxxx> > Helped-by: Jonathan Tan <jonathantanmy@xxxxxxxxxx> > Helped-by: Christian Couder <chriscool@xxxxxxxxxxxxx> I think the sign-offs are supposed to go at the bottom. [snip] > @@ -526,51 +533,118 @@ static void batch_one_object(const char *obj_name, > (opt->follow_symlinks ? GET_OID_FOLLOW_SYMLINKS : 0); > enum get_oid_result result; > > - result = get_oid_with_context(the_repository, obj_name, > - flags, &data->oid, &ctx); > - if (result != FOUND) { > - switch (result) { > - case MISSING_OBJECT: > - printf("%s missing%c", obj_name, opt->output_delim); > - break; > - case SHORT_NAME_AMBIGUOUS: > - printf("%s ambiguous%c", obj_name, opt->output_delim); > - break; > - case DANGLING_SYMLINK: > - printf("dangling %"PRIuMAX"%c%s%c", > - (uintmax_t)strlen(obj_name), > - opt->output_delim, obj_name, opt->output_delim); > - break; > - case SYMLINK_LOOP: > - printf("loop %"PRIuMAX"%c%s%c", > - (uintmax_t)strlen(obj_name), > - opt->output_delim, obj_name, opt->output_delim); > - break; > - case NOT_DIR: > - printf("notdir %"PRIuMAX"%c%s%c", > - (uintmax_t)strlen(obj_name), > - opt->output_delim, obj_name, opt->output_delim); > - break; > - default: > - BUG("unknown get_sha1_with_context result %d\n", > - result); > - break; > + if (!opt->use_remote_info) { When using the remote-object-info command, the object in question is supposed to be on the remote and may not exist locally. Therefore we skip over `get_oid_with_context()`. > + result = get_oid_with_context(the_repository, obj_name, > + flags, &data->oid, &ctx); > + if (result != FOUND) { > + switch (result) { > + case MISSING_OBJECT: > + printf("%s missing%c", obj_name, opt->output_delim); > + break; > + case SHORT_NAME_AMBIGUOUS: > + printf("%s ambiguous%c", obj_name, opt->output_delim); > + break; > + case DANGLING_SYMLINK: > + printf("dangling %"PRIuMAX"%c%s%c", > + (uintmax_t)strlen(obj_name), > + opt->output_delim, obj_name, opt->output_delim); > + break; > + case SYMLINK_LOOP: > + printf("loop %"PRIuMAX"%c%s%c", > + (uintmax_t)strlen(obj_name), > + opt->output_delim, obj_name, opt->output_delim); > + break; > + case NOT_DIR: > + printf("notdir %"PRIuMAX"%c%s%c", > + (uintmax_t)strlen(obj_name), > + opt->output_delim, obj_name, opt->output_delim); > + break; > + default: > + BUG("unknown get_sha1_with_context result %d\n", > + result); > + break; > + } > + fflush(stdout); > + return; > } > - fflush(stdout); > - return; > - } > > - if (ctx.mode == 0) { > - printf("symlink %"PRIuMAX"%c%s%c", > - (uintmax_t)ctx.symlink_path.len, > - opt->output_delim, ctx.symlink_path.buf, opt->output_delim); > - fflush(stdout); > - return; > + if (ctx.mode == 0) { > + printf("symlink %"PRIuMAX"%c%s%c", > + (uintmax_t)ctx.symlink_path.len, > + opt->output_delim, ctx.symlink_path.buf, opt->output_delim); > + fflush(stdout); > + return; > + } > } > > batch_object_write(obj_name, scratch, opt, data, NULL, 0); > } > > +static int get_remote_info(struct batch_options *opt, int argc, const char **argv) > +{ > + int retval = 0; > + struct remote *remote = NULL; > + struct object_id oid; > + struct string_list object_info_options = STRING_LIST_INIT_NODUP; > + static struct transport *gtransport; > + > + /* > + * Change the format to "%(objectname) %(objectsize)" when > + * remote-object-info command is used. Once we start supporting objecttype > + * the default format should change to DEFAULT_FORMAT > + */ > + if (!opt->format) { > + opt->format = "%(objectname) %(objectsize)"; > + } We should omit the parenthesis for single line if statements. > + > + remote = remote_get(argv[0]); > + if (!remote) > + die(_("must supply valid remote when using remote-object-info")); > + oid_array_clear(&object_info_oids); > + for (size_t i = 1; i < argc; i++) { > + if (get_oid_hex(argv[i], &oid)) > + die(_("Not a valid object name %s"), argv[i]); > + oid_array_append(&object_info_oids, &oid); > + } > + > + gtransport = transport_get(remote, NULL); > + if (gtransport->smart_options) { > + int include_size = 0; > + > + CALLOC_ARRAY(remote_object_info, object_info_oids.nr); > + gtransport->smart_options->object_info = 1; > + gtransport->smart_options->object_info_oids = &object_info_oids; > + /* > + * 'size' is the only option currently supported. > + * Other options that are passed in the format will exit with error. > + */ > + if (strstr(opt->format, "%(objectsize)")) { > + string_list_append(&object_info_options, "size"); > + include_size = 1; > + } > + if (strstr(opt->format, "%(objecttype)")) { > + die(_("objecttype is currently not supported with remote-object-info")); > + } Another single line if statement above that should omit the parenthesis. > + if (strstr(opt->format, "%(objectsize:disk)")) > + die(_("objectsize:disk is currently not supported with remote-object-info")); > + if (strstr(opt->format, "%(deltabase)")) > + die(_("deltabase is currently not supported with remote-object-info")); > + if (object_info_options.nr > 0) { > + gtransport->smart_options->object_info_options = &object_info_options; > + for (size_t i = 0; i < object_info_oids.nr; i++) { > + if (include_size) > + remote_object_info[i].sizep = xcalloc(1, sizeof(long)); > + } > + gtransport->smart_options->object_info_data = &remote_object_info; > + retval = transport_fetch_refs(gtransport, NULL); > + } > + } else { > + retval = -1; > + } > + > + return retval; > +} > + > struct object_cb_data { > struct batch_options *opt; > struct expand_data *expand; > @@ -642,6 +716,7 @@ typedef void (*parse_cmd_fn_t)(struct batch_options *, const char *, > struct queued_cmd { > parse_cmd_fn_t fn; > char *line; > + const char *name; Since special handling is needed for the remote-object-info command, we record the queued command names to check against later. > }; > > static void parse_cmd_contents(struct batch_options *opt, > @@ -662,6 +737,55 @@ static void parse_cmd_info(struct batch_options *opt, > batch_one_object(line, output, opt, data); > } > > +static const struct parse_cmd { > + const char *name; > + parse_cmd_fn_t fn; > + unsigned takes_args; > +} commands[] = { > + { "contents", parse_cmd_contents, 1 }, > + { "info", parse_cmd_info, 1 }, > + { "remote-object-info", parse_cmd_info, 1 }, > + { "flush", NULL, 0 }, > +}; > + > +static void parse_remote_info(struct batch_options *opt, > + char *line, > + struct strbuf *output, > + struct expand_data *data, > + const struct parse_cmd *p_cmd, > + struct queued_cmd *q_cmd) It seems a little confusing to me that `parse_remote_info()` accepts both a `parse_cmd` and `queued_cmd`, but only expects to use one or the other. It looks like this is done because `dispatch_calls()` already accepts `queued_cmd`, but now needs to call `parse_remote_info()`. Since it is only the underlying command function that is needed by `parse_remote_info()` > +{ > + int count; > + const char **argv; > + > + count = split_cmdline(line, &argv); > + if (get_remote_info(opt, count, argv)) > + goto cleanup; > + opt->use_remote_info = 1; > + data->skip_object_info = 1; > + data->mark_query = 0; > + for (size_t i = 0; i < object_info_oids.nr; i++) { > + if (remote_object_info[i].sizep) > + data->size = *remote_object_info[i].sizep; > + if (remote_object_info[i].typep) > + data->type = *remote_object_info[i].typep; > + > + data->oid = object_info_oids.oid[i]; > + if (p_cmd) > + p_cmd->fn(opt, argv[i+1], output, data); > + else > + q_cmd->fn(opt, argv[i+1], output, data); > + } > + opt->use_remote_info = 0; > + data->skip_object_info = 0; > + data->mark_query = 1; > + > +cleanup: > + for (size_t i = 0; i < object_info_oids.nr; i++) > + free_object_info_contents(&remote_object_info[i]); > + free(remote_object_info); > +} > + > static void dispatch_calls(struct batch_options *opt, > struct strbuf *output, > struct expand_data *data, > @@ -671,8 +795,12 @@ static void dispatch_calls(struct batch_options *opt, > if (!opt->buffer_output) > die(_("flush is only for --buffer mode")); > > - for (int i = 0; i < nr; i++) > - cmd[i].fn(opt, cmd[i].line, output, data); > + for (int i = 0; i < nr; i++) { > + if (!strcmp(cmd[i].name, "remote-object-info")) > + parse_remote_info(opt, cmd[i].line, output, data, NULL, &cmd[i]); If we adapt `parse_remote_info()` to accept the command function we could pass cmd->fn here instead. > + else > + cmd[i].fn(opt, cmd[i].line, output, data); > + } > > fflush(stdout); > } > @@ -685,17 +813,6 @@ static void free_cmds(struct queued_cmd *cmd, size_t *nr) > *nr = 0; > } > > - > -static const struct parse_cmd { > - const char *name; > - parse_cmd_fn_t fn; > - unsigned takes_args; > -} commands[] = { > - { "contents", parse_cmd_contents, 1}, > - { "info", parse_cmd_info, 1}, > - { "flush", NULL, 0}, > -}; > - > static void batch_objects_command(struct batch_options *opt, > struct strbuf *output, > struct expand_data *data) > @@ -740,11 +857,17 @@ static void batch_objects_command(struct batch_options *opt, > dispatch_calls(opt, output, data, queued_cmd, nr); > free_cmds(queued_cmd, &nr); > } else if (!opt->buffer_output) { > - cmd->fn(opt, p, output, data); > + if (!strcmp(cmd->name, "remote-object-info")) { > + char *line = xstrdup_or_null(p); > + parse_remote_info(opt, line, output, data, cmd, NULL); Same here, if we adapt `parse_remote_info()` to accept the command function we could pass cmd->fn here instead. > + } else { > + cmd->fn(opt, p, output, data); > + } > } else { > ALLOC_GROW(queued_cmd, nr + 1, alloc); > call.fn = cmd->fn; > call.line = xstrdup_or_null(p); > + call.name = cmd->name; > queued_cmd[nr++] = call; > } > } > @@ -761,8 +884,6 @@ static void batch_objects_command(struct batch_options *opt, > strbuf_release(&input); > } > > -#define DEFAULT_FORMAT "%(objectname) %(objecttype) %(objectsize)" > - > static int batch_objects(struct batch_options *opt) > { > struct strbuf input = STRBUF_INIT; [snip]