Re: [PATCH 6/6] cat-file: add remote-object-info to batch-command

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 24/06/28 03:05PM, Eric Ju wrote:
> From: Calvin Wan <calvinwan@xxxxxxxxxx>
> 
> Since the `info` command in cat-file --batch-command prints object info
> for a given object, it is natural to add another command in cat-file
> --batch-command to print object info for a given object from a remote.
> Add `remote-object-info` to cat-file --batch-command.
> 
> While `info` takes object ids one at a time, this creates overhead when
> making requests to a server so `remote-object-info` instead can take
> multiple object ids at once.
> 
> cat-file --batch-command is generally implemented in the following
> manner:
> 
>  - Receive and parse input from user
>  - Call respective function attached to command
>  - Set batch mode state, get object info, print object info
> 
> In --buffer mode, this changes to:
> 
>  - Receive and parse input from user
>  - Store respective function attached to command in a queue
>  - After flush, loop through commands in queue
>     - Call respective function attached to command
>     - Set batch mode state, get object info, print object info

So the problem is that there is overhead associated with getting object
info from the remote. Therefore, remote-object-info also supports
batching objects together. This seems reasonable.

> 
> Notice how the getting and printing of object info is accomplished one
> at a time. As described above, this creates a problem for making
> requests to a server. Therefore, `remote-object-info` is implemented in
> the following manner:
> 
>  - Receive and parse input from user
>  If command is `remote-object-info`:
>     - Get object info from remote
>     - Loop through object info
>         - Call respective function attached to `info`
>         - Set batch mode state, use passed in object info, print object
>           info
>  Else:
>     - Call respective function attached to command
>     - Parse input, get object info, print object info
> 
> And finally for --buffer mode `remote-object-info`:
>  - Receive and parse input from user
>  - Store respective function attached to command in a queue
>  - After flush, loop through commands in queue:
>     If command is `remote-object-info`:
>         - Get object info from remote
>         - Loop through object info
>             - Call respective function attached to `info`
>             - Set batch mode state, use passed in object info, print
>               object info
>     Else:
>         - Call respective function attached to command
>         - Set batch mode state, get object info, print object info
> 
> To summarize, `remote-object-info` gets object info from the remote and
> then generates multiple `info` commands with the object info passed in.
> 
> In order for remote-object-info to avoid remote communication overhead
> in the non-buffer mode, the objects are passed in as such:

Even in non-buffer mode, having separate remote-object-info commands
would result in additional overhead correct? From my understanding each
command is executed sequently, so multiples of remote-object-info would
always result in additional overhead.

> 
> remote-object-info <remote> <oid> <oid> ... <oid>
> 
> rather than
> 
> remote-object-info <remote> <oid>
> remote-object-info <remote> <oid>
> ...
> remote-object-info <remote> <oid>
> 
> Signed-off-by: Calvin Wan <calvinwan@xxxxxxxxxx>
> Signed-off-by: Eric Ju  <eric.peijian@xxxxxxxxx>
> Helped-by: Jonathan Tan <jonathantanmy@xxxxxxxxxx>
> Helped-by: Christian Couder <chriscool@xxxxxxxxxxxxx>

I think the sign-offs are supposed to go at the bottom.

[snip]
> @@ -526,51 +533,118 @@ static void batch_one_object(const char *obj_name,
>  		(opt->follow_symlinks ? GET_OID_FOLLOW_SYMLINKS : 0);
>  	enum get_oid_result result;
>  
> -	result = get_oid_with_context(the_repository, obj_name,
> -				      flags, &data->oid, &ctx);
> -	if (result != FOUND) {
> -		switch (result) {
> -		case MISSING_OBJECT:
> -			printf("%s missing%c", obj_name, opt->output_delim);
> -			break;
> -		case SHORT_NAME_AMBIGUOUS:
> -			printf("%s ambiguous%c", obj_name, opt->output_delim);
> -			break;
> -		case DANGLING_SYMLINK:
> -			printf("dangling %"PRIuMAX"%c%s%c",
> -			       (uintmax_t)strlen(obj_name),
> -			       opt->output_delim, obj_name, opt->output_delim);
> -			break;
> -		case SYMLINK_LOOP:
> -			printf("loop %"PRIuMAX"%c%s%c",
> -			       (uintmax_t)strlen(obj_name),
> -			       opt->output_delim, obj_name, opt->output_delim);
> -			break;
> -		case NOT_DIR:
> -			printf("notdir %"PRIuMAX"%c%s%c",
> -			       (uintmax_t)strlen(obj_name),
> -			       opt->output_delim, obj_name, opt->output_delim);
> -			break;
> -		default:
> -			BUG("unknown get_sha1_with_context result %d\n",
> -			       result);
> -			break;
> +	if (!opt->use_remote_info) {

When using the remote-object-info command, the object in question is
supposed to be on the remote and may not exist locally. Therefore we
skip over `get_oid_with_context()`.

> +		result = get_oid_with_context(the_repository, obj_name,
> +						flags, &data->oid, &ctx);
> +		if (result != FOUND) {
> +			switch (result) {
> +			case MISSING_OBJECT:
> +				printf("%s missing%c", obj_name, opt->output_delim);
> +				break;
> +			case SHORT_NAME_AMBIGUOUS:
> +				printf("%s ambiguous%c", obj_name, opt->output_delim);
> +				break;
> +			case DANGLING_SYMLINK:
> +				printf("dangling %"PRIuMAX"%c%s%c",
> +					(uintmax_t)strlen(obj_name),
> +					opt->output_delim, obj_name, opt->output_delim);
> +				break;
> +			case SYMLINK_LOOP:
> +				printf("loop %"PRIuMAX"%c%s%c",
> +					(uintmax_t)strlen(obj_name),
> +					opt->output_delim, obj_name, opt->output_delim);
> +				break;
> +			case NOT_DIR:
> +				printf("notdir %"PRIuMAX"%c%s%c",
> +					(uintmax_t)strlen(obj_name),
> +					opt->output_delim, obj_name, opt->output_delim);
> +				break;
> +			default:
> +				BUG("unknown get_sha1_with_context result %d\n",
> +					result);
> +				break;
> +			}
> +			fflush(stdout);
> +			return;
>  		}
> -		fflush(stdout);
> -		return;
> -	}
>  
> -	if (ctx.mode == 0) {
> -		printf("symlink %"PRIuMAX"%c%s%c",
> -		       (uintmax_t)ctx.symlink_path.len,
> -		       opt->output_delim, ctx.symlink_path.buf, opt->output_delim);
> -		fflush(stdout);
> -		return;
> +		if (ctx.mode == 0) {
> +			printf("symlink %"PRIuMAX"%c%s%c",
> +				(uintmax_t)ctx.symlink_path.len,
> +				opt->output_delim, ctx.symlink_path.buf, opt->output_delim);
> +			fflush(stdout);
> +			return;
> +		}
>  	}
>  
>  	batch_object_write(obj_name, scratch, opt, data, NULL, 0);
>  }
>  
> +static int get_remote_info(struct batch_options *opt, int argc, const char **argv)
> +{
> +	int retval = 0;
> +	struct remote *remote = NULL;
> +	struct object_id oid;
> +	struct string_list object_info_options = STRING_LIST_INIT_NODUP;
> +	static struct transport *gtransport;
> +
> +	/*
> +	 * Change the format to "%(objectname) %(objectsize)" when
> +	 * remote-object-info command is used. Once we start supporting objecttype
> +	 * the default format should change to DEFAULT_FORMAT
> +	*/
> +	if (!opt->format) {
> +		opt->format = "%(objectname) %(objectsize)";
> +	}

We should omit the parenthesis for single line if statements.

> +
> +	remote = remote_get(argv[0]);
> +	if (!remote)
> +		die(_("must supply valid remote when using remote-object-info"));
> +	oid_array_clear(&object_info_oids);
> +	for (size_t i = 1; i < argc; i++) {
> +		if (get_oid_hex(argv[i], &oid))
> +			die(_("Not a valid object name %s"), argv[i]);
> +		oid_array_append(&object_info_oids, &oid);
> +	}
> +
> +	gtransport = transport_get(remote, NULL);
> +	if (gtransport->smart_options) {
> +		int include_size = 0;
> +
> +		CALLOC_ARRAY(remote_object_info, object_info_oids.nr);
> +		gtransport->smart_options->object_info = 1;
> +		gtransport->smart_options->object_info_oids = &object_info_oids;
> +		/*
> +		 * 'size' is the only option currently supported.
> +		 * Other options that are passed in the format will exit with error.
> +		 */
> +		if (strstr(opt->format, "%(objectsize)")) {
> +			string_list_append(&object_info_options, "size");
> +			include_size = 1;
> +		}
> +		if (strstr(opt->format, "%(objecttype)")) {
> +			die(_("objecttype is currently not supported with remote-object-info"));
> +		}

Another single line if statement above that should omit the parenthesis.

> +		if (strstr(opt->format, "%(objectsize:disk)"))
> +			die(_("objectsize:disk is currently not supported with remote-object-info"));
> +		if (strstr(opt->format, "%(deltabase)"))
> +			die(_("deltabase is currently not supported with remote-object-info"));
> +		if (object_info_options.nr > 0) {
> +			gtransport->smart_options->object_info_options = &object_info_options;
> +			for (size_t i = 0; i < object_info_oids.nr; i++) {
> +				if (include_size)
> +					remote_object_info[i].sizep = xcalloc(1, sizeof(long));
> +			}
> +			gtransport->smart_options->object_info_data = &remote_object_info;
> +			retval = transport_fetch_refs(gtransport, NULL);
> +		}
> +	} else {
> +		retval = -1;
> +	}
> +
> +	return retval;
> +}
> +
>  struct object_cb_data {
>  	struct batch_options *opt;
>  	struct expand_data *expand;
> @@ -642,6 +716,7 @@ typedef void (*parse_cmd_fn_t)(struct batch_options *, const char *,
>  struct queued_cmd {
>  	parse_cmd_fn_t fn;
>  	char *line;
> +	const char *name;

Since special handling is needed for the remote-object-info command, we
record the queued command names to check against later.

>  };
>  
>  static void parse_cmd_contents(struct batch_options *opt,
> @@ -662,6 +737,55 @@ static void parse_cmd_info(struct batch_options *opt,
>  	batch_one_object(line, output, opt, data);
>  }
>  
> +static const struct parse_cmd {
> +	const char *name;
> +	parse_cmd_fn_t fn;
> +	unsigned takes_args;
> +} commands[] = {
> +	{ "contents", parse_cmd_contents, 1 },
> +	{ "info", parse_cmd_info, 1 },
> +	{ "remote-object-info", parse_cmd_info, 1 },
> +	{ "flush", NULL, 0 },
> +};
> +
> +static void parse_remote_info(struct batch_options *opt,
> +			   char *line,
> +			   struct strbuf *output,
> +			   struct expand_data *data,
> +			   const struct parse_cmd *p_cmd,
> +			   struct queued_cmd *q_cmd)

It seems a little confusing to me that `parse_remote_info()` accepts 
both a `parse_cmd` and `queued_cmd`, but only expects to use one or the
other. It looks like this is done because `dispatch_calls()` already 
accepts `queued_cmd`, but now needs to call `parse_remote_info()`. 

Since it is only the underlying command function that is needed by
`parse_remote_info()`

> +{
> +	int count;
> +	const char **argv;
> +
> +	count = split_cmdline(line, &argv);
> +	if (get_remote_info(opt, count, argv))
> +		goto cleanup;
> +	opt->use_remote_info = 1;
> +	data->skip_object_info = 1;
> +	data->mark_query = 0;
> +	for (size_t i = 0; i < object_info_oids.nr; i++) {
> +		if (remote_object_info[i].sizep)
> +			data->size = *remote_object_info[i].sizep;
> +		if (remote_object_info[i].typep)
> +			data->type = *remote_object_info[i].typep;
> +
> +		data->oid = object_info_oids.oid[i];
> +		if (p_cmd)
> +			p_cmd->fn(opt, argv[i+1], output, data);
> +		else
> +			q_cmd->fn(opt, argv[i+1], output, data);
> +	}
> +	opt->use_remote_info = 0;
> +	data->skip_object_info = 0;
> +	data->mark_query = 1;
> +
> +cleanup:
> +	for (size_t i = 0; i < object_info_oids.nr; i++)
> +		free_object_info_contents(&remote_object_info[i]);
> +	free(remote_object_info);
> +}
> +
>  static void dispatch_calls(struct batch_options *opt,
>  		struct strbuf *output,
>  		struct expand_data *data,
> @@ -671,8 +795,12 @@ static void dispatch_calls(struct batch_options *opt,
>  	if (!opt->buffer_output)
>  		die(_("flush is only for --buffer mode"));
>  
> -	for (int i = 0; i < nr; i++)
> -		cmd[i].fn(opt, cmd[i].line, output, data);
> +	for (int i = 0; i < nr; i++) {
> +		if (!strcmp(cmd[i].name, "remote-object-info"))
> +			parse_remote_info(opt, cmd[i].line, output, data, NULL, &cmd[i]);

If we adapt `parse_remote_info()` to accept the command function we
could pass cmd->fn here instead.

> +		else
> +			cmd[i].fn(opt, cmd[i].line, output, data);
> +	}
>  
>  	fflush(stdout);
>  }
> @@ -685,17 +813,6 @@ static void free_cmds(struct queued_cmd *cmd, size_t *nr)
>  	*nr = 0;
>  }
>  
> -
> -static const struct parse_cmd {
> -	const char *name;
> -	parse_cmd_fn_t fn;
> -	unsigned takes_args;
> -} commands[] = {
> -	{ "contents", parse_cmd_contents, 1},
> -	{ "info", parse_cmd_info, 1},
> -	{ "flush", NULL, 0},
> -};
> -
>  static void batch_objects_command(struct batch_options *opt,
>  				    struct strbuf *output,
>  				    struct expand_data *data)
> @@ -740,11 +857,17 @@ static void batch_objects_command(struct batch_options *opt,
>  			dispatch_calls(opt, output, data, queued_cmd, nr);
>  			free_cmds(queued_cmd, &nr);
>  		} else if (!opt->buffer_output) {
> -			cmd->fn(opt, p, output, data);
> +			if (!strcmp(cmd->name, "remote-object-info")) {
> +				char *line = xstrdup_or_null(p);
> +				parse_remote_info(opt, line, output, data, cmd, NULL);

Same here, if we adapt `parse_remote_info()` to accept the command 
function we could pass cmd->fn here instead.

> +			} else {
> +				cmd->fn(opt, p, output, data);
> +			}
>  		} else {
>  			ALLOC_GROW(queued_cmd, nr + 1, alloc);
>  			call.fn = cmd->fn;
>  			call.line = xstrdup_or_null(p);
> +			call.name = cmd->name;
>  			queued_cmd[nr++] = call;
>  		}
>  	}
> @@ -761,8 +884,6 @@ static void batch_objects_command(struct batch_options *opt,
>  	strbuf_release(&input);
>  }
>  
> -#define DEFAULT_FORMAT "%(objectname) %(objecttype) %(objectsize)"
> -
>  static int batch_objects(struct batch_options *opt)
>  {
>  	struct strbuf input = STRBUF_INIT;
[snip]




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux