Re: [GSoC] [PATCH v2 1/2] submodule--helper: introduce add-clone subcommand

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2021-06-08 15:26:54+0530, Atharva Raykar <raykar.ath@xxxxxxxxx> wrote:
> Let's add a new "add-clone" subcommand to `git submodule--helper` with
> the goal of converting part of the shell code in git-submodule.sh
> related to `git submodule add` into C code. This new subcommand clones
> the repository that is to be added, and checks out to the appropriate
> branch.
> 
> This is meant to be a faithful conversion that leaves the behaviour of
> 'submodule add' unchanged. The only minor change is that if a submodule name has
> been supplied with a name that clashes with a local submodule, the message shown
> to the user ("A git directory for 'foo' is found locally...") is prepended with
> "error" for clarity.
> 
> This is part of a series of changes that will result in all of 'submodule add'
> being converted to C.
> 
> Signed-off-by: Atharva Raykar <raykar.ath@xxxxxxxxx>
> Mentored-by: Christian Couder <christian.couder@xxxxxxxxx>
> Mentored-by: Shourya Shukla <shouryashukla.oo@xxxxxxxxx>
> Based-on-patch-by: Shourya Shukla <shouryashukla.oo@xxxxxxxxx>
> Based-on-patch-by: Prathamesh Chavan <pc44800@xxxxxxxxx>
> ---
>  builtin/submodule--helper.c | 199 ++++++++++++++++++++++++++++++++++++
>  git-submodule.sh            |  38 +------
>  2 files changed, 200 insertions(+), 37 deletions(-)
> 
> diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
> index d55f6262e9..c9cb535312 100644
> --- a/builtin/submodule--helper.c
> +++ b/builtin/submodule--helper.c
> @@ -2745,6 +2745,204 @@ static int module_set_branch(int argc, const char **argv, const char *prefix)
>  	return !!ret;
>  }
>  
> +struct add_data {
> +	const char *prefix;
> +	const char *branch;
> +	const char *reference_path;
> +	const char *sm_path;
> +	const char *sm_name;
> +	const char *repo;
> +	const char *realrepo;
> +	int depth;
> +	unsigned int force: 1;
> +	unsigned int quiet: 1;
> +	unsigned int progress: 1;
> +	unsigned int dissociate: 1;
> +};
> +#define ADD_DATA_INIT { .depth = -1 }
> +
> +static char *parse_token(char **begin, const char *end, int *tok_len)
> +{
> +	char *tok_start, *pos = *begin;
> +	while (pos != end && (*pos != ' ' && *pos != '\t' && *pos != '\n'))
> +		pos++;
> +	tok_start = *begin;
> +	*tok_len = pos - *begin;
> +	*begin = pos + 1;
> +	return tok_start;
> +}
> +
> +static char *get_next_line(char *const begin, const char *const end)
> +{
> +	char *pos = begin;
> +	while (pos != end && *pos++ != '\n');
> +	return pos;
> +}

On my first glance, this function looks like a reinvention of strchr(3).
Except that, this function also has a second parameter for "end".
Maybe it has a specical use-case?

> +
> +static void show_fetch_remotes(FILE *output, const char *sm_name, const char *git_dir_path)
> +{
> +	struct child_process cp_remote = CHILD_PROCESS_INIT;
> +	struct strbuf sb_remote_out = STRBUF_INIT;
> +
> +	cp_remote.git_cmd = 1;
> +	strvec_pushf(&cp_remote.env_array,
> +		     "GIT_DIR=%s", git_dir_path);
> +	strvec_push(&cp_remote.env_array, "GIT_WORK_TREE=.");
> +	strvec_pushl(&cp_remote.args, "remote", "-v", NULL);
> +	if (!capture_command(&cp_remote, &sb_remote_out, 0)) {
> +		char *line;
> +		char *begin = sb_remote_out.buf;
> +		char *end = sb_remote_out.buf + sb_remote_out.len;
> +		while (begin != end && (line = get_next_line(begin, end))) {

And this is the only use-case.  Because you also want to check if you
reached the last token or not.  I guess you really meant to write:

	while ((line = strchr(begin, '\n')) != NULL) {

Anyway, I would name the "line" variable as "nextline"

> +			int namelen = 0, urllen = 0, taillen = 0;
> +			char *name = parse_token(&begin, line, &namelen);
> +			char *url = parse_token(&begin, line, &urllen);
> +			char *tail = parse_token(&begin, line, &taillen);
> +			if (!memcmp(tail, "(fetch)", 7))
> +				fprintf(output, "  %.*s\t%.*s\n",
> +					namelen, name, urllen, url);

I think this whole block is better replaced with strip_suffix_mem and
fprintf.

Overral I would replace the block inside capture_command with:

-----8<-----
	char *nextline;
	char *line = sb_remote_out.buf;
	while ((nextline = strchr(line, '\n')) != NULL) {
		size_t len = nextline - line;
		if (strip_suffix_mem(line, &len, "(fetch)"))
			fprintf(output, "  %.*s\n", (int)len, line);
		line = nextline + 1;
	}
---->8-----

And get rid of parse_token and get_next_line functions.



> +		}
> +	}
> +
> +	strbuf_release(&sb_remote_out);
> +}
> +
> +static int add_submodule(const struct add_data *add_data)
> +{
> +	char *submod_gitdir_path;
> +	/* perhaps the path already exists and is already a git repo, else clone it */
> +	if (is_directory(add_data->sm_path)) {
> +		submod_gitdir_path = xstrfmt("%s/.git", add_data->sm_path);
> +		if (is_directory(submod_gitdir_path) || file_exists(submod_gitdir_path))
> +			printf(_("Adding existing path at '%s' to index\n"),
> +			       add_data->sm_path);
> +		else
> +			die(_("'%s' already exists and is not a valid git repo"),
> +			    add_data->sm_path);
> +		free(submod_gitdir_path);
> +	} else {
> +		struct strvec clone_args = STRVEC_INIT;
> +		struct child_process cp = CHILD_PROCESS_INIT;
> +		submod_gitdir_path = xstrfmt(".git/modules/%s", add_data->sm_name);
> +
> +		if (is_directory(submod_gitdir_path)) {
> +			if (!add_data->force) {
> +				error(_("A git directory for '%s' is found "
> +					"locally with remote(s):"), add_data->sm_name);

We don't capitalise first character of error message.
IOW, downcase "A git".

Well, it's bug-for-bug with shell implementation, so it doesn't matter much, anyway.

> +				show_fetch_remotes(stderr, add_data->sm_name,
> +						   submod_gitdir_path);
> +				fprintf(stderr,
> +					_("If you want to reuse this local git "
> +					  "directory instead of cloning again from\n"
> +					  "  %s\n"
> +					  "use the '--force' option. If the local git "
> +					  "directory is not the correct repo\n"
> +					  "or if you are unsure what this means, choose "
> +					  "another name with the '--name' option.\n"),
> +					add_data->realrepo);

Is there any reason we can't use "error" here?

> +				free(submod_gitdir_path);
> +				return 1;
> +			} else {
> +				printf(_("Reactivating local git directory for "
> +					 "submodule '%s'\n"), add_data->sm_name);
> +			}
> +		}
> +		free(submod_gitdir_path);
> +
> +		strvec_pushl(&clone_args, "clone", "--path", add_data->sm_path, "--name",
> +			     add_data->sm_name, "--url", add_data->realrepo, NULL);
> +		if (add_data->quiet)
> +			strvec_push(&clone_args, "--quiet");
> +		if (add_data->progress)
> +			strvec_push(&clone_args, "--progress");
> +		if (add_data->prefix)
> +			strvec_pushl(&clone_args, "--prefix", add_data->prefix, NULL);
> +		if (add_data->reference_path)
> +			strvec_pushl(&clone_args, "--reference",
> +				     add_data->reference_path, NULL);
> +		if (add_data->dissociate)
> +			strvec_push(&clone_args, "--dissociate");
> +		if (add_data->depth >= 0)
> +			strvec_pushf(&clone_args, "--depth=%d", add_data->depth);
> +
> +		if (module_clone(clone_args.nr, clone_args.v, add_data->prefix)) {
> +			strvec_clear(&clone_args);
> +			return -1;
> +		}
> +		strvec_clear(&clone_args);
> +
> +		prepare_submodule_repo_env(&cp.env_array);
> +		cp.git_cmd = 1;
> +		cp.dir = add_data->sm_path;
> +		strvec_pushl(&cp.args, "checkout", "-f", "-q", NULL);
> +
> +		if (add_data->branch) {
> +			strvec_pushl(&cp.args, "-B", add_data->branch, NULL);
> +			strvec_pushf(&cp.args, "origin/%s", add_data->branch);
> +		}
> +
> +		if (run_command(&cp))
> +			die(_("unable to checkout submodule '%s'"), add_data->sm_path);
> +	}
> +	return 0;
> +}
> +
> +static int add_clone(int argc, const char **argv, const char *prefix)
> +{
> +	int force = 0, quiet = 0, dissociate = 0, progress = 0;
> +	struct add_data add_data = ADD_DATA_INIT;
> +
> +	struct option options[] = {
> +		OPT_STRING('b', "branch", &add_data.branch,
> +			   N_("branch"),
> +			   N_("branch of repository to checkout on cloning")),
> +		OPT_STRING(0, "prefix", &add_data.prefix,
> +			   N_("path"),
> +			   N_("alternative anchor for relative paths")),
> +		OPT_STRING(0, "path", &add_data.sm_path,
> +			   N_("path"),
> +			   N_("where the new submodule will be cloned to")),
> +		OPT_STRING(0, "name", &add_data.sm_name,
> +			   N_("string"),
> +			   N_("name of the new submodule")),
> +		OPT_STRING(0, "url", &add_data.realrepo,
> +			   N_("string"),
> +			   N_("url where to clone the submodule from")),
> +		OPT_STRING(0, "reference", &add_data.reference_path,
> +			   N_("repo"),
> +			   N_("reference repository")),
> +		OPT_BOOL(0, "dissociate", &dissociate,
> +			 N_("use --reference only while cloning")),
> +		OPT_INTEGER(0, "depth", &add_data.depth,
> +			    N_("depth for shallow clones")),
> +		OPT_BOOL(0, "progress", &progress,
> +			 N_("force cloning progress")),
> +		OPT_BOOL('f', "force", &force,
> +			 N_("allow adding an otherwise ignored submodule path")),

We have OPT__FORCE, too.

> +		OPT__QUIET(&quiet, "Suppress output for cloning a submodule"),

And, please downcase "Suppress".

> +		OPT_END()
> +	};
> +
> +	const char *const usage[] = {
> +		N_("git submodule--helper add-clone [--prefix=<path>] [--quiet] [--force] "
> +		   "[--reference <repository>] [--depth <depth>] [-b|--branch <branch>]"
> +		   "[--progress] [--dissociate] --url <url> --path <path> --name <name>"),

I think it's too crowded here, I guess it's better to write:

	N_("git submodule--helper add-clone [<options>...] --url <url> --path <path> --name <name>"),

> +		NULL
> +	};
> +
> +	argc = parse_options(argc, argv, prefix, options, usage, 0);

>From above usage, I think url, path, name is required, should we have a check for them, here?
> +
> +	add_data.progress = !!progress;
> +	add_data.dissociate = !!dissociate;
> +	add_data.force = !!force;
> +	add_data.quiet = !!quiet;
> +
> +	if (add_submodule(&add_data))
> +		return 1;
> +
> +	return 0;
> +}
> +
>  #define SUPPORT_SUPER_PREFIX (1<<0)
>  
>  struct cmd_struct {
> @@ -2757,6 +2955,7 @@ static struct cmd_struct commands[] = {
>  	{"list", module_list, 0},
>  	{"name", module_name, 0},
>  	{"clone", module_clone, 0},
> +	{"add-clone", add_clone, 0},
>  	{"update-module-mode", module_update_module_mode, 0},
>  	{"update-clone", update_clone, 0},
>  	{"ensure-core-worktree", ensure_core_worktree, 0},
> diff --git a/git-submodule.sh b/git-submodule.sh
> index 4678378424..f71e1e5495 100755
> --- a/git-submodule.sh
> +++ b/git-submodule.sh
> @@ -241,43 +241,7 @@ cmd_add()
>  		die "$(eval_gettext "'$sm_name' is not a valid submodule name")"
>  	fi
>  
> -	# perhaps the path exists and is already a git repo, else clone it
> -	if test -e "$sm_path"
> -	then
> -		if test -d "$sm_path"/.git || test -f "$sm_path"/.git
> -		then
> -			eval_gettextln "Adding existing repo at '\$sm_path' to the index"
> -		else
> -			die "$(eval_gettext "'\$sm_path' already exists and is not a valid git repo")"
> -		fi
> -
> -	else
> -		if test -d ".git/modules/$sm_name"
> -		then
> -			if test -z "$force"
> -			then
> -				eval_gettextln >&2 "A git directory for '\$sm_name' is found locally with remote(s):"
> -				GIT_DIR=".git/modules/$sm_name" GIT_WORK_TREE=. git remote -v | grep '(fetch)' | sed -e s,^,"  ", -e s,' (fetch)',, >&2
> -				die "$(eval_gettextln "\
> -If you want to reuse this local git directory instead of cloning again from
> -  \$realrepo
> -use the '--force' option. If the local git directory is not the correct repo
> -or you are unsure what this means choose another name with the '--name' option.")"
> -			else
> -				eval_gettextln "Reactivating local git directory for submodule '\$sm_name'."
> -			fi
> -		fi
> -		git submodule--helper clone ${GIT_QUIET:+--quiet} ${progress:+"--progress"} --prefix "$wt_prefix" --path "$sm_path" --name "$sm_name" --url "$realrepo" ${reference:+"$reference"} ${dissociate:+"--dissociate"} ${depth:+"$depth"} || exit
> -		(
> -			sanitize_submodule_env
> -			cd "$sm_path" &&
> -			# ash fails to wordsplit ${branch:+-b "$branch"...}
> -			case "$branch" in
> -			'') git checkout -f -q ;;
> -			?*) git checkout -f -q -B "$branch" "origin/$branch" ;;
> -			esac
> -		) || die "$(eval_gettext "Unable to checkout submodule '\$sm_path'")"
> -	fi
> +	git submodule--helper add-clone ${GIT_QUIET:+--quiet} ${force:+"--force"} ${progress:+"--progress"} ${branch:+--branch "$branch"} --prefix "$wt_prefix" --path "$sm_path" --name "$sm_name" --url "$realrepo" ${reference:+"$reference"} ${dissociate:+"--dissociate"} ${depth:+"$depth"} || exit
>  	git config submodule."$sm_name".url "$realrepo"
>  
>  	git add --no-warn-embedded-repo $force "$sm_path" ||
> -- 
> 2.31.1
> 

-- 
Danh



[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux