Re: [PATCH v2 3/6] Implement `scalar diagnose`

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




Am 06.02.22 um 23:39 schrieb Johannes Schindelin via GitGitGadget:
> From: Johannes Schindelin <johannes.schindelin@xxxxxx>
>
> Over the course of Scalar's development, it became obvious that there is
> a need for a command that can gather all kinds of useful information
> that can help identify the most typical problems with large
> worktrees/repositories.
>
> The `diagnose` command is the culmination of this hard-won knowledge: it
> gathers the installed hooks, the config, a couple statistics describing
> the data shape, among other pieces of information, and then wraps
> everything up in a tidy, neat `.zip` archive.
>
> Note: originally, Scalar was implemented in C# using the .NET API, where
> we had the luxury of a comprehensive standard library that includes
> basic functionality such as writing a `.zip` file. In the C version, we
> lack such a commodity. Rather than introducing a dependency on, say,
> libzip, we slightly abuse Git's `archive` command: Instead of writing
> the `.zip` file directly, we stage the file contents in a Git index of a
> temporary, bare repository, only to let `git archive` have at it, and
> finally removing the temporary repository.
>
> Also note: Due to the frequently-spawned `git hash-object` processes,
> this command is quite a bit slow on Windows. Should it turn out to be a
> big problem, the lack of a batch mode of the `hash-object` command could
> potentially be worked around via using `git fast-import` with a crafted
> `stdin`.

The two paragraphs above are not in sync with the patch.

>
> Signed-off-by: Johannes Schindelin <johannes.schindelin@xxxxxx>
> ---
>  contrib/scalar/scalar.c          | 143 +++++++++++++++++++++++++++++++
>  contrib/scalar/scalar.txt        |  12 +++
>  contrib/scalar/t/t9099-scalar.sh |  14 +++
>  3 files changed, 169 insertions(+)
>
> diff --git a/contrib/scalar/scalar.c b/contrib/scalar/scalar.c
> index 00dcd4b50ef..30ce0799c7a 100644
> --- a/contrib/scalar/scalar.c
> +++ b/contrib/scalar/scalar.c
> @@ -11,6 +11,7 @@
>  #include "dir.h"
>  #include "packfile.h"
>  #include "help.h"
> +#include "archive.h"
>
>  /*
>   * Remove the deepest subdirectory in the provided path string. Path must not
> @@ -261,6 +262,44 @@ static int unregister_dir(void)
>  	return res;
>  }
>
> +static int add_directory_to_archiver(struct strvec *archiver_args,
> +					  const char *path, int recurse)
> +{
> +	int at_root = !*path;
> +	DIR *dir = opendir(at_root ? "." : path);
> +	struct dirent *e;
> +	struct strbuf buf = STRBUF_INIT;
> +	size_t len;
> +	int res = 0;
> +
> +	if (!dir)
> +		return error(_("could not open directory '%s'"), path);
> +
> +	if (!at_root)
> +		strbuf_addf(&buf, "%s/", path);
> +	len = buf.len;
> +	strvec_pushf(archiver_args, "--prefix=%s", buf.buf);
> +
> +	while (!res && (e = readdir(dir))) {
> +		if (!strcmp(".", e->d_name) || !strcmp("..", e->d_name))
> +			continue;
> +
> +		strbuf_setlen(&buf, len);
> +		strbuf_addstr(&buf, e->d_name);
> +
> +		if (e->d_type == DT_REG)
> +			strvec_pushf(archiver_args, "--add-file=%s", buf.buf);
> +		else if (e->d_type != DT_DIR)
> +			res = -1;
> +		else if (recurse)
> +		     add_directory_to_archiver(archiver_args, buf.buf, recurse);
> +	}
> +
> +	closedir(dir);
> +	strbuf_release(&buf);
> +	return res;
> +}
> +
>  /* printf-style interface, expects `<key>=<value>` argument */
>  static int set_config(const char *fmt, ...)
>  {
> @@ -501,6 +540,109 @@ cleanup:
>  	return res;
>  }
>
> +static int cmd_diagnose(int argc, const char **argv)
> +{
> +	struct option options[] = {
> +		OPT_END(),
> +	};
> +	const char * const usage[] = {
> +		N_("scalar diagnose [<enlistment>]"),
> +		NULL
> +	};
> +	struct strbuf zip_path = STRBUF_INIT;
> +	struct strvec archiver_args = STRVEC_INIT;
> +	char **argv_copy = NULL;
> +	int stdout_fd = -1, archiver_fd = -1;
> +	time_t now = time(NULL);
> +	struct tm tm;
> +	struct strbuf path = STRBUF_INIT, buf = STRBUF_INIT;
> +	size_t off;
> +	int res = 0;
> +
> +	argc = parse_options(argc, argv, NULL, options,
> +			     usage, 0);
> +
> +	setup_enlistment_directory(argc, argv, usage, options, &zip_path);
> +
> +	strbuf_addstr(&zip_path, "/.scalarDiagnostics/scalar_");
> +	strbuf_addftime(&zip_path,
> +			"%Y%m%d_%H%M%S", localtime_r(&now, &tm), 0, 0);
> +	strbuf_addstr(&zip_path, ".zip");
> +	switch (safe_create_leading_directories(zip_path.buf)) {
> +	case SCLD_EXISTS:
> +	case SCLD_OK:
> +		break;
> +	default:
> +		error_errno(_("could not create directory for '%s'"),
> +			    zip_path.buf);
> +		goto diagnose_cleanup;
> +	}
> +	stdout_fd = dup(1);
> +	if (stdout_fd < 0) {
> +		res = error_errno(_("could not duplicate stdout"));
> +		goto diagnose_cleanup;
> +	}
> +
> +	archiver_fd = xopen(zip_path.buf, O_CREAT | O_WRONLY | O_TRUNC, 0666);
> +	if (archiver_fd < 0 || dup2(archiver_fd, 1) < 0) {
> +		res = error_errno(_("could not redirect output"));
> +		goto diagnose_cleanup;
> +	}
> +
> +	init_zip_archiver();
> +	strvec_pushl(&archiver_args, "scalar-diagnose", "--format=zip", NULL);
> +
> +	strbuf_reset(&buf);
> +	strbuf_addstr(&buf,
> +		      "--add-file-with-content=diagnostics.log:"
> +		      "Collecting diagnostic info\n\n");
> +	get_version_info(&buf, 1);
> +
> +	strbuf_addf(&buf, "Enlistment root: %s\n", the_repository->worktree);
> +	off = strchr(buf.buf, ':') + 1 - buf.buf;
> +	write_or_die(stdout_fd, buf.buf + off, buf.len - off);
> +	strvec_push(&archiver_args, buf.buf);

Fun trick to reuse the buffer for both the ZIP entry and stdout. :)  I'd
have omitted the option from buf and added it like this, for simplicity:

	strvec_pushf(&archiver_args,
		     "--add-file-with-content=diagnostics.log:%s", buf.buf);

Just a thought.

> +
> +	if ((res = add_directory_to_archiver(&archiver_args, ".git", 0)) ||
> +	    (res = add_directory_to_archiver(&archiver_args, ".git/hooks", 0)) ||
> +	    (res = add_directory_to_archiver(&archiver_args, ".git/info", 0)) ||
> +	    (res = add_directory_to_archiver(&archiver_args, ".git/logs", 1)) ||
> +	    (res = add_directory_to_archiver(&archiver_args, ".git/objects/info", 0)))
> +		goto diagnose_cleanup;
> +
> +	strvec_pushl(&archiver_args, "--prefix=",
> +		     oid_to_hex(the_hash_algo->empty_tree), "--", NULL);
> +
> +	/* `write_archive()` modifies the `argv` passed to it. Let it. */
> +	argv_copy = xmemdupz(archiver_args.v,
> +			     sizeof(char *) * archiver_args.nr);

Leaking the whole thing would be fine as well for this command, but
cleaning up is tidier, of course.

> +	res = write_archive(archiver_args.nr, (const char **)argv_copy, NULL,
> +			    the_repository, NULL, 0);

Ah -- no shell means no command line length limits. :)

> +	if (res) {
> +		error(_("failed to write archive"));
> +		goto diagnose_cleanup;
> +	}
> +
> +	if (!res)
> +		printf("\n"
> +		       "Diagnostics complete.\n"
> +		       "All of the gathered info is captured in '%s'\n",
> +		       zip_path.buf);

Is this message appended to the ZIP file or does it go to stdout?

In any case: mixing write(2) and stdio(3) is not a good idea.  Using
fwrite(3) instead of write_or_die above and doing the stdout dup(2)
dance only tightly around the write_archive call would help, I think.

> +
> +diagnose_cleanup:
> +	if (archiver_fd >= 0) {
> +		close(1);
> +		dup2(stdout_fd, 1);
> +	}
> +	free(argv_copy);
> +	strvec_clear(&archiver_args);
> +	strbuf_release(&zip_path);
> +	strbuf_release(&path);
> +	strbuf_release(&buf);
> +
> +	return res;
> +}
> +
>  static int cmd_list(int argc, const char **argv)
>  {
>  	if (argc != 1)
> @@ -802,6 +944,7 @@ static struct {
>  	{ "reconfigure", cmd_reconfigure },
>  	{ "delete", cmd_delete },
>  	{ "version", cmd_version },
> +	{ "diagnose", cmd_diagnose },
>  	{ NULL, NULL},
>  };
>
> diff --git a/contrib/scalar/scalar.txt b/contrib/scalar/scalar.txt
> index f416d637289..22583fe046e 100644
> --- a/contrib/scalar/scalar.txt
> +++ b/contrib/scalar/scalar.txt
> @@ -14,6 +14,7 @@ scalar register [<enlistment>]
>  scalar unregister [<enlistment>]
>  scalar run ( all | config | commit-graph | fetch | loose-objects | pack-files ) [<enlistment>]
>  scalar reconfigure [ --all | <enlistment> ]
> +scalar diagnose [<enlistment>]
>  scalar delete <enlistment>
>
>  DESCRIPTION
> @@ -129,6 +130,17 @@ reconfigure the enlistment.
>  With the `--all` option, all enlistments currently registered with Scalar
>  will be reconfigured. Use this option after each Scalar upgrade.
>
> +Diagnose
> +~~~~~~~~
> +
> +diagnose [<enlistment>]::
> +    When reporting issues with Scalar, it is often helpful to provide the
> +    information gathered by this command, including logs and certain
> +    statistics describing the data shape of the current enlistment.
> ++
> +The output of this command is a `.zip` file that is written into
> +a directory adjacent to the worktree in the `src` directory.
> +
>  Delete
>  ~~~~~~
>
> diff --git a/contrib/scalar/t/t9099-scalar.sh b/contrib/scalar/t/t9099-scalar.sh
> index 9d83fdf25e8..bbd07a44426 100755
> --- a/contrib/scalar/t/t9099-scalar.sh
> +++ b/contrib/scalar/t/t9099-scalar.sh
> @@ -90,4 +90,18 @@ test_expect_success '`scalar [...] <dir>` errors out when dir is missing' '
>  	grep "cloned. does not exist" err
>  '
>
> +SQ="'"
> +test_expect_success UNZIP 'scalar diagnose' '
> +	scalar clone "file://$(pwd)" cloned --single-branch &&
> +	scalar diagnose cloned >out &&
> +	sed -n "s/.*$SQ\\(.*\\.zip\\)$SQ.*/\\1/p" <out >zip_path &&
> +	zip_path=$(cat zip_path) &&
> +	test -n "$zip_path" &&
> +	unzip -v "$zip_path" &&
> +	folder=${zip_path%.zip} &&
> +	test_path_is_missing "$folder" &&
> +	unzip -p "$zip_path" diagnostics.log >out &&
> +	test_file_not_empty out
> +'
> +
>  test_done




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux