Am 06.02.22 um 23:39 schrieb Johannes Schindelin via GitGitGadget: > From: Johannes Schindelin <johannes.schindelin@xxxxxx> > > Over the course of Scalar's development, it became obvious that there is > a need for a command that can gather all kinds of useful information > that can help identify the most typical problems with large > worktrees/repositories. > > The `diagnose` command is the culmination of this hard-won knowledge: it > gathers the installed hooks, the config, a couple statistics describing > the data shape, among other pieces of information, and then wraps > everything up in a tidy, neat `.zip` archive. > > Note: originally, Scalar was implemented in C# using the .NET API, where > we had the luxury of a comprehensive standard library that includes > basic functionality such as writing a `.zip` file. In the C version, we > lack such a commodity. Rather than introducing a dependency on, say, > libzip, we slightly abuse Git's `archive` command: Instead of writing > the `.zip` file directly, we stage the file contents in a Git index of a > temporary, bare repository, only to let `git archive` have at it, and > finally removing the temporary repository. > > Also note: Due to the frequently-spawned `git hash-object` processes, > this command is quite a bit slow on Windows. Should it turn out to be a > big problem, the lack of a batch mode of the `hash-object` command could > potentially be worked around via using `git fast-import` with a crafted > `stdin`. The two paragraphs above are not in sync with the patch. > > Signed-off-by: Johannes Schindelin <johannes.schindelin@xxxxxx> > --- > contrib/scalar/scalar.c | 143 +++++++++++++++++++++++++++++++ > contrib/scalar/scalar.txt | 12 +++ > contrib/scalar/t/t9099-scalar.sh | 14 +++ > 3 files changed, 169 insertions(+) > > diff --git a/contrib/scalar/scalar.c b/contrib/scalar/scalar.c > index 00dcd4b50ef..30ce0799c7a 100644 > --- a/contrib/scalar/scalar.c > +++ b/contrib/scalar/scalar.c > @@ -11,6 +11,7 @@ > #include "dir.h" > #include "packfile.h" > #include "help.h" > +#include "archive.h" > > /* > * Remove the deepest subdirectory in the provided path string. Path must not > @@ -261,6 +262,44 @@ static int unregister_dir(void) > return res; > } > > +static int add_directory_to_archiver(struct strvec *archiver_args, > + const char *path, int recurse) > +{ > + int at_root = !*path; > + DIR *dir = opendir(at_root ? "." : path); > + struct dirent *e; > + struct strbuf buf = STRBUF_INIT; > + size_t len; > + int res = 0; > + > + if (!dir) > + return error(_("could not open directory '%s'"), path); > + > + if (!at_root) > + strbuf_addf(&buf, "%s/", path); > + len = buf.len; > + strvec_pushf(archiver_args, "--prefix=%s", buf.buf); > + > + while (!res && (e = readdir(dir))) { > + if (!strcmp(".", e->d_name) || !strcmp("..", e->d_name)) > + continue; > + > + strbuf_setlen(&buf, len); > + strbuf_addstr(&buf, e->d_name); > + > + if (e->d_type == DT_REG) > + strvec_pushf(archiver_args, "--add-file=%s", buf.buf); > + else if (e->d_type != DT_DIR) > + res = -1; > + else if (recurse) > + add_directory_to_archiver(archiver_args, buf.buf, recurse); > + } > + > + closedir(dir); > + strbuf_release(&buf); > + return res; > +} > + > /* printf-style interface, expects `<key>=<value>` argument */ > static int set_config(const char *fmt, ...) > { > @@ -501,6 +540,109 @@ cleanup: > return res; > } > > +static int cmd_diagnose(int argc, const char **argv) > +{ > + struct option options[] = { > + OPT_END(), > + }; > + const char * const usage[] = { > + N_("scalar diagnose [<enlistment>]"), > + NULL > + }; > + struct strbuf zip_path = STRBUF_INIT; > + struct strvec archiver_args = STRVEC_INIT; > + char **argv_copy = NULL; > + int stdout_fd = -1, archiver_fd = -1; > + time_t now = time(NULL); > + struct tm tm; > + struct strbuf path = STRBUF_INIT, buf = STRBUF_INIT; > + size_t off; > + int res = 0; > + > + argc = parse_options(argc, argv, NULL, options, > + usage, 0); > + > + setup_enlistment_directory(argc, argv, usage, options, &zip_path); > + > + strbuf_addstr(&zip_path, "/.scalarDiagnostics/scalar_"); > + strbuf_addftime(&zip_path, > + "%Y%m%d_%H%M%S", localtime_r(&now, &tm), 0, 0); > + strbuf_addstr(&zip_path, ".zip"); > + switch (safe_create_leading_directories(zip_path.buf)) { > + case SCLD_EXISTS: > + case SCLD_OK: > + break; > + default: > + error_errno(_("could not create directory for '%s'"), > + zip_path.buf); > + goto diagnose_cleanup; > + } > + stdout_fd = dup(1); > + if (stdout_fd < 0) { > + res = error_errno(_("could not duplicate stdout")); > + goto diagnose_cleanup; > + } > + > + archiver_fd = xopen(zip_path.buf, O_CREAT | O_WRONLY | O_TRUNC, 0666); > + if (archiver_fd < 0 || dup2(archiver_fd, 1) < 0) { > + res = error_errno(_("could not redirect output")); > + goto diagnose_cleanup; > + } > + > + init_zip_archiver(); > + strvec_pushl(&archiver_args, "scalar-diagnose", "--format=zip", NULL); > + > + strbuf_reset(&buf); > + strbuf_addstr(&buf, > + "--add-file-with-content=diagnostics.log:" > + "Collecting diagnostic info\n\n"); > + get_version_info(&buf, 1); > + > + strbuf_addf(&buf, "Enlistment root: %s\n", the_repository->worktree); > + off = strchr(buf.buf, ':') + 1 - buf.buf; > + write_or_die(stdout_fd, buf.buf + off, buf.len - off); > + strvec_push(&archiver_args, buf.buf); Fun trick to reuse the buffer for both the ZIP entry and stdout. :) I'd have omitted the option from buf and added it like this, for simplicity: strvec_pushf(&archiver_args, "--add-file-with-content=diagnostics.log:%s", buf.buf); Just a thought. > + > + if ((res = add_directory_to_archiver(&archiver_args, ".git", 0)) || > + (res = add_directory_to_archiver(&archiver_args, ".git/hooks", 0)) || > + (res = add_directory_to_archiver(&archiver_args, ".git/info", 0)) || > + (res = add_directory_to_archiver(&archiver_args, ".git/logs", 1)) || > + (res = add_directory_to_archiver(&archiver_args, ".git/objects/info", 0))) > + goto diagnose_cleanup; > + > + strvec_pushl(&archiver_args, "--prefix=", > + oid_to_hex(the_hash_algo->empty_tree), "--", NULL); > + > + /* `write_archive()` modifies the `argv` passed to it. Let it. */ > + argv_copy = xmemdupz(archiver_args.v, > + sizeof(char *) * archiver_args.nr); Leaking the whole thing would be fine as well for this command, but cleaning up is tidier, of course. > + res = write_archive(archiver_args.nr, (const char **)argv_copy, NULL, > + the_repository, NULL, 0); Ah -- no shell means no command line length limits. :) > + if (res) { > + error(_("failed to write archive")); > + goto diagnose_cleanup; > + } > + > + if (!res) > + printf("\n" > + "Diagnostics complete.\n" > + "All of the gathered info is captured in '%s'\n", > + zip_path.buf); Is this message appended to the ZIP file or does it go to stdout? In any case: mixing write(2) and stdio(3) is not a good idea. Using fwrite(3) instead of write_or_die above and doing the stdout dup(2) dance only tightly around the write_archive call would help, I think. > + > +diagnose_cleanup: > + if (archiver_fd >= 0) { > + close(1); > + dup2(stdout_fd, 1); > + } > + free(argv_copy); > + strvec_clear(&archiver_args); > + strbuf_release(&zip_path); > + strbuf_release(&path); > + strbuf_release(&buf); > + > + return res; > +} > + > static int cmd_list(int argc, const char **argv) > { > if (argc != 1) > @@ -802,6 +944,7 @@ static struct { > { "reconfigure", cmd_reconfigure }, > { "delete", cmd_delete }, > { "version", cmd_version }, > + { "diagnose", cmd_diagnose }, > { NULL, NULL}, > }; > > diff --git a/contrib/scalar/scalar.txt b/contrib/scalar/scalar.txt > index f416d637289..22583fe046e 100644 > --- a/contrib/scalar/scalar.txt > +++ b/contrib/scalar/scalar.txt > @@ -14,6 +14,7 @@ scalar register [<enlistment>] > scalar unregister [<enlistment>] > scalar run ( all | config | commit-graph | fetch | loose-objects | pack-files ) [<enlistment>] > scalar reconfigure [ --all | <enlistment> ] > +scalar diagnose [<enlistment>] > scalar delete <enlistment> > > DESCRIPTION > @@ -129,6 +130,17 @@ reconfigure the enlistment. > With the `--all` option, all enlistments currently registered with Scalar > will be reconfigured. Use this option after each Scalar upgrade. > > +Diagnose > +~~~~~~~~ > + > +diagnose [<enlistment>]:: > + When reporting issues with Scalar, it is often helpful to provide the > + information gathered by this command, including logs and certain > + statistics describing the data shape of the current enlistment. > ++ > +The output of this command is a `.zip` file that is written into > +a directory adjacent to the worktree in the `src` directory. > + > Delete > ~~~~~~ > > diff --git a/contrib/scalar/t/t9099-scalar.sh b/contrib/scalar/t/t9099-scalar.sh > index 9d83fdf25e8..bbd07a44426 100755 > --- a/contrib/scalar/t/t9099-scalar.sh > +++ b/contrib/scalar/t/t9099-scalar.sh > @@ -90,4 +90,18 @@ test_expect_success '`scalar [...] <dir>` errors out when dir is missing' ' > grep "cloned. does not exist" err > ' > > +SQ="'" > +test_expect_success UNZIP 'scalar diagnose' ' > + scalar clone "file://$(pwd)" cloned --single-branch && > + scalar diagnose cloned >out && > + sed -n "s/.*$SQ\\(.*\\.zip\\)$SQ.*/\\1/p" <out >zip_path && > + zip_path=$(cat zip_path) && > + test -n "$zip_path" && > + unzip -v "$zip_path" && > + folder=${zip_path%.zip} && > + test_path_is_missing "$folder" && > + unzip -p "$zip_path" diagnostics.log >out && > + test_file_not_empty out > +' > + > test_done