On Wed, Jan 26, 2022 at 3:37 PM Johannes Schindelin via GitGitGadget <gitgitgadget@xxxxxxxxx> wrote: > > From: Johannes Schindelin <johannes.schindelin@xxxxxx> > > Over the course of Scalar's development, it became obvious that there is > a need for a command that can gather all kinds of useful information > that can help identify the most typical problems with large > worktrees/repositories. > > The `diagnose` command is the culmination of this hard-won knowledge: it > gathers the installed hooks, the config, a couple statistics describing > the data shape, among other pieces of information, and then wraps > everything up in a tidy, neat `.zip` archive. > > Note: originally, Scalar was implemented in C# using the .NET API, where > we had the luxury of a comprehensive standard library that includes > basic functionality such as writing a `.zip` file. In the C version, we > lack such a commodity. Rather than introducing a dependency on, say, > libzip, we slightly abuse Git's `archive` command: Instead of writing > the `.zip` file directly, we stage the file contents in a Git index of a > temporary, bare repository, only to let `git archive` have at it, and > finally removing the temporary repository. > > Also note: Due to the frequently-spawned `git hash-object` processes, > this command is quite a bit slow on Windows. Should it turn out to be a > big problem, the lack of a batch mode of the `hash-object` command could > potentially be worked around via using `git fast-import` with a crafted > `stdin`. hash-object and update-index processes, right? You spawn one of each for each object. I was you investigate the fast-import idea because it gets rid of the N hash-object processes, the N update-index processes, and the write-tree process, instead giving you a single fast-import process as a preliminary to calling out to git archive. It'd also have the advantage of providing just one pack instead of many loose objects. But René's suggestion to use and extend archive's ability to handle untracked files sounds like a better idea. > > Signed-off-by: Johannes Schindelin <johannes.schindelin@xxxxxx> > --- > contrib/scalar/scalar.c | 170 +++++++++++++++++++++++++++++++ > contrib/scalar/scalar.txt | 12 +++ > contrib/scalar/t/t9099-scalar.sh | 13 +++ > 3 files changed, 195 insertions(+) > > diff --git a/contrib/scalar/scalar.c b/contrib/scalar/scalar.c > index 1ce9c2b00e8..13f2b0f4d5a 100644 > --- a/contrib/scalar/scalar.c > +++ b/contrib/scalar/scalar.c > @@ -259,6 +259,108 @@ static int unregister_dir(void) > return res; > } > > +static int stage(const char *git_dir, struct strbuf *buf, const char *path) > +{ > + struct strbuf cacheinfo = STRBUF_INIT; > + struct child_process cp = CHILD_PROCESS_INIT; > + int res; > + > + strbuf_addstr(&cacheinfo, "100644,"); > + > + cp.git_cmd = 1; > + strvec_pushl(&cp.args, "--git-dir", git_dir, > + "hash-object", "-w", "--stdin", NULL); > + res = pipe_command(&cp, buf->buf, buf->len, &cacheinfo, 256, NULL, 0); > + if (!res) { > + strbuf_rtrim(&cacheinfo); > + strbuf_addch(&cacheinfo, ','); > + /* We cannot stage `.git`, use `_git` instead. */ > + if (starts_with(path, ".git/")) > + strbuf_addf(&cacheinfo, "_%s", path + 1); > + else > + strbuf_addstr(&cacheinfo, path); > + > + child_process_init(&cp); > + cp.git_cmd = 1; > + strvec_pushl(&cp.args, "--git-dir", git_dir, > + "update-index", "--add", "--cacheinfo", > + cacheinfo.buf, NULL); > + res = run_command(&cp); > + } > + > + strbuf_release(&cacheinfo); > + return res; > +} > + > +static int stage_file(const char *git_dir, const char *path) > +{ > + struct strbuf buf = STRBUF_INIT; > + int res; > + > + if (strbuf_read_file(&buf, path, 0) < 0) > + return error(_("could not read '%s'"), path); > + > + res = stage(git_dir, &buf, path); > + > + strbuf_release(&buf); > + return res; > +} > + > +static int stage_directory(const char *git_dir, const char *path, int recurse) > +{ > + int at_root = !*path; > + DIR *dir = opendir(at_root ? "." : path); > + struct dirent *e; > + struct strbuf buf = STRBUF_INIT; > + size_t len; > + int res = 0; > + > + if (!dir) > + return error(_("could not open directory '%s'"), path); > + > + if (!at_root) > + strbuf_addf(&buf, "%s/", path); > + len = buf.len; > + > + while (!res && (e = readdir(dir))) { > + if (!strcmp(".", e->d_name) || !strcmp("..", e->d_name)) > + continue; > + > + strbuf_setlen(&buf, len); > + strbuf_addstr(&buf, e->d_name); > + > + if ((e->d_type == DT_REG && stage_file(git_dir, buf.buf)) || > + (e->d_type == DT_DIR && recurse && > + stage_directory(git_dir, buf.buf, recurse))) > + res = -1; > + } > + > + closedir(dir); > + strbuf_release(&buf); > + return res; > +} > + > +static int index_to_zip(const char *git_dir) > +{ > + struct child_process cp = CHILD_PROCESS_INIT; > + struct strbuf oid = STRBUF_INIT; > + > + cp.git_cmd = 1; > + strvec_pushl(&cp.args, "--git-dir", git_dir, "write-tree", NULL); > + if (pipe_command(&cp, NULL, 0, &oid, the_hash_algo->hexsz + 1, > + NULL, 0)) > + return error(_("could not write temporary tree object")); > + > + strbuf_rtrim(&oid); > + child_process_init(&cp); > + cp.git_cmd = 1; > + strvec_pushl(&cp.args, "--git-dir", git_dir, "archive", "-o", NULL); > + strvec_pushf(&cp.args, "%s.zip", git_dir); > + strvec_pushl(&cp.args, oid.buf, "--", NULL); > + strbuf_release(&oid); > + return run_command(&cp); > +} > + > /* printf-style interface, expects `<key>=<value>` argument */ > static int set_config(const char *fmt, ...) > { > @@ -499,6 +601,73 @@ cleanup: > return res; > } > > +static int cmd_diagnose(int argc, const char **argv) > +{ > + struct option options[] = { > + OPT_END(), > + }; > + const char * const usage[] = { > + N_("scalar diagnose [<enlistment>]"), > + NULL > + }; > + struct strbuf tmp_dir = STRBUF_INIT; > + time_t now = time(NULL); > + struct tm tm; > + struct strbuf path = STRBUF_INIT, buf = STRBUF_INIT; > + int res = 0; > + > + argc = parse_options(argc, argv, NULL, options, > + usage, 0); > + > + setup_enlistment_directory(argc, argv, usage, options, &buf); > + > + strbuf_addstr(&buf, "/.scalarDiagnostics/scalar_"); > + strbuf_addftime(&buf, "%Y%m%d_%H%M%S", localtime_r(&now, &tm), 0, 0); > + if (run_git("init", "-q", "-b", "dummy", "--bare", buf.buf, NULL)) { > + res = error(_("could not initialize temporary repository: %s"), > + buf.buf); > + goto diagnose_cleanup; > + } > + strbuf_realpath(&tmp_dir, buf.buf, 1); > + > + strbuf_reset(&buf); > + strbuf_addf(&buf, "Collecting diagnostic info into temp folder %s\n\n", > + tmp_dir.buf); > + > + get_version_info(&buf, 1); > + > + strbuf_addf(&buf, "Enlistment root: %s\n", the_repository->worktree); > + fwrite(buf.buf, buf.len, 1, stdout); > + > + if ((res = stage(tmp_dir.buf, &buf, "diagnostics.log"))) > + goto diagnose_cleanup; > + > + if ((res = stage_directory(tmp_dir.buf, ".git", 0)) || > + (res = stage_directory(tmp_dir.buf, ".git/hooks", 0)) || > + (res = stage_directory(tmp_dir.buf, ".git/info", 0)) || > + (res = stage_directory(tmp_dir.buf, ".git/logs", 1)) || > + (res = stage_directory(tmp_dir.buf, ".git/objects/info", 0))) > + goto diagnose_cleanup; > + > + res = index_to_zip(tmp_dir.buf); > + > + if (!res) > + res = remove_dir_recursively(&tmp_dir, 0); > + > + if (!res) > + printf("\n" > + "Diagnostics complete.\n" > + "All of the gathered info is captured in '%s.zip'\n", > + tmp_dir.buf); > + > +diagnose_cleanup: > + strbuf_release(&tmp_dir); > + strbuf_release(&path); > + strbuf_release(&buf); > + > + return res; > +} > + > static int cmd_list(int argc, const char **argv) > { > if (argc != 1) > @@ -800,6 +969,7 @@ static struct { > { "reconfigure", cmd_reconfigure }, > { "delete", cmd_delete }, > { "version", cmd_version }, > + { "diagnose", cmd_diagnose }, > { NULL, NULL}, > }; > > diff --git a/contrib/scalar/scalar.txt b/contrib/scalar/scalar.txt > index f416d637289..22583fe046e 100644 > --- a/contrib/scalar/scalar.txt > +++ b/contrib/scalar/scalar.txt > @@ -14,6 +14,7 @@ scalar register [<enlistment>] > scalar unregister [<enlistment>] > scalar run ( all | config | commit-graph | fetch | loose-objects | pack-files ) [<enlistment>] > scalar reconfigure [ --all | <enlistment> ] > +scalar diagnose [<enlistment>] > scalar delete <enlistment> > > DESCRIPTION > @@ -129,6 +130,17 @@ reconfigure the enlistment. > With the `--all` option, all enlistments currently registered with Scalar > will be reconfigured. Use this option after each Scalar upgrade. > > +Diagnose > +~~~~~~~~ > + > +diagnose [<enlistment>]:: > + When reporting issues with Scalar, it is often helpful to provide the > + information gathered by this command, including logs and certain > + statistics describing the data shape of the current enlistment. > ++ > +The output of this command is a `.zip` file that is written into > +a directory adjacent to the worktree in the `src` directory. > + > Delete > ~~~~~~ > > diff --git a/contrib/scalar/t/t9099-scalar.sh b/contrib/scalar/t/t9099-scalar.sh > index 2e1502ad45e..ecd06e207c2 100755 > --- a/contrib/scalar/t/t9099-scalar.sh > +++ b/contrib/scalar/t/t9099-scalar.sh > @@ -65,6 +65,19 @@ test_expect_success 'scalar clone' ' > ) > ' > > +SQ="'" > +test_expect_success UNZIP 'scalar diagnose' ' > + scalar diagnose cloned >out && > + sed -n "s/.*$SQ\\(.*\\.zip\\)$SQ.*/\\1/p" <out >zip_path && > + zip_path=$(cat zip_path) && > + test -n "$zip_path" && > + unzip -v "$zip_path" && > + folder=${zip_path%.zip} && > + test_path_is_missing "$folder" && > + unzip -p "$zip_path" diagnostics.log >out && > + test_file_not_empty out > +' > + > test_expect_success 'scalar reconfigure' ' > git init one/src && > scalar register one && > -- > gitgitgadget >