[PATCH v1 03/25] structured-logging: add structured logging framework

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Jeff Hostetler <jeffhost@xxxxxxxxxxxxx>

Teach git to optionally generate structured logging data in JSON using
the json-writer API.  "cmd_start" and "cmd_exit" events are generated.

Structured logging is only available when git is built with
STRUCTURED_LOGGING=1.

Structured logging is only enabled when the config setting "slog.path"
is set to an absolute pathname.

Signed-off-by: Jeff Hostetler <jeffhost@xxxxxxxxxxxxx>
---
 Documentation/config.txt |   8 ++
 compat/mingw.h           |   7 +
 config.c                 |   3 +
 git-compat-util.h        |   9 ++
 git.c                    |   8 +-
 structured-logging.c     | 366 +++++++++++++++++++++++++++++++++++++++++++++++
 structured-logging.h     |  82 +++++++++++
 usage.c                  |   4 +
 8 files changed, 486 insertions(+), 1 deletion(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index ab641bf..c79f2bf 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -3168,6 +3168,14 @@ showbranch.default::
 	The default set of branches for linkgit:git-show-branch[1].
 	See linkgit:git-show-branch[1].
 
+slog.path::
+	(EXPERIMENTAL) Enable structured logging to a file.  This must be
+	an absolute path.  (Git must be compiled with STRUCTURED_LOGGING=1.)
+
+slog.pretty::
+	(EXPERIMENTAL) Pretty-print structured log data when true.
+	(Git must be compiled with STRUCTURED_LOGGING=1.)
+
 splitIndex.maxPercentChange::
 	When the split index feature is used, this specifies the
 	percent of entries the split index can contain compared to the
diff --git a/compat/mingw.h b/compat/mingw.h
index 571019d..d8d8cd3 100644
--- a/compat/mingw.h
+++ b/compat/mingw.h
@@ -144,8 +144,15 @@ static inline int fcntl(int fd, int cmd, ...)
 	errno = EINVAL;
 	return -1;
 }
+
 /* bash cannot reliably detect negative return codes as failure */
+#if defined(STRUCTURED_LOGGING)
+#include "structured-logging.h"
+#define exit(code) exit(slog_exit_code((code) & 0xff))
+#else
 #define exit(code) exit((code) & 0xff)
+#endif
+
 #define sigemptyset(x) (void)0
 static inline int sigaddset(sigset_t *set, int signum)
 { return 0; }
diff --git a/config.c b/config.c
index fbbf0f8..b27b024 100644
--- a/config.c
+++ b/config.c
@@ -1476,6 +1476,9 @@ int git_default_config(const char *var, const char *value, void *dummy)
 		return 0;
 	}
 
+	if (starts_with(var, "slog."))
+		return slog_default_config(var, value);
+
 	/* Add other config variables here and to Documentation/config.txt. */
 	return 0;
 }
diff --git a/git-compat-util.h b/git-compat-util.h
index 9a64998..f5352fd 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -1239,4 +1239,13 @@ extern void unleak_memory(const void *ptr, size_t len);
 #define UNLEAK(var) do {} while (0)
 #endif
 
+#include "structured-logging.h"
+#if defined(STRUCTURED_LOGGING) && !defined(exit)
+/*
+ * Intercept all calls to exit() so that exit-code can be included
+ * in the "cmd_exit" message written by the at-exit routine.
+ */
+#define exit(code) exit(slog_exit_code(code))
+#endif
+
 #endif
diff --git a/git.c b/git.c
index c2f48d5..024a40d 100644
--- a/git.c
+++ b/git.c
@@ -413,6 +413,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
 		setup_work_tree();
 
 	trace_argv_printf(argv, "trace: built-in: git");
+	slog_set_command_name(p->cmd);
 
 	status = p->fn(argc, argv, prefix);
 	if (status)
@@ -700,7 +701,7 @@ static int run_argv(int *argcp, const char ***argv)
 	return done_alias;
 }
 
-int cmd_main(int argc, const char **argv)
+static int real_cmd_main(int argc, const char **argv)
 {
 	const char *cmd;
 	int done_help = 0;
@@ -779,3 +780,8 @@ int cmd_main(int argc, const char **argv)
 
 	return 1;
 }
+
+int cmd_main(int argc, const char **argv)
+{
+	return slog_wrap_main(real_cmd_main, argc, argv);
+}
diff --git a/structured-logging.c b/structured-logging.c
index 702fd84..afa2224 100644
--- a/structured-logging.c
+++ b/structured-logging.c
@@ -1,3 +1,10 @@
+#include "cache.h"
+#include "config.h"
+#include "version.h"
+#include "json-writer.h"
+#include "sigchain.h"
+#include "argv-array.h"
+
 #if !defined(STRUCTURED_LOGGING)
 /*
  * Structured logging is not available.
@@ -6,4 +13,363 @@
 
 #else
 
+#define SLOG_VERSION 0
+
+static uint64_t my__start_time;
+static uint64_t my__exit_time;
+static int my__is_config_loaded;
+static int my__is_enabled;
+static int my__is_pretty;
+static int my__signal;
+static int my__exit_code;
+static int my__pid;
+static int my__wrote_start_event;
+static int my__log_fd = -1;
+
+static char *my__log_path;
+static char *my__command_name;
+static char *my__sub_command_name;
+
+static struct argv_array my__argv = ARGV_ARRAY_INIT;
+static struct json_writer my__errors = JSON_WRITER_INIT;
+
+/*
+ * Write a single event to the structured log file.
+ */
+static void emit_event(struct json_writer *jw, const char *event_name)
+{
+	if (my__log_fd == -1) {
+		my__log_fd = open(my__log_path,
+				  O_WRONLY | O_APPEND | O_CREAT,
+				  0644);
+		if (my__log_fd == -1) {
+			warning("slog: could not open '%s' for logging: %s",
+				my__log_path, strerror(errno));
+			my__is_enabled = 0;
+			return;
+		}
+	}
+
+	/*
+	 * A properly authored JSON string does not have a final NL
+	 * (even when pretty-printing is enabled).  Structured logging
+	 * output should look like a series of terminated forms one
+	 * per line.  Temporarily append a NL to the buffer so that
+	 * the disk write happens atomically.
+	 */
+	strbuf_addch(&jw->json, '\n');
+	if (write(my__log_fd, jw->json.buf, jw->json.len) != jw->json.len)
+		warning("slog: could not write event '%s': %s",
+			event_name, strerror(errno));
+
+	strbuf_setlen(&jw->json, jw->json.len - 1);
+}
+
+static void emit_start_event(void)
+{
+	struct json_writer jw = JSON_WRITER_INIT;
+
+	/* build "cmd_start" event message */
+	jw_object_begin(&jw, my__is_pretty);
+	{
+		jw_object_string(&jw, "event", "cmd_start");
+		jw_object_intmax(&jw, "clock_us", (intmax_t)my__start_time);
+		jw_object_intmax(&jw, "pid", (intmax_t)my__pid);
+
+		if (my__command_name && *my__command_name)
+			jw_object_string(&jw, "command", my__command_name);
+		if (my__sub_command_name && *my__sub_command_name)
+			jw_object_string(&jw, "sub_command", my__sub_command_name);
+
+		jw_object_inline_begin_array(&jw, "argv");
+		{
+			int k;
+			for (k = 0; k < my__argv.argc; k++)
+				jw_array_string(&jw, my__argv.argv[k]);
+		}
+		jw_end(&jw);
+	}
+	jw_end(&jw);
+
+	emit_event(&jw, "cmd_start");
+	jw_release(&jw);
+
+	my__wrote_start_event = 1;
+}
+
+static void emit_exit_event(void)
+{
+	struct json_writer jw = JSON_WRITER_INIT;
+	uint64_t atexit_time = getnanotime() / 1000;
+
+	/* close unterminated forms */
+	if (my__errors.json.len)
+		jw_end(&my__errors);
+
+	/* build "cmd_exit" event message */
+	jw_object_begin(&jw, my__is_pretty);
+	{
+		jw_object_string(&jw, "event", "cmd_exit");
+		jw_object_intmax(&jw, "clock_us", (intmax_t)atexit_time);
+		jw_object_intmax(&jw, "pid", (intmax_t)my__pid);
+
+		if (my__command_name && *my__command_name)
+			jw_object_string(&jw, "command", my__command_name);
+		if (my__sub_command_name && *my__sub_command_name)
+			jw_object_string(&jw, "sub_command", my__sub_command_name);
+
+		jw_object_inline_begin_array(&jw, "argv");
+		{
+			int k;
+			for (k = 0; k < my__argv.argc; k++)
+				jw_array_string(&jw, my__argv.argv[k]);
+		}
+		jw_end(&jw);
+
+		jw_object_inline_begin_object(&jw, "result");
+		{
+			jw_object_intmax(&jw, "exit_code", my__exit_code);
+			if (my__errors.json.len)
+				jw_object_sub_jw(&jw, "errors", &my__errors);
+
+			if (my__signal)
+				jw_object_intmax(&jw, "signal", my__signal);
+
+			if (my__exit_time > 0)
+				jw_object_intmax(&jw, "elapsed_core_us",
+						 my__exit_time - my__start_time);
+
+			jw_object_intmax(&jw, "elapsed_total_us",
+					 atexit_time - my__start_time);
+		}
+		jw_end(&jw);
+
+		jw_object_inline_begin_object(&jw, "version");
+		{
+			jw_object_string(&jw, "git", git_version_string);
+			jw_object_intmax(&jw, "slog", SLOG_VERSION);
+		}
+		jw_end(&jw);
+	}
+	jw_end(&jw);
+
+	emit_event(&jw, "cmd_exit");
+	jw_release(&jw);
+}
+
+static int cfg_path(const char *key, const char *value)
+{
+	if (!value)	/* valueless "[slog] path": nothing to test or copy */
+		return config_error_nonbool(key);
+	if (!is_absolute_path(value)) {
+		warning("'%s' must be an absolute path: '%s'", key, value);
+		return 0;
+	}
+	my__log_path = xstrdup(value);
+	my__is_enabled = 1;
+	return 0;
+}
+
+static int cfg_pretty(const char *key, const char *value)
+{
+	my__is_pretty = git_config_bool(key, value);
+	return 0;
+}
+
+int slog_default_config(const char *key, const char *value)
+{
+	const char *sub;
+
+	/*
+	 * git_default_config() calls slog_default_config() with "slog.*"
+	 * k/v pairs.  git_default_config() MAY or MAY NOT be called when
+	 * cmd_<command>() calls git_config().
+	 *
+	 * Remember if we've ever been called.
+	 */
+	my__is_config_loaded = 1;
+
+	if (skip_prefix(key, "slog.", &sub)) {
+		if (!strcmp(sub, "path"))
+			return cfg_path(key, value);
+		if (!strcmp(sub, "pretty"))
+			return cfg_pretty(key, value);
+	}
+
+	return 0;
+}
+
+static int lazy_load_config_cb(const char *key, const char * value, void *data)
+{
+	return slog_default_config(key, value);
+}
+
+/*
+ * If cmd_<command>() did not cause slog_default_config() to be called
+ * during git_config(), we try to lookup our config settings the first
+ * time we actually need them.
+ *
+ * (We do this rather than using read_early_config() at initialization
+ * because we want any "-c key=value" arguments to be included.)
+ */
+static inline void lazy_load_config(void)
+{
+	if (my__is_config_loaded)
+		return;
+	my__is_config_loaded = 1;
+
+	read_early_config(lazy_load_config_cb, NULL);
+}
+
+int slog_is_enabled(void)
+{
+	lazy_load_config();
+
+	return my__is_enabled;
+}
+
+static void do_final_steps(int in_signal)
+{
+	static int completed = 0;
+
+	if (completed)
+		return;
+	completed = 1;
+
+	if (slog_is_enabled()) {
+		if (!my__wrote_start_event)
+			emit_start_event();
+		emit_exit_event();
+		my__is_enabled = 0;
+	}
+
+	if (my__log_fd != -1)
+		close(my__log_fd);
+	free(my__log_path);
+	free(my__command_name);
+	free(my__sub_command_name);
+	argv_array_clear(&my__argv);
+	jw_release(&my__errors);
+}
+
+static void slog_atexit(void)
+{
+	do_final_steps(0);
+}
+
+static void slog_signal(int signo)
+{
+	my__signal = signo;
+
+	do_final_steps(1);
+
+	sigchain_pop(signo);
+	raise(signo);
+}
+
+static void intern_argv(int argc, const char **argv)
+{
+	int k;
+
+	for (k = 0; k < argc; k++)
+		argv_array_push(&my__argv, argv[k]);
+}
+
+/*
+ * Collect basic startup information before cmd_main() has a chance
+ * to alter the command line and before we have seen the config (to
+ * know if logging is enabled).  And since the config isn't loaded
+ * until cmd_main() dispatches to cmd_<command>(), we have to wait
+ * and lazy-write the "cmd_start" event.
+ *
+ * This also implies that commands such as "help" and "version" that
+ * don't need to load the config won't generate any log data.
+ */
+static void initialize(int argc, const char **argv)
+{
+	my__start_time = getnanotime() / 1000;
+	my__pid = getpid();
+
+	intern_argv(argc, argv);
+
+	atexit(slog_atexit);
+
+	/*
+	 * Put up backstop signal handler to ensure we get the "cmd_exit"
+	 * event.  This is primarily for when the pager throws SIGPIPE
+	 * when the user quits.
+	 */
+	sigchain_push(SIGPIPE, slog_signal);
+}
+
+int slog_wrap_main(slog_fn_main_t fn_main, int argc, const char **argv)
+{
+	int result;
+
+	initialize(argc, argv);
+	result = fn_main(argc, argv);
+	slog_exit_code(result);
+
+	return result;
+}
+
+void slog_set_command_name(const char *command_name)
+{
+	/*
+	 * Capture the command name even if logging is not enabled
+	 * because we don't know if the config has been loaded yet by
+	 * the cmd_<command>() and/or it may be too early to force a
+	 * lazy load.
+	 */
+	if (my__command_name)
+		free(my__command_name);
+	my__command_name = xstrdup(command_name);
+}
+
+void slog_set_sub_command_name(const char *sub_command_name)
+{
+	/*
+	 * Capture the sub-command name even if logging is not enabled
+	 * because we don't know if the config has been loaded yet by
+	 * the cmd_<command>() and/or it may be too early to force a
+	 * lazy load.
+	 */
+	if (my__sub_command_name)
+		free(my__sub_command_name);
+	my__sub_command_name = xstrdup(sub_command_name);
+}
+
+int slog_is_pretty(void)
+{
+	return my__is_pretty;
+}
+
+int slog_exit_code(int exit_code)
+{
+	my__exit_time = getnanotime() / 1000;
+	my__exit_code = exit_code;
+
+	return exit_code;
+}
+
+void slog_error_message(const char *prefix, const char *fmt, va_list params)
+{
+	struct strbuf em = STRBUF_INIT;
+	va_list copy_params;
+
+	if (prefix && *prefix)
+		strbuf_addstr(&em, prefix);
+
+	va_copy(copy_params, params);
+	strbuf_vaddf(&em, fmt, copy_params);
+	va_end(copy_params);
+
+	if (!my__errors.json.len)
+		jw_array_begin(&my__errors, my__is_pretty);
+	jw_array_string(&my__errors, em.buf);
+	/* leave my__errors array unterminated for now */
+
+	strbuf_release(&em);
+}
+
 #endif
diff --git a/structured-logging.h b/structured-logging.h
index c9e8c1d..61e98e6 100644
--- a/structured-logging.h
+++ b/structured-logging.h
@@ -1,13 +1,95 @@
 #ifndef STRUCTURED_LOGGING_H
 #define STRUCTURED_LOGGING_H
 
+typedef int (*slog_fn_main_t)(int, const char **);
+
 #if !defined(STRUCTURED_LOGGING)
 /*
  * Structured logging is not available.
  * Stub out all API routines.
  */
+#define slog_is_available() (0)
+#define slog_default_config(k, v) (0)
+#define slog_wrap_main(real_cmd_main, argc, argv) ((real_cmd_main)((argc), (argv)))
+#define slog_set_command_name(n) do { } while (0)
+#define slog_set_sub_command_name(n) do { } while (0)
+#define slog_is_enabled() (0)
+#define slog_is_pretty() (0)
+#define slog_exit_code(exit_code) (exit_code)
+#define slog_error_message(prefix, fmt, params) do { } while (0)
 
 #else
 
+/*
+ * Is structured logging available (compiled-in)?
+ */
+#define slog_is_available() (1)
+
+/*
+ * Process "slog.*" config settings.
+ */
+int slog_default_config(const char *key, const char *value);
+
+/*
+ * Wrapper for the "real" cmd_main().  Set up structured logging state,
+ * run the given real_cmd_main(), and record its return value.
+ *
+ * Note:  common-main.c is shared by many top-level commands.
+ * common-main.c:main() does common process setup before calling
+ * the version of cmd_main() found in the executable.  Some commands
+ * SHOULD NOT do logging (such as t/helper/test-tool).  Ones that do
+ * need some common initialization/teardown.
+ *
+ * Use this function for any top-level command that should do logging.
+ *
+ * Usage:
+ *
+ * static int real_cmd_main(int argc, const char **argv)
+ * {
+ *     ....the actual code for the command....
+ * }
+ *
+ * int cmd_main(int argc, const char **argv)
+ * {
+ *     return slog_wrap_main(real_cmd_main, argc, argv);
+ * }
+ *
+ *
+ * See git.c for an example.
+ */
+int slog_wrap_main(slog_fn_main_t real_cmd_main, int argc, const char **argv);
+
+/*
+ * Record a canonical command name and optional sub-command name for the
+ * current process.  For example, "checkout" and "switch-branch".
+ */
+void slog_set_command_name(const char *name);
+void slog_set_sub_command_name(const char *name);
+
+/*
+ * Is structured logging enabled?
+ */
+int slog_is_enabled(void);
+
+/*
+ * Is JSON pretty-printing enabled?
+ */
+int slog_is_pretty(void);
+
+/*
+ * Register the process exit code with the structured logging layer
+ * and return it.  This value will appear in the final "cmd_exit" event.
+ *
+ * Use this to wrap all calls to exit().
+ * Use this before returning in main().
+ */
+int slog_exit_code(int exit_code);
+
+/*
+ * Append formatted error message to the structured log result.
+ * Messages from this will appear in the final "cmd_exit" event.
+ */
+void slog_error_message(const char *prefix, const char *fmt, va_list params);
+
 #endif /* STRUCTURED_LOGGING */
 #endif /* STRUCTURED_LOGGING_H */
diff --git a/usage.c b/usage.c
index cc80333..5d48f6b 100644
--- a/usage.c
+++ b/usage.c
@@ -27,12 +27,16 @@ static NORETURN void usage_builtin(const char *err, va_list params)
 
 static NORETURN void die_builtin(const char *err, va_list params)
 {
+	slog_error_message("fatal: ", err, params);
+
 	vreportf("fatal: ", err, params);
 	exit(128);
 }
 
 static void error_builtin(const char *err, va_list params)
 {
+	slog_error_message("error: ", err, params);
+
 	vreportf("error: ", err, params);
 }
 
-- 
2.9.3




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux