From: Jeff Hostetler <jeffhost@xxxxxxxxxxxxx> Add global counters to Trace2. Create a mechanism in Trace2 to count an activity and emit a single "counter" event at the end of the program. This is an alternative to the existing "data" events that are emitted immediately. Create an array of counters (indexed by `enum trace2_counter_id`) to allow various activites to be tracked as desired. Preload the array with two counters for testing purposes. Create unit tests to demonstrate and verify. Signed-off-by: Jeff Hostetler <jeffhost@xxxxxxxxxxxxx> --- Documentation/technical/api-trace2.txt | 67 ++++++++++++++++++++ Makefile | 1 + t/helper/test-trace2.c | 88 +++++++++++++++++++++++++- t/t0211-trace2-perf.sh | 42 ++++++++++++ t/t0212-trace2-event.sh | 41 ++++++++++++ trace2.c | 50 +++++++++++++++ trace2.h | 33 ++++++++++ trace2/tr2_ctr.c | 67 ++++++++++++++++++++ trace2/tr2_ctr.h | 79 +++++++++++++++++++++++ trace2/tr2_tls.c | 29 +++++++++ trace2/tr2_tls.h | 16 +++++ 11 files changed, 512 insertions(+), 1 deletion(-) create mode 100644 trace2/tr2_ctr.c create mode 100644 trace2/tr2_ctr.h diff --git a/Documentation/technical/api-trace2.txt b/Documentation/technical/api-trace2.txt index bdba0f92280..a3ea867ff92 100644 --- a/Documentation/technical/api-trace2.txt +++ b/Documentation/technical/api-trace2.txt @@ -1297,6 +1297,73 @@ d0 | main | atexit | | 0.111026 | | In this example, the "test1" timer was started 4 times and ran for 0.000393 seconds. +Counter Events:: + + Trace2 also provides global counters as an alternative to regions + and data events. These make it possible to count an activity of + interest, such a call to a library routine, during the program + and get a single counter event at the end. ++ +At the end of the program, a single summary event is emitted; this +value is aggregated across all threads. These events have "summary" +as their thread name. ++ +For some counters, individual (per-thread) counter events are also +generated. This may be helpful in understanding how work is balanced +between threads in some circumstances. ++ +---------------- +static void *load_cache_entries_thread(void *_data) +{ + struct load_cache_entries_thread_data *p = _data; + int i; + + trace2_thread_start("load_cache_entries"); + ... + trace2_thread_exit(); +} + +static unsigned long load_cache_entry_block(struct index_state *istate, + struct mem_pool *ce_mem_pool, int offset, int nr, const char *mmap, + unsigned long start_offset, const struct cache_entry *previous_ce) +{ + int i; + unsigned long src_offset = start_offset; + + trace2_counter_increment(TRACE2_COUNTER_ID_TEST2, nr); + + for (i = offset; i < offset + nr; i++) { + ... + } +} +---------------- ++ +---------------- +$ export GIT_TRACE2_PERF_BRIEF=1 +$ export GIT_TRACE2_PERF=~/log.perf +$ git status +... +$ cat ~/log.perf +... +d0 | main | exit | | 53.977680 | | | code:0 +d0 | th12:load_cache_entries | counter | | 53.977708 | | test | name:test2 value:193220 +d0 | th11:load_cache_entries | counter | | 53.977708 | | test | name:test2 value:193221 +d0 | th10:load_cache_entries | counter | | 53.977708 | | test | name:test2 value:193221 +d0 | th09:load_cache_entries | counter | | 53.977708 | | test | name:test2 value:193221 +d0 | th08:load_cache_entries | counter | | 53.977708 | | test | name:test2 value:193221 +d0 | th07:load_cache_entries | counter | | 53.977708 | | test | name:test2 value:193221 +d0 | th06:load_cache_entries | counter | | 53.977708 | | test | name:test2 value:193221 +d0 | th05:load_cache_entries | counter | | 53.977708 | | test | name:test2 value:193221 +d0 | th04:load_cache_entries | counter | | 53.977708 | | test | name:test2 value:193221 +d0 | th03:load_cache_entries | counter | | 53.977708 | | test | name:test2 value:193221 +d0 | th02:load_cache_entries | counter | | 53.977708 | | test | name:test2 value:193221 +d0 | summary | counter | | 53.977708 | | test | name:test2 value:2125430 +d0 | main | atexit | | 53.977708 | | | code:0 +---------------- ++ +This example shows the value computed by each of the 11 +`load_cache_entries` threads and the total across all threads. + == Future Work === Relationship to the Existing Trace Api (api-trace.txt) diff --git a/Makefile b/Makefile index 8b657f0162a..cc5bd8593f1 100644 --- a/Makefile +++ b/Makefile @@ -1020,6 +1020,7 @@ LIB_OBJS += trace.o LIB_OBJS += trace2.o LIB_OBJS += trace2/tr2_cfg.o LIB_OBJS += trace2/tr2_cmd_name.o +LIB_OBJS += trace2/tr2_ctr.o LIB_OBJS += trace2/tr2_dst.o LIB_OBJS += trace2/tr2_sid.o LIB_OBJS += trace2/tr2_tmr.o diff --git a/t/helper/test-trace2.c b/t/helper/test-trace2.c index 51d022422bf..a7dbecfda9a 100644 --- a/t/helper/test-trace2.c +++ b/t/helper/test-trace2.c @@ -270,7 +270,7 @@ static int ut_009timer(int argc, const char **argv) const char *usage_error = "expect <count> <ms_delay> <threads>"; - struct ut_009_data data = { 0, 0 }; + struct ut_009_data data = { 0 }; int nr_threads = 0; int k; pthread_t *pids = NULL; @@ -301,6 +301,90 @@ static int ut_009timer(int argc, const char **argv) return 0; } +/* + * Single-threaded counter test. Add several values to the TEST1 counter. + * The test script can verify that an aggregate Trace2 "counter" event is + * emitted containing the sum of the values provided. + */ +static int ut_010counter(int argc, const char **argv) +{ + const char *usage_error = + "expect <v1> [<v2> [...]]"; + int value; + int k; + + if (argc < 1) + die("%s", usage_error); + + for (k = 0; k < argc; k++) { + if (get_i(&value, argv[k])) + die("invalid value[%s] -- %s", + argv[k], usage_error); + trace2_counter_add(TRACE2_COUNTER_ID_TEST1, value); + } + + return 0; +} + +struct ut_011_data { + int v1, v2; +}; + +static void *ut_011counter_thread_proc(void *_ut_011_data) +{ + struct ut_011_data *data = _ut_011_data; + + trace2_thread_start("ut_011"); + + trace2_counter_add(TRACE2_COUNTER_ID_TEST2, data->v1); + trace2_counter_add(TRACE2_COUNTER_ID_TEST2, data->v2); + + trace2_thread_exit(); + return NULL; +} + +/* + * Multi-threaded counter test. Create several threads that each + * increment the TEST2 global counter. The test script can verify + * that an individual Trace2 "counter" event for each thread and an + * aggregate "counter" event are generated. + */ +static int ut_011counter(int argc, const char **argv) +{ + const char *usage_error = + "expect <v1> <v2> <threads>"; + + struct ut_011_data data = { 0, 0 }; + int nr_threads = 0; + int k; + pthread_t *pids = NULL; + + if (argc != 3) + die("%s", usage_error); + if (get_i(&data.v1, argv[0])) + die("%s", usage_error); + if (get_i(&data.v2, argv[1])) + die("%s", usage_error); + if (get_i(&nr_threads, argv[2])) + die("%s", usage_error); + + CALLOC_ARRAY(pids, nr_threads); + + for (k = 0; k < nr_threads; k++) { + if (pthread_create(&pids[k], NULL, ut_011counter_thread_proc, &data)) + die("failed to create thread[%d]", k); + } + + for (k = 0; k < nr_threads; k++) { + if (pthread_join(pids[k], NULL)) + die("failed to join thread[%d]", k); + } + + free(pids); + + return 0; +} + /* * Usage: * test-tool trace2 <ut_name_1> <ut_usage_1> @@ -320,6 +404,8 @@ static struct unit_test ut_table[] = { { ut_007bug, "007bug", "" }, { ut_008timer, "008timer", "<count> <ms_delay>" }, { ut_009timer, "009timer", "<count> <ms_delay> <threads>" }, + { ut_010counter, "010counter","<v1> [<v2> [<v3> [...]]]" }, + { ut_011counter, "011counter","<v1> <v2> <threads>" }, }; /* clang-format on */ diff --git a/t/t0211-trace2-perf.sh b/t/t0211-trace2-perf.sh index 381c3eea458..5f9a3533ce4 100755 --- a/t/t0211-trace2-perf.sh +++ b/t/t0211-trace2-perf.sh @@ -219,4 +219,46 @@ test_expect_success 'test stopwatch timers - summary and threads' ' have_timer_event "summary" "test2" 15 actual ' +# Exercise the global counter "test" in a loop and confirm that we get an +# event with the sum. +# + +have_counter_event () { + thread=$1 + name=$2 + value=$3 + file=$4 + + pattern="d0|${thread}|counter||_T_ABS_||test" + pattern="${pattern}|name:${name}" + pattern="${pattern} value:${value}" + + grep "${pattern}" ${file} + + return $? +} + +test_expect_success 'test global counters - global, single-thead' ' + test_when_finished "rm trace.perf actual" && + test_config_global trace2.perfBrief 1 && + test_config_global trace2.perfTarget "$(pwd)/trace.perf" && + test-tool trace2 010counter 2 3 5 7 11 13 && + perl "$TEST_DIRECTORY/t0211/scrub_perf.perl" <trace.perf >actual && + + have_counter_event "summary" "test1" 41 actual +' + +test_expect_success 'test global counters - global+threads' ' + test_when_finished "rm trace.perf actual" && + test_config_global trace2.perfBrief 1 && + test_config_global trace2.perfTarget "$(pwd)/trace.perf" && + test-tool trace2 011counter 5 10 3 && + perl "$TEST_DIRECTORY/t0211/scrub_perf.perl" <trace.perf >actual && + + have_counter_event "th01:ut_011" "test2" 15 actual && + have_counter_event "th02:ut_011" "test2" 15 actual && + have_counter_event "th03:ut_011" "test2" 15 actual && + have_counter_event "summary" "test2" 45 actual +' + test_done diff --git a/t/t0212-trace2-event.sh b/t/t0212-trace2-event.sh index 277688fdbc4..9e76ef5caa7 100755 --- a/t/t0212-trace2-event.sh +++ b/t/t0212-trace2-event.sh @@ -368,4 +368,45 @@ test_expect_success 'test stopwatch timers - global+threads' ' have_timer_event "summary" "test2" 15 trace.event ' +# Exercise the global counter in a loop and confirm that we get the +# expected sum in an event record. +# + +have_counter_event () { + thread=$1 + name=$2 + value=$3 + file=$4 + + pattern="\"event\":\"counter\"" + pattern="${pattern}.*\"thread\":\"${thread}\"" + pattern="${pattern}.*\"name\":\"${name}\"" + pattern="${pattern}.*\"value\":${value}" + + grep "${pattern}" ${file} + + return $? +} + +test_expect_success 'test global counter - global, single-thread' ' + test_when_finished "rm trace.event" && + test_config_global trace2.eventBrief 1 && + test_config_global trace2.eventTarget "$(pwd)/trace.event" && + test-tool trace2 010counter 2 3 5 7 11 13 && + + have_counter_event "summary" "test1" 41 trace.event +' + +test_expect_success 'test global counter - global+threads' ' + test_when_finished "rm trace.event" && + test_config_global trace2.eventBrief 1 && + test_config_global trace2.eventTarget "$(pwd)/trace.event" && + test-tool trace2 011counter 5 10 3 && + + have_counter_event "th01:ut_011" "test2" 15 trace.event && + have_counter_event "th02:ut_011" "test2" 15 trace.event && + have_counter_event "th03:ut_011" "test2" 15 trace.event && + have_counter_event "summary" "test2" 45 trace.event +' + test_done diff --git a/trace2.c b/trace2.c index 23289dd6eb4..aa6ed6dd3ee 100644 --- a/trace2.c +++ b/trace2.c @@ -8,6 +8,7 @@ #include "version.h" #include "trace2/tr2_cfg.h" #include "trace2/tr2_cmd_name.h" +#include "trace2/tr2_ctr.h" #include "trace2/tr2_dst.h" #include "trace2/tr2_sid.h" #include "trace2/tr2_sysenv.h" @@ -114,6 +115,41 @@ static void tr2main_emit_thread_timers(uint64_t us_elapsed_absolute) us_elapsed_absolute); } +static void tr2main_emit_summary_counters(uint64_t us_elapsed_absolute) +{ + struct tr2_tgt *tgt_j; + int j; + struct tr2_counter_block merged = { { { 0 } } }; + + /* + * Sum across all of the per-thread counter data into + * a single composite block of counter values. + */ + tr2tls_aggregate_counter_blocks(&merged); + + /* + * Emit "summary" counter events for each composite counter value + * that had activity. + */ + for_each_wanted_builtin (j, tgt_j) + if (tgt_j->pfn_counter) + tr2_emit_counter_block(tgt_j->pfn_counter, + us_elapsed_absolute, + &merged, "summary"); +} + +static void tr2main_emit_thread_counters(uint64_t us_elapsed_absolute) +{ + struct tr2_tgt *tgt_j; + int j; + + for_each_wanted_builtin (j, tgt_j) + if (tgt_j->pfn_counter) + tr2tls_emit_counter_blocks_by_thread( + tgt_j->pfn_counter, + us_elapsed_absolute); +} + static int tr2main_exit_code; /* @@ -144,6 +180,9 @@ static void tr2main_atexit_handler(void) tr2main_emit_thread_timers(us_elapsed_absolute); tr2main_emit_summary_timers(us_elapsed_absolute); + tr2main_emit_thread_counters(us_elapsed_absolute); + tr2main_emit_summary_counters(us_elapsed_absolute); + for_each_wanted_builtin (j, tgt_j) if (tgt_j->pfn_atexit) tgt_j->pfn_atexit(us_elapsed_absolute, @@ -897,3 +936,14 @@ void trace2_timer_stop(enum trace2_timer_id tid) tr2_stop_timer(tid); } + +void trace2_counter_add(enum trace2_counter_id cid, uint64_t value) +{ + if (!trace2_enabled) + return; + + if (cid < 0 || cid >= TRACE2_NUMBER_OF_COUNTERS) + BUG("invalid counter id: %d", cid); + + tr2_counter_increment(cid, value); +} diff --git a/trace2.h b/trace2.h index 22da5c5516c..d4ed602c19a 100644 --- a/trace2.h +++ b/trace2.h @@ -52,6 +52,7 @@ struct json_writer; * [] trace2_data* -- emit region/thread/repo data messages. * [] trace2_printf* -- legacy trace[1] messages. * [] trace2_timer* -- start/stop stopwatch timer (messages are deferred). + * [] trace2_counter* -- global counters (messages are deferrred). */ /* @@ -573,4 +574,36 @@ enum trace2_timer_id { void trace2_timer_start(enum trace2_timer_id tid); void trace2_timer_stop(enum trace2_timer_id tid); +/* + * Define the set of global counters. + * + * We can add more at any time, but they must be defined at compile + * time (to avoid the need to dynamically allocate and synchronize + * them between different threads). + * + * These must start at 0 and be contiguous (because we them elsewhere + * as array indexes). + * + * Any value added to this enum must also be added to the counter + * definitions array. See `trace2/tr2_ctr.c:tr2_counter_def_block[]`. + */ +enum trace2_counter_id { + /* + * Define two counters for testing. See `t/helper/test-trace2.c`. + * These can be used for ad hoc testing, but should not be used + * for permanent analysis code. + */ + TRACE2_COUNTER_ID_TEST1 = 0, /* emits summary event only */ + TRACE2_COUNTER_ID_TEST2, /* emits summary and thread events */ + + + /* Add additional counter definitions before here. */ + TRACE2_NUMBER_OF_COUNTERS +}; + +/* + * Increment global counter by value. + */ +void trace2_counter_add(enum trace2_counter_id cid, uint64_t value); + #endif /* TRACE2_H */ diff --git a/trace2/tr2_ctr.c b/trace2/tr2_ctr.c new file mode 100644 index 00000000000..ce80ceb5476 --- /dev/null +++ b/trace2/tr2_ctr.c @@ -0,0 +1,67 @@ +#include "cache.h" +#include "thread-utils.h" +#include "trace2/tr2_tls.h" +#include "trace2/tr2_ctr.h" + +/* + * Define metadata for each global counter. This list must match the + * set defined in "enum trace2_counter_id". + */ +struct tr2_counter_def { + const char *category; + const char *name; + + unsigned int want_thread_events:1; +}; + +static struct tr2_counter_def tr2_counter_def_block[TRACE2_NUMBER_OF_COUNTERS] = { + [TRACE2_COUNTER_ID_TEST1] = { "test", "test1", 0 }, + [TRACE2_COUNTER_ID_TEST2] = { "test", "test2", 1 }, +}; + +void tr2_counter_increment(enum trace2_counter_id cid, uint64_t value) +{ + struct tr2tls_thread_ctx *ctx = tr2tls_get_self(); + struct tr2_counter *c = &ctx->counters.counter[cid]; + + c->value += value; +} + +void tr2_merge_counter_block(struct tr2_counter_block *merged, + const struct tr2_counter_block *src) +{ + enum trace2_counter_id cid; + + for (cid = 0; cid < TRACE2_NUMBER_OF_COUNTERS; cid++) { + struct tr2_counter *c_merged = &merged->counter[cid]; + const struct tr2_counter *c = &src->counter[cid]; + + c_merged->is_aggregate = 1; + + c_merged->value += c->value; + } + + merged->is_aggregate = 1; +} + +void tr2_emit_counter_block(tr2_tgt_evt_counter_t *pfn, + uint64_t us_elapsed_absolute, + const struct tr2_counter_block *blk, + const char *thread_name) +{ + enum trace2_counter_id cid; + + for (cid = 0; cid < TRACE2_NUMBER_OF_COUNTERS; cid++) { + const struct tr2_counter *c = &blk->counter[cid]; + const struct tr2_counter_def *d = &tr2_counter_def_block[cid]; + + if (!c->value) + continue; /* counter was not used */ + + if (!d->want_thread_events && !c->is_aggregate) + continue; /* per-thread events not wanted */ + + pfn(us_elapsed_absolute, thread_name, d->category, d->name, + c->value); + } +} diff --git a/trace2/tr2_ctr.h b/trace2/tr2_ctr.h new file mode 100644 index 00000000000..fd6fbef89a2 --- /dev/null +++ b/trace2/tr2_ctr.h @@ -0,0 +1,79 @@ +#ifndef TR2_CTR_H +#define TR2_CTR_H + +#include "trace2.h" +#include "trace2/tr2_tgt.h" + +/* + * Define a mechanism to allow global "counters". + * + * Counters can be used count interesting activity that does not fit + * the "region and data" model, such as code called from many + * different regions and/or where you want to count a number of items, + * but don't have control of when the last item will be processed, + * such as counter the number of calls to `lstat()`. + * + * Counters differ from Trace2 "data" events. Data events are emitted + * immediately and are appropriate for documenting loop counters and + * etc. Counter values are accumulated during the program and the final + * counter value event is emitted at program exit. + * + * To make this model efficient, we define a compile-time fixed set + * of counters and counter ids. This lets us avoid the complexities + * of dynamically allocating a counter and sharing that definition + * with other threads. + * + * We define (at compile time) a set of "counter ids" to access the + * various counters inside of a fixed size "counter block". + * + * A counter defintion table provides the counter category and name + * so we can eliminate those arguments from the public counter API. + * These are defined in a parallel tabel in `tr2_ctr.c`. + * + * Each thread has a private block of counters in its thread local + * storage data so no locks are required for a thread to increment + * it's version of the counter. At program exit, the counter blocks + * from all of the per-thread counters are added together to give the + * final summary value for the each global counter. + */ + +/* + * The definition of an individual counter. + */ +struct tr2_counter { + uint64_t value; + + unsigned int is_aggregate:1; +}; + +/* + * Compile time fixed block of all defined counters. + */ +struct tr2_counter_block { + struct tr2_counter counter[TRACE2_NUMBER_OF_COUNTERS]; + + unsigned int is_aggregate:1; +}; + +/* + * Add "value" to the global counter. + */ +void tr2_counter_increment(enum trace2_counter_id cid, uint64_t value); + +/* + * Accumulate counter data from the source block into the merged block. + */ +void tr2_merge_counter_block(struct tr2_counter_block *merged, + const struct tr2_counter_block *src); + +/* + * Send counter data for all counters in this block to the target. + * + * This will generate an event record for each counter that had activity. + */ +void tr2_emit_counter_block(tr2_tgt_evt_counter_t *pfn, + uint64_t us_elapsed_absolute, + const struct tr2_counter_block *blk, + const char *thread_name); + +#endif /* TR2_CTR_H */ diff --git a/trace2/tr2_tls.c b/trace2/tr2_tls.c index 675f6aeef31..28ea55863d1 100644 --- a/trace2/tr2_tls.c +++ b/trace2/tr2_tls.c @@ -1,5 +1,6 @@ #include "cache.h" #include "thread-utils.h" +#include "trace2/tr2_ctr.h" #include "trace2/tr2_tls.h" #include "trace2/tr2_tmr.h" @@ -233,3 +234,31 @@ void tr2_emit_timers_by_thread(tr2_tgt_evt_timer_t *pfn, ctx = next; } } + +void tr2tls_aggregate_counter_blocks(struct tr2_counter_block *merged) +{ + struct tr2tls_thread_ctx *ctx = tr2tls_ctx_list; + + while (ctx) { + struct tr2tls_thread_ctx *next = ctx->next_ctx; + + tr2_merge_counter_block(merged, &ctx->counters); + + ctx = next; + } +} + +void tr2tls_emit_counter_blocks_by_thread(tr2_tgt_evt_counter_t *pfn, + uint64_t us_elapsed_absolute) +{ + struct tr2tls_thread_ctx *ctx = tr2tls_ctx_list; + + while (ctx) { + struct tr2tls_thread_ctx *next = ctx->next_ctx; + + tr2_emit_counter_block(pfn, us_elapsed_absolute, &ctx->counters, + ctx->thread_name); + + ctx = next; + } +} diff --git a/trace2/tr2_tls.h b/trace2/tr2_tls.h index 72e37beb1e7..503829bbd44 100644 --- a/trace2/tr2_tls.h +++ b/trace2/tr2_tls.h @@ -2,6 +2,7 @@ #define TR2_TLS_H #include "strbuf.h" +#include "trace2/tr2_ctr.h" #include "trace2/tr2_tmr.h" struct tr2tls_thread_ctx { @@ -12,10 +13,25 @@ struct tr2tls_thread_ctx { int thread_id; struct tr2_timer_block timers; + struct tr2_counter_block counters; char thread_name[FLEX_ARRAY]; }; +/* + * Iterate over the global list of threads and aggregate the + * counter data into the given counter block. The resulting block + * will contain the global counter sums. + */ +void tr2tls_aggregate_counter_blocks(struct tr2_counter_block *merged); + +/* + * Iterate over the global list of threads and emit "per-thread" + * counter data for each. + */ +void tr2tls_emit_counter_blocks_by_thread(tr2_tgt_evt_counter_t *pfn, + uint64_t us_elapsed_absolute); + /* * Iterate over the global list of threads and aggregate the timer * data into the given timer block. The resulting block will contain -- gitgitgadget