Re: [PATCH bpf-next] selftests/bpf: Add benchmark for local_storage RCU Tasks Trace usage

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Jun 23, 2022 at 04:46:09PM -0700, Dave Marchevsky wrote:
> This benchmark measures grace period latency and kthread cpu usage of
> RCU Tasks Trace when many processes are creating/deleting BPF
> local_storage. Intent here is to quantify improvement on these metrics
> after Paul's recent RCU Tasks patches [0].
> 
> Specifically, fork 15k tasks which call a bpf prog that creates/destroys
> task local_storage and sleep in a loop, resulting in many
> call_rcu_tasks_trace calls.
> 
> To determine grace period latency, trace time elapsed between
> rcu_tasks_trace_pregp_step and rcu_tasks_trace_postgp; for cpu usage
> look at rcu_task_trace_kthread's stime in /proc/PID/stat.
> 
> On my virtualized test environment (Skylake, 8 cpus) benchmark results
> demonstrate significant improvement:
> 
> BEFORE Paul's patches:
> 
>   SUMMARY tasks_trace grace period latency        avg 22298.551 us stddev 1302.165 us
>   SUMMARY ticks per tasks_trace grace period      avg 2.291 stddev 0.324
> 
> AFTER Paul's patches:
> 
>   SUMMARY tasks_trace grace period latency        avg 16969.197 us  stddev 2525.053 us
>   SUMMARY ticks per tasks_trace grace period      avg 1.146 stddev 0.178
> 
> Note that since these patches are not in bpf-next benchmarking was done
> by cherry-picking this patch onto rcu tree.
> 
>   [0]: https://lore.kernel.org/rcu/20220620225402.GA3842369@paulmck-ThinkPad-P17-Gen-1/
> 
> Signed-off-by: Dave Marchevsky <davemarchevsky@xxxxxx>

Nice!  Thank you!!!

Acked-by: Paul E. McKenney <paulmck@xxxxxxxxxx>

> ---
>  tools/testing/selftests/bpf/Makefile          |   4 +-
>  tools/testing/selftests/bpf/bench.c           |  42 +++
>  tools/testing/selftests/bpf/bench.h           |  12 +
>  .../bench_local_storage_rcu_tasks_trace.c     | 272 ++++++++++++++++++
>  ...run_bench_local_storage_rcu_tasks_trace.sh |  11 +
>  .../local_storage_rcu_tasks_trace_bench.c     |  65 +++++
>  6 files changed, 405 insertions(+), 1 deletion(-)
>  create mode 100644 tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
>  create mode 100755 tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh
>  create mode 100644 tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c
> 
> diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
> index 4fbd88a8ed9e..9350f0e33727 100644
> --- a/tools/testing/selftests/bpf/Makefile
> +++ b/tools/testing/selftests/bpf/Makefile
> @@ -572,6 +572,7 @@ $(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h
>  $(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h
>  $(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h
>  $(OUTPUT)/bench_local_storage.o: $(OUTPUT)/local_storage_bench.skel.h
> +$(OUTPUT)/bench_local_storage_rcu_tasks_trace.o: $(OUTPUT)/local_storage_rcu_tasks_trace_bench.skel.h
>  $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
>  $(OUTPUT)/bench: LDLIBS += -lm
>  $(OUTPUT)/bench: $(OUTPUT)/bench.o \
> @@ -585,7 +586,8 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
>  		 $(OUTPUT)/bench_bpf_loop.o \
>  		 $(OUTPUT)/bench_strncmp.o \
>  		 $(OUTPUT)/bench_bpf_hashmap_full_update.o \
> -		 $(OUTPUT)/bench_local_storage.o
> +		 $(OUTPUT)/bench_local_storage.o \
> +		 $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o
>  	$(call msg,BINARY,,$@)
>  	$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
>  
> diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
> index 1e7b5d4b1f11..c1f20a147462 100644
> --- a/tools/testing/selftests/bpf/bench.c
> +++ b/tools/testing/selftests/bpf/bench.c
> @@ -79,6 +79,43 @@ void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns)
>  	       hits_per_sec, hits_per_prod, drops_per_sec, hits_per_sec + drops_per_sec);
>  }
>  
> +void
> +grace_period_latency_basic_stats(struct bench_res res[], int res_cnt, struct basic_stats *gp_stat)
> +{
> +	int i;
> +
> +	memset(gp_stat, 0, sizeof(struct basic_stats));
> +
> +	for (i = 0; i < res_cnt; i++)
> +		gp_stat->mean += res[i].gp_ns / 1000.0 / (double)res[i].gp_ct / (0.0 + res_cnt);
> +
> +#define IT_MEAN_DIFF (res[i].gp_ns / 1000.0 / (double)res[i].gp_ct - gp_stat->mean)
> +	if (res_cnt > 1) {
> +		for (i = 0; i < res_cnt; i++)
> +			gp_stat->stddev += (IT_MEAN_DIFF * IT_MEAN_DIFF) / (res_cnt - 1.0);
> +	}
> +	gp_stat->stddev = sqrt(gp_stat->stddev);
> +#undef IT_MEAN_DIFF
> +}
> +
> +void
> +grace_period_ticks_basic_stats(struct bench_res res[], int res_cnt, struct basic_stats *gp_stat)
> +{
> +	int i;
> +
> +	memset(gp_stat, 0, sizeof(struct basic_stats));
> +	for (i = 0; i < res_cnt; i++)
> +		gp_stat->mean += res[i].stime / (double)res[i].gp_ct / (0.0 + res_cnt);
> +
> +#define IT_MEAN_DIFF (res[i].stime / (double)res[i].gp_ct - gp_stat->mean)
> +	if (res_cnt > 1) {
> +		for (i = 0; i < res_cnt; i++)
> +			gp_stat->stddev += (IT_MEAN_DIFF * IT_MEAN_DIFF) / (res_cnt - 1.0);
> +	}
> +	gp_stat->stddev = sqrt(gp_stat->stddev);
> +#undef IT_MEAN_DIFF
> +}
> +
>  void hits_drops_report_final(struct bench_res res[], int res_cnt)
>  {
>  	int i;
> @@ -236,6 +273,7 @@ extern struct argp bench_ringbufs_argp;
>  extern struct argp bench_bloom_map_argp;
>  extern struct argp bench_bpf_loop_argp;
>  extern struct argp bench_local_storage_argp;
> +extern struct argp bench_local_storage_rcu_tasks_trace_argp;
>  extern struct argp bench_strncmp_argp;
>  
>  static const struct argp_child bench_parsers[] = {
> @@ -244,6 +282,8 @@ static const struct argp_child bench_parsers[] = {
>  	{ &bench_bpf_loop_argp, 0, "bpf_loop helper benchmark", 0 },
>  	{ &bench_local_storage_argp, 0, "local_storage benchmark", 0 },
>  	{ &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 },
> +	{ &bench_local_storage_rcu_tasks_trace_argp, 0,
> +		"local_storage RCU Tasks Trace slowdown benchmark", 0 },
>  	{},
>  };
>  
> @@ -449,6 +489,7 @@ extern const struct bench bench_bpf_hashmap_full_update;
>  extern const struct bench bench_local_storage_cache_seq_get;
>  extern const struct bench bench_local_storage_cache_interleaved_get;
>  extern const struct bench bench_local_storage_cache_hashmap_control;
> +extern const struct bench bench_local_storage_tasks_trace;
>  
>  static const struct bench *benchs[] = {
>  	&bench_count_global,
> @@ -487,6 +528,7 @@ static const struct bench *benchs[] = {
>  	&bench_local_storage_cache_seq_get,
>  	&bench_local_storage_cache_interleaved_get,
>  	&bench_local_storage_cache_hashmap_control,
> +	&bench_local_storage_tasks_trace,
>  };
>  
>  static void setup_benchmark()
> diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
> index 4b15286753ba..d748255877e2 100644
> --- a/tools/testing/selftests/bpf/bench.h
> +++ b/tools/testing/selftests/bpf/bench.h
> @@ -30,11 +30,19 @@ struct env {
>  	struct cpu_set cons_cpus;
>  };
>  
> +struct basic_stats {
> +	double mean;
> +	double stddev;
> +};
> +
>  struct bench_res {
>  	long hits;
>  	long drops;
>  	long false_hits;
>  	long important_hits;
> +	unsigned long gp_ns;
> +	unsigned long gp_ct;
> +	unsigned int stime;
>  };
>  
>  struct bench {
> @@ -65,6 +73,10 @@ void ops_report_final(struct bench_res res[], int res_cnt);
>  void local_storage_report_progress(int iter, struct bench_res *res,
>  				   long delta_ns);
>  void local_storage_report_final(struct bench_res res[], int res_cnt);
> +void grace_period_latency_basic_stats(struct bench_res res[], int res_cnt,
> +				      struct basic_stats *gp_stat);
> +void grace_period_ticks_basic_stats(struct bench_res res[], int res_cnt,
> +				    struct basic_stats *gp_stat);
>  
>  static inline __u64 get_time_ns(void)
>  {
> diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
> new file mode 100644
> index 000000000000..43026430b25a
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
> @@ -0,0 +1,272 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
> +
> +#include <argp.h>
> +
> +#include <sys/prctl.h>
> +#include "local_storage_rcu_tasks_trace_bench.skel.h"
> +#include "bench.h"
> +
> +#include <signal.h>
> +
> +static struct {
> +	__u32 nr_procs;
> +	__u32 kthread_pid;
> +	bool quiet;
> +} args = {
> +	.nr_procs = 1000,
> +	.kthread_pid = 0,
> +	.quiet = false,
> +};
> +
> +enum {
> +	ARG_NR_PROCS = 7000,
> +	ARG_KTHREAD_PID = 7001,
> +	ARG_QUIET = 7002,
> +};
> +
> +static const struct argp_option opts[] = {
> +	{ "nr_procs", ARG_NR_PROCS, "NR_PROCS", 0,
> +		"Set number of user processes to spin up"},
> +	{ "kthread_pid", ARG_KTHREAD_PID, "PID", 0,
> +		"Pid of rcu_tasks_trace kthread for ticks tracking"},
> +	{ "quiet", ARG_QUIET, "{0,1}", 0,
> +		"If true, don't report progress"},
> +	{},
> +};
> +
> +static error_t parse_arg(int key, char *arg, struct argp_state *state)
> +{
> +	long ret;
> +
> +	switch (key) {
> +	case ARG_NR_PROCS:
> +		ret = strtol(arg, NULL, 10);
> +		if (ret < 1 || ret > UINT_MAX) {
> +			fprintf(stderr, "invalid nr_procs\n");
> +			argp_usage(state);
> +		}
> +		args.nr_procs = ret;
> +		break;
> +	case ARG_KTHREAD_PID:
> +		ret = strtol(arg, NULL, 10);
> +		if (ret < 1) {
> +			fprintf(stderr, "invalid kthread_pid\n");
> +			argp_usage(state);
> +		}
> +		args.kthread_pid = ret;
> +		break;
> +	case ARG_QUIET:
> +		ret = strtol(arg, NULL, 10);
> +		if (ret < 0 || ret > 1) {
> +			fprintf(stderr, "invalid quiet %ld\n", ret);
> +			argp_usage(state);
> +		}
> +		args.quiet = ret;
> +		break;
> +break;
> +	default:
> +		return ARGP_ERR_UNKNOWN;
> +	}
> +
> +	return 0;
> +}
> +
> +const struct argp bench_local_storage_rcu_tasks_trace_argp = {
> +	.options = opts,
> +	.parser = parse_arg,
> +};
> +
> +#define MAX_SLEEP_PROCS 150000
> +
> +static void validate(void)
> +{
> +	if (env.producer_cnt != 1) {
> +		fprintf(stderr, "benchmark doesn't support multi-producer!\n");
> +		exit(1);
> +	}
> +	if (env.consumer_cnt != 1) {
> +		fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
> +		exit(1);
> +	}
> +
> +	if (args.nr_procs > MAX_SLEEP_PROCS) {
> +		fprintf(stderr, "benchmark supports up to %u sleeper procs!\n",
> +			MAX_SLEEP_PROCS);
> +		exit(1);
> +	}
> +}
> +
> +static long kthread_pid_ticks(void)
> +{
> +	char procfs_path[100];
> +	long stime;
> +	FILE *f;
> +
> +	if (!args.kthread_pid)
> +		return -1;
> +
> +	sprintf(procfs_path, "/proc/%u/stat", args.kthread_pid);
> +	f = fopen(procfs_path, "r");
> +	if (f < 0) {
> +		fprintf(stderr, "couldn't open %s, exiting\n", procfs_path);
> +		exit(1);
> +	}
> +	fscanf(f, "%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %ld", &stime);
> +	fclose(f);
> +	return stime;
> +}
> +
> +static struct {
> +	struct local_storage_rcu_tasks_trace_bench *skel;
> +	long prev_kthread_stime;
> +} ctx;
> +
> +static void sleep_and_loop(void)
> +{
> +	while (true) {
> +		sleep(rand() % 4);
> +		syscall(__NR_getpgid);
> +	}
> +}
> +
> +static void local_storage_tasks_trace_setup(void)
> +{
> +	int i, err, forkret, runner_pid;
> +
> +	runner_pid = getpid();
> +
> +	for (i = 0; i < args.nr_procs; i++) {
> +		forkret = fork();
> +		if (forkret < 0) {
> +			fprintf(stderr, "Error forking sleeper proc %u of %u, exiting\n", i,
> +				args.nr_procs);
> +			goto err_out;
> +		}
> +
> +		if (!forkret) {
> +			err = prctl(PR_SET_PDEATHSIG, SIGKILL);
> +			if (err < 0) {
> +				fprintf(stderr, "prctl failed with err %d, exiting\n", err);
> +				goto err_out;
> +			}
> +
> +			if (getppid() != runner_pid) {
> +				fprintf(stderr, "Runner died while spinning up procs, exiting\n");
> +				goto err_out;
> +			}
> +			sleep_and_loop();
> +		}
> +	}
> +	fprintf(stderr, "Spun up %u procs (our pid %d)\n", args.nr_procs, runner_pid);
> +
> +	setup_libbpf();
> +
> +	ctx.skel = local_storage_rcu_tasks_trace_bench__open_and_load();
> +	if (!ctx.skel) {
> +		fprintf(stderr, "Error doing open_and_load, exiting\n");
> +		goto err_out;
> +	}
> +
> +	ctx.prev_kthread_stime = kthread_pid_ticks();
> +
> +	if (!bpf_program__attach(ctx.skel->progs.get_local)) {
> +		fprintf(stderr, "Error attaching bpf program\n");
> +		goto err_out;
> +	}
> +
> +	if (!bpf_program__attach(ctx.skel->progs.pregp_step)) {
> +		fprintf(stderr, "Error attaching bpf program\n");
> +		goto err_out;
> +	}
> +
> +	if (!bpf_program__attach(ctx.skel->progs.postgp)) {
> +		fprintf(stderr, "Error attaching bpf program\n");
> +		goto err_out;
> +	}
> +
> +	return;
> +err_out:
> +	exit(1);
> +}
> +
> +static void measure(struct bench_res *res)
> +{
> +	long ticks;
> +
> +	res->gp_ct = atomic_swap(&ctx.skel->bss->gp_hits, 0);
> +	res->gp_ns = atomic_swap(&ctx.skel->bss->gp_times, 0);
> +	ticks = kthread_pid_ticks();
> +	res->stime = ticks - ctx.prev_kthread_stime;
> +	ctx.prev_kthread_stime = ticks;
> +}
> +
> +static void *consumer(void *input)
> +{
> +	return NULL;
> +}
> +
> +static void *producer(void *input)
> +{
> +	while (true)
> +		syscall(__NR_getpgid);
> +	return NULL;
> +}
> +
> +static void report_progress(int iter, struct bench_res *res, long delta_ns)
> +{
> +	if (ctx.skel->bss->unexpected) {
> +		fprintf(stderr, "Error: Unexpected order of bpf prog calls (postgp after pregp).");
> +		fprintf(stderr, "Data can't be trusted, exiting\n");
> +		exit(1);
> +	}
> +
> +	if (args.quiet)
> +		return;
> +
> +	printf("Iter %d\t avg tasks_trace grace period latency\t%lf ns\n",
> +	       iter, res->gp_ns / (double)res->gp_ct);
> +	printf("Iter %d\t avg ticks per tasks_trace grace period\t%lf\n",
> +	       iter, res->stime / (double)res->gp_ct);
> +}
> +
> +static void report_final(struct bench_res res[], int res_cnt)
> +{
> +	struct basic_stats gp_stat;
> +
> +	grace_period_latency_basic_stats(res, res_cnt, &gp_stat);
> +	printf("SUMMARY tasks_trace grace period latency");
> +	printf("\tavg %.3lf us\tstddev %.3lf us\n", gp_stat.mean, gp_stat.stddev);
> +	grace_period_ticks_basic_stats(res, res_cnt, &gp_stat);
> +	printf("SUMMARY ticks per tasks_trace grace period");
> +	printf("\tavg %.3lf\tstddev %.3lf\n", gp_stat.mean, gp_stat.stddev);
> +}
> +
> +/* local-storage-tasks-trace: Benchmark performance of BPF local_storage's use
> + * of RCU Tasks-Trace.
> + *
> + * Stress RCU Tasks Trace by forking many tasks, all of which do no work aside
> + * from sleep() loop, and creating/destroying BPF task-local storage on wakeup.
> + * The number of forked tasks is configurable.
> + *
> + * exercising code paths which call call_rcu_tasks_trace while there are many
> + * thousands of tasks on the system should result in RCU Tasks-Trace having to
> + * do a noticeable amount of work.
> + *
> + * This should be observable by measuring rcu_tasks_trace_kthread CPU usage
> + * after the grace period has ended, or by measuring grace period latency.
> + *
> + * This benchmark uses both approaches, attaching to rcu_tasks_trace_pregp_step
> + * and rcu_tasks_trace_postgp functions to measure grace period latency and
> + * using /proc/PID/stat to measure rcu_tasks_trace_kthread kernel ticks
> + */
> +const struct bench bench_local_storage_tasks_trace = {
> +	.name = "local-storage-tasks-trace",
> +	.validate = validate,
> +	.setup = local_storage_tasks_trace_setup,
> +	.producer_thread = producer,
> +	.consumer_thread = consumer,
> +	.measure = measure,
> +	.report_progress = report_progress,
> +	.report_final = report_final,
> +};
> diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh
> new file mode 100755
> index 000000000000..5dac1f02892c
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh
> @@ -0,0 +1,11 @@
> +#!/bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +
> +kthread_pid=`pgrep rcu_tasks_trace_kthread`
> +
> +if [ -z $kthread_pid ]; then
> +	echo "error: Couldn't find rcu_tasks_trace_kthread"
> +	exit 1
> +fi
> +
> +./bench --nr_procs 15000 --kthread_pid $kthread_pid -d 600 --quiet 1 local-storage-tasks-trace
> diff --git a/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c b/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c
> new file mode 100644
> index 000000000000..9b11342b19a0
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c
> @@ -0,0 +1,65 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
> +
> +#include "vmlinux.h"
> +#include <bpf/bpf_helpers.h>
> +#include "bpf_misc.h"
> +
> +struct {
> +	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
> +	__uint(map_flags, BPF_F_NO_PREALLOC);
> +	__type(key, int);
> +	__type(value, int);
> +} task_storage SEC(".maps");
> +
> +long hits;
> +long gp_hits;
> +long gp_times;
> +long current_gp_start;
> +long unexpected;
> +
> +SEC("fentry/" SYS_PREFIX "sys_getpgid")
> +int get_local(void *ctx)
> +{
> +	struct task_struct *task;
> +	int idx;
> +	int *s;
> +
> +	idx = 0;
> +	task = bpf_get_current_task_btf();
> +	s = bpf_task_storage_get(&task_storage, task, &idx,
> +				 BPF_LOCAL_STORAGE_GET_F_CREATE);
> +	if (!s)
> +		return 0;
> +
> +	*s = 3;
> +	bpf_task_storage_delete(&task_storage, task);
> +	__sync_add_and_fetch(&hits, 1);
> +	return 0;
> +}
> +
> +SEC("kprobe/rcu_tasks_trace_pregp_step")
> +int pregp_step(struct pt_regs *ctx)
> +{
> +	current_gp_start = bpf_ktime_get_ns();
> +	return 0;
> +}
> +
> +SEC("kprobe/rcu_tasks_trace_postgp")
> +int postgp(struct pt_regs *ctx)
> +{
> +	if (!current_gp_start) {
> +		/* Will only happen if prog tracing rcu_tasks_trace_pregp_step doesn't
> +		 * execute before this prog
> +		 */
> +		__sync_add_and_fetch(&unexpected, 1);
> +		return 0;
> +	}
> +
> +	__sync_add_and_fetch(&gp_times, bpf_ktime_get_ns() - current_gp_start);
> +	__sync_add_and_fetch(&gp_hits, 1);
> +	current_gp_start = 0;
> +	return 0;
> +}
> +
> +char _license[] SEC("license") = "GPL";
> -- 
> 2.30.2
> 



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux