On Mon, Dec 11, 2017 at 03:35:02PM +0100, Christian Borntraeger wrote: > > > On 12/11/2017 03:16 PM, Yury Norov wrote: > > This benchmark sends many IPIs in different modes and measures > > time for IPI delivery (first column), and total time, ie including > > time to acknowledge the receive by sender (second column). > > > > The scenarios are: > > Dry-run: do everything except actually sending IPI. Useful > > to estimate system overhead. > > Self-IPI: Send IPI to self CPU. > > Normal IPI: Send IPI to some other CPU. > > Broadcast IPI: Send broadcast IPI to all online CPUs. > > > > For virtualized guests, sending and reveiving IPIs causes guest exit. > > I used this test to measure performance impact on KVM subsystem of > > Christoffer Dall's series "Optimize KVM/ARM for VHE systems". > > > > https://www.spinics.net/lists/kvm/msg156755.html > > > > Test machine is ThunderX2, 112 online CPUs. Below the results normalized > > to host dry-run time. Smaller - better. > > > > Host, v4.14: > > Dry-run: 0 1 > > Self-IPI: 9 18 > > Normal IPI: 81 110 > > Broadcast IPI: 0 2106 > > > > Guest, v4.14: > > Dry-run: 0 1 > > Self-IPI: 10 18 > > Normal IPI: 305 525 > > Broadcast IPI: 0 9729 > > > > Guest, v4.14 + VHE: > > Dry-run: 0 1 > > Self-IPI: 9 18 > > Normal IPI: 176 343 > > Broadcast IPI: 0 9885 > > > > CC: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> > > CC: Ashish Kalra <Ashish.Kalra@xxxxxxxxxx> > > CC: Christoffer Dall <christoffer.dall@xxxxxxxxxx> > > CC: Geert Uytterhoeven <geert@xxxxxxxxxxxxxx> > > CC: Linu Cherian <Linu.Cherian@xxxxxxxxxx> > > CC: Sunil Goutham <Sunil.Goutham@xxxxxxxxxx> > > Signed-off-by: Yury Norov <ynorov@xxxxxxxxxxxxxxxxxx> > > --- > > arch/Kconfig | 10 ++++ > > kernel/Makefile | 1 + > > kernel/ipi_benchmark.c | 134 +++++++++++++++++++++++++++++++++++++++++++++++++ > > 3 files changed, 145 insertions(+) > > create mode 100644 kernel/ipi_benchmark.c > > diff --git a/arch/Kconfig b/arch/Kconfig > > index 057370a0ac4e..80d6ef439199 100644 > > --- 
a/arch/Kconfig > > +++ b/arch/Kconfig > > @@ -82,6 +82,16 @@ config JUMP_LABEL > > ( On 32-bit x86, the necessary options added to the compiler > > flags may increase the size of the kernel slightly. ) > > > > +config IPI_BENCHMARK > > + tristate "Test IPI performance on SMP systems" > > + depends on SMP > > + help > > + Test IPI performance on SMP systems. If system has only one online > > + CPU, sending IPI to other CPU is obviously not possible, and ENOENT > > + is returned for corresponding test. > > + > > + If unsure, say N. > > + > > config STATIC_KEYS_SELFTEST > > bool "Static key selftest" > > depends on JUMP_LABEL > > diff --git a/kernel/Makefile b/kernel/Makefile > > index 172d151d429c..04e550e1990c 100644 > > --- a/kernel/Makefile > > +++ b/kernel/Makefile > > @@ -101,6 +101,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/ > > obj-$(CONFIG_IRQ_WORK) += irq_work.o > > obj-$(CONFIG_CPU_PM) += cpu_pm.o > > obj-$(CONFIG_BPF) += bpf/ > > +obj-$(CONFIG_IPI_BENCHMARK) += ipi_benchmark.o > > > > obj-$(CONFIG_PERF_EVENTS) += events/ > > > > diff --git a/kernel/ipi_benchmark.c b/kernel/ipi_benchmark.c > > new file mode 100644 > > index 000000000000..35f1f7598c36 > > --- /dev/null > > +++ b/kernel/ipi_benchmark.c > > @@ -0,0 +1,134 @@ > > +/* > > + * Performance test for IPI on SMP machines. > > + * > > + * Copyright (c) 2017 Cavium Networks. > > + * > > + * This program is free software; you can redistribute it and/or > > + * modify it under the terms of version 2 of the GNU General Public > > + * License as published by the Free Software Foundation. > > + * > > + * This program is distributed in the hope that it will be useful, but > > + * WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + * General Public License for more details. 
> > + */ > > + > > +#include <linux/module.h> > > +#include <linux/kernel.h> > > +#include <linux/init.h> > > +#include <linux/ktime.h> > > + > > +#define NTIMES 100000 > > + > > +#define POKE_ANY 0 > > +#define DRY_RUN 1 > > +#define POKE_SELF 2 > > +#define POKE_ALL 3 > > + > > +static void __init handle_ipi(void *t) > > +{ > > + ktime_t *time = (ktime_t *) t; > > + > > + if (time) > > + *time = ktime_get() - *time; > > +} > > + > > +static ktime_t __init send_ipi(int flags) > > +{ > > + ktime_t time; > > + unsigned int cpu = get_cpu(); > > + > > + switch (flags) { > > + case POKE_ALL: > > + /* If broadcasting, don't force all CPUs to update time. */ > > + smp_call_function_many(cpu_online_mask, handle_ipi, NULL, 1); > > + /* Fall thru */ > > + case DRY_RUN: > > + /* Do everything except actually sending IPI. */ > > + time = 0; > > + break; > > + case POKE_ANY: > > + cpu = cpumask_any_but(cpu_online_mask, cpu); > > + if (cpu >= nr_cpu_ids) { > > + time = -ENOENT; > > + break; > > + } > > + /* Fall thru */ > > + case POKE_SELF: > > + time = ktime_get(); > > + smp_call_function_single(cpu, handle_ipi, &time, 1); > > + break; > > + default: > > + time = -EINVAL; > > + } > > + > > + put_cpu(); > > + return time; > > +} > > + > > +static int __init __bench_ipi(unsigned long i, ktime_t *time, int flags) > > +{ > > + ktime_t t; > > + > > + *time = 0; > > + while (i--) { > > + t = send_ipi(flags); > > + if ((int) t < 0) > > + return (int) t; > > + > > + *time += t; > > + } > > + > > + return 0; > > +} > > + > > +static int __init bench_ipi(unsigned long times, int flags, > > + ktime_t *ipi, ktime_t *total) > > +{ > > + int ret; > > + > > + *total = ktime_get(); > > + ret = __bench_ipi(times, ipi, flags); > > + if (unlikely(ret)) > > + return ret; > > + > > + *total = ktime_get() - *total; > > + > > + return 0; > > +} > > + > > +static int __init init_bench_ipi(void) > > +{ > > + ktime_t ipi, total; > > + int ret; > > + > > + ret = bench_ipi(NTIMES, DRY_RUN, &ipi, 
&total); > > + if (ret) > > + pr_err("Dry-run FAILED: %d\n", ret); > > + else > > + pr_err("Dry-run: %18llu, %18llu ns\n", ipi, total); > > you do not use NTIMES here to calculate the average value. Is that intended? I think it's more illustrative to present all results as multiples of the dry-run time, as I did in the patch description. So on the kernel side I expose the raw data and calculate the final values after the tests finish. If you think that average values are preferable, I can do that in v2. Yury