This test runs kfree_rcu() in a loop to measure performance of the new
kfree_rcu(), with and without the patch.

To see the improvement, run with the boot parameters:

  rcuperf.kfree_loops=2000 rcuperf.kfree_alloc_num=100 rcuperf.perf_type=kfree

Without the patch, the test runs in 6.9 seconds. With the patch, the test
runs in 6.1 seconds (roughly a 13% improvement).

To run the test with the traditional (non-batched) kfree_rcu(), for example
to compare results, pass the rcuperf.kfree_no_batch=1 boot parameter.

Cc: max.byungchul.park@xxxxxxxxx
Cc: byungchul.park@xxxxxxx
Signed-off-by: Joel Fernandes (Google) <joel@xxxxxxxxxxxxxxxxx>
---
 kernel/rcu/rcuperf.c | 169 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 168 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index 7a6890b23c5f..34658760da5e 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -89,7 +89,7 @@ torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable
 
 static char *perf_type = "rcu";
 module_param(perf_type, charp, 0444);
-MODULE_PARM_DESC(perf_type, "Type of RCU to performance-test (rcu, rcu_bh, ...)");
+MODULE_PARM_DESC(perf_type, "Type of RCU to performance-test (rcu, rcu_bh, kfree, ...)");
 
 static int nrealreaders;
 static int nrealwriters;
@@ -592,6 +592,170 @@ rcu_perf_shutdown(void *arg)
 	return -EINVAL;
 }
 
+/*
+ * kfree_rcu() performance tests: Start a kfree_rcu() loop on all CPUs for a
+ * number of iterations and measure the total time for all iterations to
+ * complete.
+ */
+
+torture_param(int, kfree_nthreads, -1, "Number of threads running loops of kfree_rcu()");
+torture_param(int, kfree_alloc_num, 8000, "Number of allocations and frees done by a thread");
+torture_param(int, kfree_alloc_size, 16, "Size of each allocation");
+torture_param(int, kfree_loops, 10, "Number of loops doing kfree_alloc_num allocations and frees");
+torture_param(int, kfree_no_batch, 0, "Use the non-batching (slower) version of kfree_rcu()");
+
+static struct task_struct **kfree_reader_tasks;
+static int kfree_nrealthreads;
+static atomic_t n_kfree_perf_thread_started;
+static atomic_t n_kfree_perf_thread_ended;
+
+#define KFREE_OBJ_BYTES 8
+
+struct kfree_obj {
+	char kfree_obj[KFREE_OBJ_BYTES];
+	struct rcu_head rh;
+};
+
+void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func);
+
+static int
+kfree_perf_thread(void *arg)
+{
+	int i, l = 0;
+	long me = (long)arg;
+	struct kfree_obj **alloc_ptrs;
+	u64 start_time, end_time;
+
+	VERBOSE_PERFOUT_STRING("kfree_perf_thread task started");
+	set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
+	set_user_nice(current, MAX_NICE);
+	atomic_inc(&n_kfree_perf_thread_started);
+
+	alloc_ptrs = (struct kfree_obj **)kmalloc(sizeof(struct kfree_obj *) * kfree_alloc_num,
+						  GFP_KERNEL);
+	if (!alloc_ptrs)
+		return -ENOMEM;
+
+	start_time = ktime_get_mono_fast_ns();
+	do {
+		for (i = 0; i < kfree_alloc_num; i++) {
+			alloc_ptrs[i] = kmalloc(sizeof(struct kfree_obj), GFP_KERNEL);
+			if (!alloc_ptrs[i])
+				return -ENOMEM;
+		}
+
+		for (i = 0; i < kfree_alloc_num; i++) {
+			if (!kfree_no_batch) {
+				kfree_rcu(alloc_ptrs[i], rh);
+			} else {
+				rcu_callback_t cb;
+
+				cb = (rcu_callback_t)(unsigned long)offsetof(struct kfree_obj, rh);
+				kfree_call_rcu_nobatch(&(alloc_ptrs[i]->rh), cb);
+			}
+		}
+
+		schedule_timeout_uninterruptible(2);
+	} while (!torture_must_stop() && ++l < kfree_loops);
+
+	kfree(alloc_ptrs);
+
+	if (atomic_inc_return(&n_kfree_perf_thread_ended) >= kfree_nrealthreads) {
+		end_time = ktime_get_mono_fast_ns();
+		pr_alert("Total time taken by all kfree'ers: %llu ns, loops: %d\n",
+			 (unsigned long long)(end_time - start_time), kfree_loops);
+		if (shutdown) {
+			smp_mb(); /* Assign before wake. */
+			wake_up(&shutdown_wq);
+		}
+	}
+
+	torture_kthread_stopping("kfree_perf_thread");
+	return 0;
+}
+
+static void
+kfree_perf_cleanup(void)
+{
+	int i;
+
+	if (torture_cleanup_begin())
+		return;
+
+	if (kfree_reader_tasks) {
+		for (i = 0; i < kfree_nrealthreads; i++)
+			torture_stop_kthread(kfree_perf_thread,
+					     kfree_reader_tasks[i]);
+		kfree(kfree_reader_tasks);
+	}
+
+	torture_cleanup_end();
+}
+
+/*
+ * shutdown kthread.  Just waits to be awakened, then shuts down system.
+ */
+static int
+kfree_perf_shutdown(void *arg)
+{
+	do {
+		wait_event(shutdown_wq,
+			   atomic_read(&n_kfree_perf_thread_ended) >=
+			   kfree_nrealthreads);
+	} while (atomic_read(&n_kfree_perf_thread_ended) < kfree_nrealthreads);
+
+	smp_mb(); /* Wake before output. */
+
+	kfree_perf_cleanup();
+	kernel_power_off();
+	return -EINVAL;
+}
+
+static int __init
+kfree_perf_init(void)
+{
+	long i;
+	int firsterr = 0;
+
+	if (!torture_init_begin("kfree_perf", verbose))
+		return -EBUSY;
+
+	kfree_nrealthreads = compute_real(kfree_nthreads);
+	/* Start up the kthreads. */
+	if (shutdown) {
+		init_waitqueue_head(&shutdown_wq);
+		firsterr = torture_create_kthread(kfree_perf_shutdown, NULL,
+						  shutdown_task);
+		if (firsterr)
+			goto unwind;
+		schedule_timeout_uninterruptible(1);
+	}
+
+	kfree_reader_tasks = kcalloc(kfree_nrealthreads, sizeof(kfree_reader_tasks[0]),
+				     GFP_KERNEL);
+	if (kfree_reader_tasks == NULL) {
+		firsterr = -ENOMEM;
+		goto unwind;
+	}
+
+	for (i = 0; i < kfree_nrealthreads; i++) {
+		firsterr = torture_create_kthread(kfree_perf_thread, (void *)i,
+						  kfree_reader_tasks[i]);
+		if (firsterr)
+			goto unwind;
+	}
+
+	while (atomic_read(&n_kfree_perf_thread_started) < kfree_nrealthreads)
+		schedule_timeout_uninterruptible(1);
+
+	torture_init_end();
+	return 0;
+
+unwind:
+	torture_init_end();
+	kfree_perf_cleanup();
+	return firsterr;
+}
+
 static int __init
 rcu_perf_init(void)
 {
@@ -601,6 +765,9 @@ rcu_perf_init(void)
 		&rcu_ops, &srcu_ops, &srcud_ops, &tasks_ops,
 	};
 
+	if (strcmp(perf_type, "kfree") == 0)
+		return kfree_perf_init();
+
 	if (!torture_init_begin(perf_type, verbose))
 		return -EBUSY;
 
-- 
2.22.0.770.g0f2c4a37fd-goog
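
[ Side note for reviewers, not part of the patch: the kfree_no_batch path
above passes the offset of the rcu_head within struct kfree_obj, cast to
rcu_callback_t, instead of a real callback. This mirrors the encoding that
the kfree_rcu() macro itself uses, where an offset smaller than 4096 is
treated as an offset rather than a function pointer (see
__is_kfree_rcu_offset()), and the RCU core turns it back into the object
address before kfree()ing it. The stand-alone user-space sketch below
illustrates only that encoding; fake_reclaim() and is_kfree_offset() are
hypothetical names invented for this example and do not exist in the
kernel. ]

/*
 * User-space sketch of the offset-as-callback encoding (NOT kernel code).
 */
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>

struct rcu_head {
	void *next;
	void *func;
};

struct kfree_obj {
	char kfree_obj[8];
	struct rcu_head rh;
};

typedef void (*rcu_callback_t)(struct rcu_head *head);

/* Mirrors the idea behind __is_kfree_rcu_offset(): small values are offsets. */
static int is_kfree_offset(unsigned long fn)
{
	return fn < 4096;
}

/*
 * Hypothetical stand-in for the reclaim step: if the "callback" is really an
 * offset, back up from the rcu_head to the enclosing object and free it;
 * otherwise invoke it as an ordinary callback.
 */
static void fake_reclaim(struct rcu_head *head, rcu_callback_t func)
{
	unsigned long fn = (unsigned long)func;

	if (is_kfree_offset(fn))
		free((char *)head - fn);
	else
		func(head);
}

int main(void)
{
	struct kfree_obj *obj = malloc(sizeof(*obj));
	rcu_callback_t cb;

	if (!obj)
		return 1;

	/* Same encoding the test uses for kfree_call_rcu_nobatch(). */
	cb = (rcu_callback_t)(unsigned long)offsetof(struct kfree_obj, rh);
	fake_reclaim(&obj->rh, cb);

	printf("freed object via rcu_head offset %zu\n",
	       offsetof(struct kfree_obj, rh));
	return 0;
}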