On Fri, Aug 19, 2022 at 02:07:37PM -0700, Vipin Sharma wrote: > Add command line options to run the vcpus and the main process on the > specific cpus on a host machine. This is useful as it provides > options to analyze performance based on the vcpus and dirty log worker > locations, like on the different numa nodes or on the same numa nodes. > > Link: https://lore.kernel.org/lkml/20220801151928.270380-1-vipinsh@xxxxxxxxxx > Signed-off-by: Vipin Sharma <vipinsh@xxxxxxxxxx> > Suggested-by: David Matlack <dmatlack@xxxxxxxxxx> > Suggested-by: Sean Christopherson <seanjc@xxxxxxxxxx> > Suggested-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> > --- > > v2: > - Removed -d option. > - One cpu list passed as option, cpus for vcpus, followed by > application thread cpu. > - Added paranoid cousin of atoi(). > > v1: https://lore.kernel.org/lkml/20220817152956.4056410-1-vipinsh@xxxxxxxxxx > > .../selftests/kvm/access_tracking_perf_test.c | 2 +- > .../selftests/kvm/demand_paging_test.c | 2 +- > .../selftests/kvm/dirty_log_perf_test.c | 89 +++++++++++++++++-- > .../selftests/kvm/include/perf_test_util.h | 3 +- > .../selftests/kvm/lib/perf_test_util.c | 32 ++++++- > .../kvm/memslot_modification_stress_test.c | 2 +- > 6 files changed, 116 insertions(+), 14 deletions(-) > > diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c > index 1c2749b1481a..9659462f4747 100644 > --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c > +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c > @@ -299,7 +299,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) > vm = perf_test_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1, > params->backing_src, !overlap_memory_access); > > - perf_test_start_vcpu_threads(nr_vcpus, vcpu_thread_main); > + perf_test_start_vcpu_threads(nr_vcpus, NULL, vcpu_thread_main); > > pr_info("\n"); > access_memory(vm, nr_vcpus, ACCESS_WRITE, "Populating memory"); > diff --git 
a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c > index 779ae54f89c4..b9848174d6e7 100644 > --- a/tools/testing/selftests/kvm/demand_paging_test.c > +++ b/tools/testing/selftests/kvm/demand_paging_test.c > @@ -336,7 +336,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) > pr_info("Finished creating vCPUs and starting uffd threads\n"); > > clock_gettime(CLOCK_MONOTONIC, &start); > - perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); > + perf_test_start_vcpu_threads(nr_vcpus, NULL, vcpu_worker); > pr_info("Started all vCPUs\n"); > > perf_test_join_vcpu_threads(nr_vcpus); > diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c > index f99e39a672d3..ace4ed954628 100644 > --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c > +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c > @@ -8,10 +8,12 @@ > * Copyright (C) 2020, Google, Inc. > */ > > +#define _GNU_SOURCE > #include <stdio.h> > #include <stdlib.h> > #include <time.h> > #include <pthread.h> > +#include <sched.h> > #include <linux/bitmap.h> > > #include "kvm_util.h" > @@ -132,6 +134,7 @@ struct test_params { > bool partition_vcpu_memory_access; > enum vm_mem_backing_src_type backing_src; > int slots; > + int *vcpu_to_lcpu; > }; > > static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable) > @@ -248,7 +251,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) > for (i = 0; i < nr_vcpus; i++) > vcpu_last_completed_iteration[i] = -1; > > - perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); > + perf_test_start_vcpu_threads(nr_vcpus, p->vcpu_to_lcpu, vcpu_worker); > > /* Allow the vCPUs to populate memory */ > pr_debug("Starting iteration %d - Populating\n", iteration); > @@ -348,12 +351,61 @@ static void run_test(enum vm_guest_mode mode, void *arg) > perf_test_destroy_vm(vm); > } > > +static int atoi_paranoid(const char *num_str) > +{ > + int num; > + char 
*end_ptr; > + > + errno = 0; > + num = (int)strtol(num_str, &end_ptr, 10); > + TEST_ASSERT(errno == 0, "Conversion error: %d\n", errno); > + TEST_ASSERT(num_str != end_ptr && *end_ptr == '\0', > + "Invalid number string.\n"); > + > + return num; > +} Introduce atoi_paranoid() and upgrade existing atoi() users in a separate commit. Also please put it in e.g. test_util.c so that it can be used by other tests (and consider upgrading other tests to use it in your commit). > + > +static int parse_cpu_list(const char *arg, int *lcpu_list, int list_size) > +{ > + char delim[2] = ","; > + char *cpu, *cpu_list; > + int i = 0, cpu_num; > + > + cpu_list = strdup(arg); > + TEST_ASSERT(cpu_list, "strdup() allocation failed.\n"); > + > + cpu = strtok(cpu_list, delim); > + while (cpu) { > + TEST_ASSERT(i != list_size, > + "Too many cpus, max supported: %d\n", list_size); > + > + cpu_num = atoi_paranoid(cpu); > + TEST_ASSERT(cpu_num >= 0, "Invalid cpu number: %d\n", cpu_num); > + lcpu_list[i++] = cpu_num; > + cpu = strtok(NULL, delim); > + } > + free(cpu_list); > + > + return i; > +} > + > +static void assign_dirty_log_perf_test_cpu(int cpu) > +{ > + cpu_set_t cpuset; > + int err; > + > + CPU_ZERO(&cpuset); > + CPU_SET(cpu, &cpuset); > + err = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset); > + TEST_ASSERT(err == 0, "Error in setting dirty log perf test cpu\n"); > +} > + > static void help(char *name) > { > puts(""); > printf("usage: %s [-h] [-i iterations] [-p offset] [-g] " > "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]" > - "[-x memslots]\n", name); > + "[-x memslots] [-c logical cpus to run test on]\n", name); > puts(""); > printf(" -i: specify iteration counts (default: %"PRIu64")\n", > TEST_HOST_LOOP_N); > @@ -383,6 +435,14 @@ static void help(char *name) > backing_src_help("-s"); > printf(" -x: Split the memory region into this number of memslots.\n" > " (default: 1)\n"); > + printf(" -c: Comma separated values of the logical CPUs, which will run\n" > + " 
the vCPUs, followed by the main application thread cpu.\n" > + " Number of values must be equal to the number of vCPUs + 1.\n\n" > + " Example: ./dirty_log_perf_test -v 3 -c 22,23,24,50\n" > + " This means that the vcpu 0 will run on the logical cpu 22,\n" > + " vcpu 1 on the logical cpu 23, vcpu 2 on the logical cpu 24\n" > + " and the main thread will run on cpu 50.\n" > + " (default: No cpu mapping)\n"); > puts(""); > exit(0); > } > @@ -390,14 +450,18 @@ static void help(char *name) > int main(int argc, char *argv[]) > { > int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); > + int lcpu_list[KVM_MAX_VCPUS + 1]; > + > struct test_params p = { > .iterations = TEST_HOST_LOOP_N, > .wr_fract = 1, > .partition_vcpu_memory_access = true, > .backing_src = DEFAULT_VM_MEM_SRC, > .slots = 1, > + .vcpu_to_lcpu = NULL, > }; > int opt; > + int nr_lcpus = -1; > > dirty_log_manual_caps = > kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); > @@ -406,8 +470,11 @@ int main(int argc, char *argv[]) > > guest_modes_append_default(); > > - while ((opt = getopt(argc, argv, "eghi:p:m:nb:f:v:os:x:")) != -1) { > + while ((opt = getopt(argc, argv, "c:eghi:p:m:nb:f:v:os:x:")) != -1) { > switch (opt) { > + case 'c': > + nr_lcpus = parse_cpu_list(optarg, lcpu_list, KVM_MAX_VCPUS + 1); I think we should move all the logic to pin threads to perf_test_util.c. The only thing dirty_log_perf_test.c should do is pass optarg into perf_test_util.c. This will make it trivial for any other test based on perf_test_util.c to also use pinning. e.g. All a test needs to do to use pinning is add a flag to the optlist and add a case statement like: case 'c': perf_test_setup_pinning(optarg); break; perf_test_setup_pinning() would: - Parse the list and populate perf_test_vcpu_args with each vCPU's assigned pCPU. - Pin the current thread to its assigned pCPU if one is provided. Validating that the number of pCPUs == number of vCPUs is a little tricky. 
But that could be done as part of perf_test_start_vcpu_threads(). Alternatively, you could set up pinning after getting the number of vCPUs. e.g. const char *cpu_list = NULL; ... while ((opt = getopt(...)) != -1) { switch (opt) { case 'c': cpu_list = optarg; // is grabbing optarg here safe? break; } ... } if (cpu_list) perf_test_setup_pinning(cpu_list, nr_vcpus); > + break; > case 'e': > /* 'e' is for evil. */ > run_vcpus_while_disabling_dirty_logging = true; > @@ -415,7 +482,7 @@ int main(int argc, char *argv[]) > dirty_log_manual_caps = 0; > break; > case 'i': > - p.iterations = atoi(optarg); > + p.iterations = atoi_paranoid(optarg); > break; > case 'p': > p.phys_offset = strtoull(optarg, NULL, 0); > @@ -430,12 +497,12 @@ int main(int argc, char *argv[]) > guest_percpu_mem_size = parse_size(optarg); > break; > case 'f': > - p.wr_fract = atoi(optarg); > + p.wr_fract = atoi_paranoid(optarg); > TEST_ASSERT(p.wr_fract >= 1, > "Write fraction cannot be less than one"); > break; > case 'v': > - nr_vcpus = atoi(optarg); > + nr_vcpus = atoi_paranoid(optarg); > TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, > "Invalid number of vcpus, must be between 1 and %d", max_vcpus); > break; > @@ -446,7 +513,7 @@ int main(int argc, char *argv[]) > p.backing_src = parse_backing_src_type(optarg); > break; > case 'x': > - p.slots = atoi(optarg); > + p.slots = atoi_paranoid(optarg); > break; > case 'h': > default: > @@ -455,6 +522,14 @@ int main(int argc, char *argv[]) > } > } > > + if (nr_lcpus != -1) { > + TEST_ASSERT(nr_lcpus == nr_vcpus + 1, > + "Number of logical cpus (%d) is not equal to the number of vcpus + 1 (%d).", > + nr_lcpus, nr_vcpus); > + assign_dirty_log_perf_test_cpu(lcpu_list[nr_vcpus]); > + p.vcpu_to_lcpu = lcpu_list; > + } > + > TEST_ASSERT(p.iterations >= 2, "The test should have at least two iterations"); > > pr_info("Test iterations: %"PRIu64"\n", p.iterations); > diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h 
b/tools/testing/selftests/kvm/include/perf_test_util.h > index eaa88df0555a..bd6c566cfc92 100644 > --- a/tools/testing/selftests/kvm/include/perf_test_util.h > +++ b/tools/testing/selftests/kvm/include/perf_test_util.h > @@ -53,7 +53,8 @@ void perf_test_destroy_vm(struct kvm_vm *vm); > > void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract); > > -void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *)); > +void perf_test_start_vcpu_threads(int vcpus, int *vcpus_to_lcpu, > + void (*vcpu_fn)(struct perf_test_vcpu_args *)); > void perf_test_join_vcpu_threads(int vcpus); > void perf_test_guest_code(uint32_t vcpu_id); > > diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c > index 9618b37c66f7..771fbdf3d2c2 100644 > --- a/tools/testing/selftests/kvm/lib/perf_test_util.c > +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c > @@ -2,11 +2,14 @@ > /* > * Copyright (C) 2020, Google LLC. > */ > +#define _GNU_SOURCE > #include <inttypes.h> > > #include "kvm_util.h" > #include "perf_test_util.h" > #include "processor.h" > +#include <pthread.h> > +#include <sched.h> > > struct perf_test_args perf_test_args; > > @@ -260,10 +263,15 @@ static void *vcpu_thread_main(void *data) > return NULL; > } > > -void perf_test_start_vcpu_threads(int nr_vcpus, > +void perf_test_start_vcpu_threads(int nr_vcpus, int *vcpu_to_lcpu, > void (*vcpu_fn)(struct perf_test_vcpu_args *)) > { > - int i; > + int i, err = 0; > + pthread_attr_t attr; > + cpu_set_t cpuset; > + > + pthread_attr_init(&attr); > + CPU_ZERO(&cpuset); > > vcpu_thread_fn = vcpu_fn; > WRITE_ONCE(all_vcpu_threads_running, false); > @@ -274,7 +282,24 @@ void perf_test_start_vcpu_threads(int nr_vcpus, > vcpu->vcpu_idx = i; > WRITE_ONCE(vcpu->running, false); > > - pthread_create(&vcpu->thread, NULL, vcpu_thread_main, vcpu); > + if (vcpu_to_lcpu) { > + CPU_SET(vcpu_to_lcpu[i], &cpuset); > + > + err = 
pthread_attr_setaffinity_np(&attr, > + sizeof(cpu_set_t), > + &cpuset); > + TEST_ASSERT(err == 0, > + "vCPU %d could not be mapped to logical cpu %d, error returned: %d\n", > + i, vcpu_to_lcpu[i], err); > + > + CPU_CLR(vcpu_to_lcpu[i], &cpuset); > + } > + > + err = pthread_create(&vcpu->thread, &attr, vcpu_thread_main, > + vcpu); > + TEST_ASSERT(err == 0, > + "error in creating vcpu %d thread, error returned: %d\n", > + i, err); > } > > for (i = 0; i < nr_vcpus; i++) { > @@ -283,6 +308,7 @@ void perf_test_start_vcpu_threads(int nr_vcpus, > } > > WRITE_ONCE(all_vcpu_threads_running, true); > + pthread_attr_destroy(&attr); > } > > void perf_test_join_vcpu_threads(int nr_vcpus) > diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c > index 6ee7e1dde404..246f8cc7bb2b 100644 > --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c > +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c > @@ -103,7 +103,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) > > pr_info("Finished creating vCPUs\n"); > > - perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); > + perf_test_start_vcpu_threads(nr_vcpus, NULL, vcpu_worker); > > pr_info("Started all vCPUs\n"); > > -- > 2.37.1.595.g718a3a8f04-goog >