On Wednesday, October 26, 2022 11:44 PM, Sean Christopherson wrote:
> > I think it would be better to do the thread pinning at the time when
> > the thread is created by providing a pthread_attr_t attr, e.g.:
> >
> > pthread_attr_t attr;
> >
> > CPU_SET(vcpu->pcpu, &cpu_set);
> > pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpu_set);
> > pthread_create(thread, attr, ...);
> >
> > Also, pinning a vCPU thread to a pCPU is a general operation which
> > other users would need. I think we could make it more general and put
> > it in kvm_util.
>
> We could, but taking advantage of the pinning functionality would require
> plumbing a command line option for every test,

I think we could make this "pinning" optional (no need to force everyone
to use it).

> or alternatively adding partial command line parsing with a "hidden"
> global struct to kvm_selftest_init(), though handling error checking for
> a truly generic case would be a mess. Either way, extending pinning to
> other tests would require non-trivial effort, and can be done on top of
> this series.
>
> That said, it's also trivial to extract the pinning helpers to common
> code, and I can't think of any reason not to do that straightaway.
>
> Vipin, any objection to squashing the below diff with patch 5?
>
> > e.g. adding it to the helper function that I'm trying to create
>
> If we go this route in the future, we'd need to add a worker trampoline
> as the pinning needs to happen in the worker task itself to guarantee
> that the pinning takes effect before the worker does anything useful.
> That should be very doable.

The alternative is the approach I shared before, using this:

/* Thread created with attribute ATTR will be limited to run only on
   the processors represented in CPUSET. */
extern int pthread_attr_setaffinity_np(pthread_attr_t *__attr,
				       size_t __cpusetsize,
				       const cpu_set_t *__cpuset);

Basically, the thread is created directly on the pCPU the user specified.
I think this is better than "creating the thread on an arbitrary pCPU and
then pinning it to the user-specified pCPU in the thread's start routine"
(see the standalone sketch just before the diff below).

> I do like the idea of extending __vcpu_thread_create(), but we can do
> that once __vcpu_thread_create() lands to avoid further delaying this
> series.

Sounds good. I can move some of this to __vcpu_thread_create() once it's
ready.

> struct perf_test_args {
> @@ -43,8 +41,12 @@ struct perf_test_args {
> 	bool nested;
> 	/* True if all vCPUs are pinned to pCPUs */
> 	bool pin_vcpus;
> +	/* The vCPU=>pCPU pinning map. Only valid if pin_vcpus is true. */
> +	uint32_t vcpu_to_pcpu[KVM_MAX_VCPUS];

How about putting the pCPU id into "struct kvm_vcpu" instead? (Please see
the code posted below for how that works.) This is helpful when we later
make this more generic, as kvm_vcpu is used by everyone. We probably also
don't need "bool pin_vcpus": we could initialize pcpu_id to -1 to indicate
that the vCPU doesn't need pinning (this is also what I meant above about
making it optional for other users).
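To make the comparison concrete, here is a minimal standalone sketch of
the attr-based creation (illustrative only, not from the series;
create_pinned_thread() and worker() are made-up names, build with
"gcc -pthread"):

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>

static void *worker(void *arg)
{
	/* By the time this runs, the affinity from the attr is in effect. */
	printf("worker running on pCPU %d\n", sched_getcpu());
	return NULL;
}

/* Create the thread already affined to @pcpu, no post-create pinning. */
static int create_pinned_thread(pthread_t *thread, uint32_t pcpu)
{
	pthread_attr_t attr;
	cpu_set_t cpuset;
	int r;

	CPU_ZERO(&cpuset);
	CPU_SET(pcpu, &cpuset);

	r = pthread_attr_init(&attr);
	if (r)
		return r;
	r = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
	if (!r)
		r = pthread_create(thread, &attr, worker, NULL);
	pthread_attr_destroy(&attr);
	return r;
}

int main(void)
{
	pthread_t thread;

	if (create_pinned_thread(&thread, 0))
		return 1;
	return pthread_join(thread, NULL);
}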
Putting the whole changes together (tested and working fine), FYI:

diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index b30500cc197e..2829c98078d0 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -304,7 +304,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	int nr_vcpus = params->nr_vcpus;
 
 	vm = perf_test_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1,
-				 params->backing_src, !overlap_memory_access);
+				 params->backing_src, !overlap_memory_access, NULL);
 
 	perf_test_start_vcpu_threads(nr_vcpus, vcpu_thread_main);
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index dcdb6964b1dc..e19c3ce32c62 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -286,7 +286,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	int r, i;
 
 	vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
-				 p->src_type, p->partition_vcpu_memory_access);
+				 p->src_type, p->partition_vcpu_memory_access, NULL);
 
 	demand_paging_size = get_backing_src_pagesz(p->src_type);
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 35504b36b126..cbe7de28e094 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -132,6 +132,7 @@ struct test_params {
 	bool partition_vcpu_memory_access;
 	enum vm_mem_backing_src_type backing_src;
 	int slots;
+	char *pcpu_list;
 };
 
 static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable)
@@ -223,7 +224,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 	vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
 				 p->slots, p->backing_src,
-				 p->partition_vcpu_memory_access);
+				 p->partition_vcpu_memory_access,
+				 p->pcpu_list);
 
 	perf_test_set_wr_fract(vm, p->wr_fract);
@@ -401,13 +403,13 @@ static void help(char *name)
 int main(int argc, char *argv[])
 {
 	int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
-	const char *pcpu_list = NULL;
 	struct test_params p = {
 		.iterations = TEST_HOST_LOOP_N,
 		.wr_fract = 1,
 		.partition_vcpu_memory_access = true,
 		.backing_src = DEFAULT_VM_MEM_SRC,
 		.slots = 1,
+		.pcpu_list = NULL,
 	};
 	int opt;
@@ -424,7 +426,7 @@ int main(int argc, char *argv[])
 			guest_percpu_mem_size = parse_size(optarg);
 			break;
 		case 'c':
-			pcpu_list = optarg;
+			p.pcpu_list = optarg;
 			break;
 		case 'e':
 			/* 'e' is for evil. */
@@ -471,9 +473,6 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	if (pcpu_list)
-		perf_test_setup_pinning(pcpu_list, nr_vcpus);
-
 	TEST_ASSERT(p.iterations >= 2, "The test should have at least two iterations");
 
 	pr_info("Test iterations: %"PRIu64"\n", p.iterations);
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index c9286811a4cb..d403b374bae5 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -7,7 +7,11 @@
 #ifndef SELFTEST_KVM_UTIL_H
 #define SELFTEST_KVM_UTIL_H
 
+#include <pthread.h>
+
 #include "kvm_util_base.h"
 #include "ucall_common.h"
 
+void kvm_parse_vcpu_pinning(struct kvm_vcpu **vcpu, int nr_vcpus, const char *pcpus_string);
+
 #endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index e42a09cd24a0..79867a478a81 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -47,6 +47,7 @@ struct userspace_mem_region {
 struct kvm_vcpu {
 	struct list_head list;
 	uint32_t id;
+	int pcpu_id;
 	int fd;
 	struct kvm_vm *vm;
 	struct kvm_run *run;
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index ccfe3b9dc6bd..81428022bdb5 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -52,7 +52,8 @@ extern struct perf_test_args perf_test_args;
 struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
 				   uint64_t vcpu_memory_bytes, int slots,
 				   enum vm_mem_backing_src_type backing_src,
-				   bool partition_vcpu_memory_access);
+				   bool partition_vcpu_memory_access,
+				   char *pcpu_list);
 void perf_test_destroy_vm(struct kvm_vm *vm);
 
 void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index f1cb1627161f..8acee6d4ccbe 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1114,6 +1114,7 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
 
 	vcpu->vm = vm;
 	vcpu->id = vcpu_id;
+	vcpu->pcpu_id = -1;
 	vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id);
 	TEST_ASSERT(vcpu->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu->fd));
@@ -2021,3 +2022,58 @@ void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
 		break;
 	}
 }
+
+void kvm_pin_this_task_to_pcpu(uint32_t pcpu)
+{
+	cpu_set_t mask;
+	int r;
+
+	CPU_ZERO(&mask);
+	CPU_SET(pcpu, &mask);
+	r = sched_setaffinity(0, sizeof(mask), &mask);
+	TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.\n", pcpu);
+}
+
+static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
+{
+	uint32_t pcpu = atoi_non_negative(cpu_str);
+
+	TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask),
+		    "Not allowed to run on pCPU '%d', check cgroups?\n", pcpu);
+	return pcpu;
+}
+
+void kvm_parse_vcpu_pinning(struct kvm_vcpu **vcpu, int nr_vcpus, const char *pcpus_string)
+{
+	cpu_set_t allowed_mask;
+	char *cpu, *cpu_list;
+	char delim[2] = ",";
+	int i, r;
+
+	if (!pcpus_string)
+		return;
+
+	cpu_list = strdup(pcpus_string);
+	TEST_ASSERT(cpu_list, "strdup() allocation failed.\n");
+
+	r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask);
+	TEST_ASSERT(!r, "sched_getaffinity() failed");
+
+	cpu = strtok(cpu_list, delim);
+
+	/* 1. Get all pcpus for vcpus. */
+	for (i = 0; i < nr_vcpus; i++) {
+		TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'\n", i);
+		vcpu[i]->pcpu_id = parse_pcpu(cpu, &allowed_mask);
+		cpu = strtok(NULL, delim);
+	}
+
+	/* 2. Check if the main worker needs to be pinned. */
+	if (cpu) {
+		kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask));
+		cpu = strtok(NULL, delim);
+	}
+
+	TEST_ASSERT(!cpu, "pCPU list contains trailing garbage characters '%s'", cpu);
+	free(cpu_list);
+}
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index 520d1f896d61..95166c5a77f7 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -112,7 +112,8 @@ void perf_test_setup_vcpus(struct kvm_vm *vm, int nr_vcpus,
 struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
 				   uint64_t vcpu_memory_bytes, int slots,
 				   enum vm_mem_backing_src_type backing_src,
-				   bool partition_vcpu_memory_access)
+				   bool partition_vcpu_memory_access,
+				   char *pcpu_list)
 {
 	struct perf_test_args *pta = &perf_test_args;
 	struct kvm_vm *vm;
@@ -157,7 +158,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
 	 */
 	vm = __vm_create_with_vcpus(mode, nr_vcpus, slot0_pages + guest_num_pages,
 				    perf_test_guest_code, vcpus);
-
+	kvm_parse_vcpu_pinning(vcpus, nr_vcpus, pcpu_list);
 	pta->vm = vm;
 
 	/* Put the test region at the top guest physical memory. */
@@ -284,17 +285,29 @@ void perf_test_start_vcpu_threads(int nr_vcpus,
 				  void (*vcpu_fn)(struct perf_test_vcpu_args *))
 {
 	int i;
+	pthread_attr_t attr, *attr_p;
+	cpu_set_t cpuset;
 
 	vcpu_thread_fn = vcpu_fn;
 	WRITE_ONCE(all_vcpu_threads_running, false);
 
 	for (i = 0; i < nr_vcpus; i++) {
 		struct vcpu_thread *vcpu = &vcpu_threads[i];
+		attr_p = NULL;
 
 		vcpu->vcpu_idx = i;
 		WRITE_ONCE(vcpu->running, false);
 
-		pthread_create(&vcpu->thread, NULL, vcpu_thread_main, vcpu);
+		if (vcpus[i]->pcpu_id != -1) {
+			CPU_ZERO(&cpuset);
+			CPU_SET(vcpus[i]->pcpu_id, &cpuset);
+			pthread_attr_init(&attr);
+			pthread_attr_setaffinity_np(&attr,
+						    sizeof(cpu_set_t), &cpuset);
+			attr_p = &attr;
+		}
+
+		pthread_create(&vcpu->thread, attr_p, vcpu_thread_main, vcpu);
 	}
 
 	for (i = 0; i < nr_vcpus; i++) {
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index 9968800ec2ec..5dbe09537b2d 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -99,7 +99,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
 				 VM_MEM_SRC_ANONYMOUS,
-				 p->partition_vcpu_memory_access);
+				 p->partition_vcpu_memory_access, NULL);
 
 	pr_info("Finished creating vCPUs\n");
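
P.S. For completeness, if we do later go the __vcpu_thread_create() route,
I imagine the worker trampoline Sean mentioned would be roughly the below
(hypothetical sketch only, not part of the tested diff above; names are
made up, and it reuses kvm_pin_this_task_to_pcpu() from the diff):

struct vcpu_thread_trampoline_args {
	void *(*real_fn)(void *);	/* the worker's real start routine */
	void *real_arg;
	int pcpu_id;			/* -1 means "no pinning" */
};

static void *vcpu_thread_trampoline(void *arg)
{
	struct vcpu_thread_trampoline_args *t = arg;

	/*
	 * Pin from inside the worker task itself, so the affinity is
	 * guaranteed to be in place before the worker does anything useful.
	 */
	if (t->pcpu_id != -1)
		kvm_pin_this_task_to_pcpu(t->pcpu_id);

	return t->real_fn(t->real_arg);
}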