On Tue Mar 18, 2025 at 7:59 PM CET, Christoph Schlameuss wrote:
[...]
> diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
> index 0aca5fa01f3d772c3b3dd62a22134c0d4cb9dc22..4ab196caa9e79e4c4d295d23fed65e1a142e6ab1 100644
> --- a/arch/s390/include/asm/kvm_host.h
> +++ b/arch/s390/include/asm/kvm_host.h
[...]
> +static struct ssca_vsie *get_ssca(struct kvm *kvm, struct vsie_page *vsie_page)
> +{
> +	u64 sca_o_hva = vsie_page->sca_o;
> +	phys_addr_t sca_o_hpa = virt_to_phys((void *)sca_o_hva);
> +	struct ssca_vsie *ssca, *ssca_new = NULL;
> +
> +	/* get existing ssca */
> +	down_read(&kvm->arch.vsie.ssca_lock);
> +	ssca = get_existing_ssca(kvm, sca_o_hva);
> +	up_read(&kvm->arch.vsie.ssca_lock);
> +	if (ssca)
> +		return ssca;

I would assume this is the most common case, no? And the slow path
below is only taken rarely, right?

> +	/*
> +	 * Allocate new ssca, it will likely be needed below.
> +	 * We want at least #online_vcpus shadows, so every VCPU can execute the
> +	 * VSIE in parallel. (Worst case all single core VMs.)
> +	 */
> +	if (kvm->arch.vsie.ssca_count < atomic_read(&kvm->online_vcpus)) {
> +		BUILD_BUG_ON(offsetof(struct ssca_block, cpu) != 64);
> +		BUILD_BUG_ON(offsetof(struct ssca_vsie, ref_count) != 0x2200);
> +		BUILD_BUG_ON(sizeof(struct ssca_vsie) > ((1UL << SSCA_PAGEORDER)-1) * PAGE_SIZE);
> +		ssca_new = (struct ssca_vsie *)__get_free_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO,
> +								SSCA_PAGEORDER);
> +		if (!ssca_new) {
> +			ssca = ERR_PTR(-ENOMEM);
> +			goto out;
> +		}
> +		init_ssca(vsie_page, ssca_new);
> +	}
> +
> +	/* enter write lock and recheck to make sure ssca has not been created by other cpu */
> +	down_write(&kvm->arch.vsie.ssca_lock);

I am wondering whether this optimization of trying to avoid taking the
write lock is really worth it. Maybe we can accept a bit of contention
on the rwsem, since this path shouldn't be taken very often, and keep
the code a bit less complex?
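
Something like the below is roughly what I have in mind. This is only
an untested sketch: it assumes get_existing_ssca() and init_ssca()
keep the signatures from this patch, and the list insertion and
ssca_count update (not visible in the quoted hunk) are elided as
placeholder comments. GFP_KERNEL allocations may sleep while holding
an rwsem, so the allocation can simply move under the write lock, and
the speculative ssca_new plus any free-on-race handling go away:

static struct ssca_vsie *get_ssca(struct kvm *kvm, struct vsie_page *vsie_page)
{
	u64 sca_o_hva = vsie_page->sca_o;
	struct ssca_vsie *ssca;

	/* fast path: the shadow SCA already exists, presumably the common case */
	down_read(&kvm->arch.vsie.ssca_lock);
	ssca = get_existing_ssca(kvm, sca_o_hva);
	up_read(&kvm->arch.vsie.ssca_lock);
	if (ssca)
		return ssca;

	/*
	 * Slow path: recheck under the write lock, since another CPU may
	 * have created the shadow SCA meanwhile, and allocate right here.
	 */
	down_write(&kvm->arch.vsie.ssca_lock);
	ssca = get_existing_ssca(kvm, sca_o_hva);
	if (ssca)
		goto out;
	if (kvm->arch.vsie.ssca_count < atomic_read(&kvm->online_vcpus)) {
		ssca = (struct ssca_vsie *)__get_free_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO,
							    SSCA_PAGEORDER);
		if (!ssca) {
			ssca = ERR_PTR(-ENOMEM);
			goto out;
		}
		init_ssca(vsie_page, ssca);
		/* ... insert into the vsie list and bump ssca_count ... */
	} else {
		/* ... recycle an existing shadow SCA, as the patch does ... */
	}
out:
	up_write(&kvm->arch.vsie.ssca_lock);
	return ssca;
}

The BUILD_BUG_ON()s could also move out of the allocation path
entirely; they are compile-time assertions and hold regardless of
where they are placed.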