On 7/1/22 18:25, Pierre Morel wrote: > We report a topology change to the guest for any CPU hotplug. > > The reporting to the guest is done using the Multiprocessor > Topology-Change-Report (MTCR) bit of the utility entry in the guest's > SCA which will be cleared during the interpretation of PTF. > > On every vCPU creation we set the MCTR bit to let the guest know the > next time he uses the PTF with command 2 instruction that the > topology changed and that he should use the STSI(15.1.x) instruction s/he/it (twice) > to get the topology details. > > STSI(15.1.x) gives information on the CPU configuration topology. > Let's accept the interception of STSI with the function code 15 and > let the userland part of the hypervisor handle it when userland > support the CPU Topology facility. And the user STSI capability. Also: supportS. > > Signed-off-by: Pierre Morel <pmorel@xxxxxxxxxxxxx> > Reviewed-by: Nico Boehr <nrb@xxxxxxxxxxxxx> > --- > arch/s390/include/asm/kvm_host.h | 18 +++++++++++++--- > arch/s390/kvm/kvm-s390.c | 36 ++++++++++++++++++++++++++++++++ > arch/s390/kvm/priv.c | 16 ++++++++++---- > arch/s390/kvm/vsie.c | 8 +++++++ > 4 files changed, 71 insertions(+), 7 deletions(-) > > diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h > index 766028d54a3e..ae6bd3d607de 100644 > --- a/arch/s390/include/asm/kvm_host.h > +++ b/arch/s390/include/asm/kvm_host.h > @@ -93,19 +93,30 @@ union ipte_control { > }; > }; > [...] 
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c > index 8fcb56141689..ee59b03f2e45 100644 > --- a/arch/s390/kvm/kvm-s390.c > +++ b/arch/s390/kvm/kvm-s390.c > @@ -1691,6 +1691,31 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) > return ret; > } > > +/** > + * kvm_s390_update_topology_change_report - update CPU topology change report > + * @kvm: guest KVM description > + * @val: set or clear the MTCR bit > + * > + * Updates the Multiprocessor Topology-Change-Report bit to signal > + * the guest with a topology change. > + * This is only relevant if the topology facility is present. > + * > + * The SCA version, bsca or esca, doesn't matter as offset is the same. > + */ > +static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val) > +{ > + struct bsca_block *sca = kvm->arch.sca; > + union sca_utility new, old; > + > + read_lock(&kvm->arch.sca_lock); You forgot to put the assignment of sca under the lock. > + do { > + old = READ_ONCE(sca->utility); > + new = old; > + new.mtcr = val; > + } while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val); > + read_unlock(&kvm->arch.sca_lock); > +} > + > static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) > { > int ret; > @@ -2877,6 +2902,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) > kvm_clear_async_pf_completion_queue(vcpu); > if (!kvm_is_ucontrol(vcpu->kvm)) > sca_del_vcpu(vcpu); > + kvm_s390_update_topology_change_report(vcpu->kvm, 1); > > if (kvm_is_ucontrol(vcpu->kvm)) > gmap_remove(vcpu->arch.gmap); > @@ -3272,6 +3298,14 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) > vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; > if (test_kvm_facility(vcpu->kvm, 9)) > vcpu->arch.sie_block->ecb |= ECB_SRSI; > + /* > + * CPU Topology > + * This facility only uses the utility field of the SCA and none > + * of the cpu entries that are problematic with the other > + * interpretation facilities so we can pass it 
through. > + */ This is the comment for vsie.c. > + if (test_kvm_facility(vcpu->kvm, 11)) > + vcpu->arch.sie_block->ecb |= ECB_PTF; > if (test_kvm_facility(vcpu->kvm, 73)) > vcpu->arch.sie_block->ecb |= ECB_TE; > if (!kvm_is_ucontrol(vcpu->kvm)) > @@ -3403,6 +3437,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) > rc = kvm_s390_vcpu_setup(vcpu); > if (rc) > goto out_ucontrol_uninit; > + > + kvm_s390_update_topology_change_report(vcpu->kvm, 1); > return 0; > > out_ucontrol_uninit: > diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c > index 12c464c7cddf..046afee1be94 100644 > --- a/arch/s390/kvm/priv.c > +++ b/arch/s390/kvm/priv.c > @@ -873,10 +873,13 @@ static int handle_stsi(struct kvm_vcpu *vcpu) > if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) > return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); > > - if (fc > 3) { > - kvm_s390_set_psw_cc(vcpu, 3); > - return 0; > - } > + /* Bailout forbidden function codes */ > + if (fc > 3 && (fc != 15 || kvm_s390_pv_cpu_is_protected(vcpu))) > + goto out_no_data; > + > + /* fc 15 is provided with PTF/CPU topology support */ > + if (fc == 15 && !test_kvm_facility(vcpu->kvm, 11)) > + goto out_no_data; > > if (vcpu->run->s.regs.gprs[0] & 0x0fffff00 > || vcpu->run->s.regs.gprs[1] & 0xffff0000) > @@ -910,6 +913,11 @@ static int handle_stsi(struct kvm_vcpu *vcpu) > goto out_no_data; > handle_stsi_3_2_2(vcpu, (void *) mem); > break; > + case 15: /* fc 15 is fully handled in userspace */ > + if (vcpu->kvm->arch.user_stsi) > + insert_stsi_usr_data(vcpu, operand2, ar, fc, sel1, sel2); > + trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); > + return -EREMOTE; This doesn't look right to me: you still return -EREMOTE even if user_stsi is false. 
The way I read the PoP (Principles of Operation) here is that it is OK to set condition code 3 in the else case. > } > if (kvm_s390_pv_cpu_is_protected(vcpu)) { > memcpy((void *)sida_origin(vcpu->arch.sie_block), (void *)mem, > diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c > index dada78b92691..94138f8f0c1c 100644 > --- a/arch/s390/kvm/vsie.c > +++ b/arch/s390/kvm/vsie.c > @@ -503,6 +503,14 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) > /* Host-protection-interruption introduced with ESOP */ > if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP)) > scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT; > + /* > + * CPU Topology > + * This facility only uses the utility field of the SCA and none of > + * the cpu entries that are problematic with the other interpretation > + * facilities so we can pass it through > + */ > + if (test_kvm_facility(vcpu->kvm, 11)) > + scb_s->ecb |= scb_o->ecb & ECB_PTF; > /* transactional execution */ > if (test_kvm_facility(vcpu->kvm, 73) && wants_tx) { > /* remap the prefix is tx is toggled on */