On 3/18/25 7:59 PM, Christoph Schlameuss wrote:
Introduce a new shadow_sca function into kvm_s390_handle_vsie.
kvm_s390_handle_vsie is called within guest-1 when guest-2 initiates the
VSIE.
shadow_sca and unshadow_sca create and manage ssca_block structs in
guest-1 memory. References to the created ssca_blocks are kept within an
array and limited to the number of cpus. This ensures each VSIE in
execution can have its own SCA. Having the amount of shadowed SCAs
configurable above this is left to another patch.
SCAOL/H in the VSIE control block are overwritten with references to the
shadow SCA. The original SCA pointer is saved in the vsie_page and
restored on VSIE exit. This limits the amount of change in the
preexisting VSIE pin and shadow functions.
The shadow SCA contains the addresses of the original guest-3 SCA as
well as the original VSIE control blocks. With these addresses the
machine can directly monitor the intervention bits within the original
SCA entries.
The ssca_blocks are also kept within a radix tree so that already
existing ssca_blocks can be reused efficiently. Both the radix tree and
the array with references to the ssca_blocks are held in kvm_s390_vsie.
The use of the ssca_blocks is tracked using a ref_count on the block
itself.
No strict mapping between the guest-1 vcpu and guest-3 vcpu is enforced.
Instead each VSIE entry updates the shadow SCA creating a valid mapping
for all cpus currently in VSIE.
Signed-off-by: Christoph Schlameuss <schlameuss@xxxxxxxxxxxxx>
---
arch/s390/include/asm/kvm_host.h | 22 +++-
arch/s390/kvm/vsie.c | 264 ++++++++++++++++++++++++++++++++++++++-
2 files changed, 281 insertions(+), 5 deletions(-)
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 0aca5fa01f3d772c3b3dd62a22134c0d4cb9dc22..4ab196caa9e79e4c4d295d23fed65e1a142e6ab1 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -29,6 +29,7 @@
#define KVM_S390_BSCA_CPU_SLOTS 64
#define KVM_S390_ESCA_CPU_SLOTS 248
#define KVM_MAX_VCPUS 255
+#define KVM_S390_MAX_VCPUS 256
#define KVM_S390_SSCA_CPU_SLOTS 256
Yes, I'm aware that ESCA and MAX_VCPUS are pretty confusing.
I'm searching for solutions, but they might take a while.
#define KVM_INTERNAL_MEM_SLOTS 1
@@ -137,13 +138,23 @@ struct esca_block {
/*
* The shadow sca / ssca needs to cover both bsca and esca depending on what the
- * guest uses so we use KVM_S390_ESCA_CPU_SLOTS.
+ * guest uses so we allocate space for 256 entries that are defined in the
+ * architecture.
* The header part of the struct must not cross page boundaries.
*/
struct ssca_block {
__u64 osca;
__u64 reserved08[7];
- struct ssca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
+ struct ssca_entry cpu[KVM_S390_MAX_VCPUS];
This should have been resolved in the previous patch, no?
+};
+
+/*
+ * Store the vsie ssca block and accompanied management data.
+ */
+struct ssca_vsie {
+ struct ssca_block ssca; /* 0x0000 */
+ __u8 reserved[0x2200 - 0x2040]; /* 0x2040 */
+ atomic_t ref_count; /* 0x2200 */
};
[...]
void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
unsigned long end)
{
@@ -699,6 +932,9 @@ static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol;
if (hpa) {
+ /* with vsie_sigpif scaoh/l was pointing to g1 ssca_block but
+ * should have been reset in unshadow_sca()
+ */
There shouldn't be text in the first or last line of multi-line comments.
unpin_guest_page(vcpu->kvm, vsie_page->sca_gpa, hpa);
vsie_page->sca_gpa = 0;
scb_s->scaol = 0;
@@ -775,6 +1011,9 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if (rc)
goto unpin;
vsie_page->sca_gpa = gpa;
+ /* with vsie_sigpif scaoh and scaol will be overwritten
+ * in shadow_sca to point to g1 ssca_block instead
+ */
Same
scb_s->scaoh = (u32)((u64)hpa >> 32);
scb_s->scaol = (u32)(u64)hpa;
}
@@ -1490,12 +1729,17 @@ int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
goto out_unpin_scb;
rc = pin_blocks(vcpu, vsie_page);
if (rc)
- goto out_unshadow;
+ goto out_unshadow_scb;
+ rc = shadow_sca(vcpu, vsie_page);
+ if (rc)
+ goto out_unpin_blocks;
register_shadow_scb(vcpu, vsie_page);
rc = vsie_run(vcpu, vsie_page);
unregister_shadow_scb(vcpu);
As a matter of personal preference, I'd like to have a blank line here
to visually separate the cleanup from the rest of the code.
+ unshadow_sca(vcpu, vsie_page);
+out_unpin_blocks:
unpin_blocks(vcpu, vsie_page);
-out_unshadow:
+out_unshadow_scb:
unshadow_scb(vcpu, vsie_page);
out_unpin_scb:
unpin_scb(vcpu, vsie_page, scb_addr);
@@ -1510,12 +1754,15 @@ void kvm_s390_vsie_init(struct kvm *kvm)
{
mutex_init(&kvm->arch.vsie.mutex);
INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL_ACCOUNT);
+ init_rwsem(&kvm->arch.vsie.ssca_lock);
+ INIT_RADIX_TREE(&kvm->arch.vsie.osca_to_ssca, GFP_KERNEL_ACCOUNT);
}
/* Destroy the vsie data structures. To be called when a vm is destroyed. */
void kvm_s390_vsie_destroy(struct kvm *kvm)
{
struct vsie_page *vsie_page;
+ struct ssca_vsie *ssca;
int i;
mutex_lock(&kvm->arch.vsie.mutex);
@@ -1531,6 +1778,17 @@ void kvm_s390_vsie_destroy(struct kvm *kvm)
}
kvm->arch.vsie.page_count = 0;
mutex_unlock(&kvm->arch.vsie.mutex);
+
+ down_write(&kvm->arch.vsie.ssca_lock);
+ for (i = 0; i < kvm->arch.vsie.ssca_count; i++) {
+ ssca = kvm->arch.vsie.sscas[i];
+ kvm->arch.vsie.sscas[i] = NULL;
+ radix_tree_delete(&kvm->arch.vsie.osca_to_ssca,
+ (u64)phys_to_virt(ssca->ssca.osca));
+ free_pages((unsigned long)ssca, SSCA_PAGEORDER);
+ }
+ kvm->arch.vsie.ssca_count = 0;
+ up_write(&kvm->arch.vsie.ssca_lock);
}
void kvm_s390_vsie_kick(struct kvm_vcpu *vcpu)