On 24.10.19 13:40, Janosch Frank wrote:
Let's add a KVM interface to create and destroy protected VMs.
More details please.
[...]
+#ifdef CONFIG_KVM_S390_PROTECTED_VIRTUALIZATION_HOST
+static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
+{
+ int r = 0;
+ void __user *argp = (void __user *)cmd->data;
+
+ switch (cmd->cmd) {
+ case KVM_PV_VM_CREATE: {
+ r = kvm_s390_pv_alloc_vm(kvm);
+ if (r)
+ break;
So ... I can create multiple VMs?
In particular, I can call KVM_PV_VM_CREATE twice: the second call sets
kvm->arch.pv.stor_var = NULL and leaks the memory from the first call.
Not sure if that's desirable.
Shouldn't this be something like "KVM_PV_VM_INIT" and then make sure it
can only be called once?
+
+ mutex_lock(&kvm->lock);
+ kvm_s390_vcpu_block_all(kvm);
+ /* FMT 4 SIE needs esca */
+ r = sca_switch_to_extended(kvm);
+ if (!r)
+ r = kvm_s390_pv_create_vm(kvm);
+ kvm_s390_vcpu_unblock_all(kvm);
+ mutex_unlock(&kvm->lock);
+ break;
+ }
+ case KVM_PV_VM_DESTROY: {
+ /* All VCPUs have to be destroyed before this call. */
Then please verify that? (And, matching the naming suggested above, call it "KVM_PV_VM_DEINIT"?)
Also, who guarantees that user space calls this at all? Why is that
needed? (IOW, when does user space call this?)
+ mutex_lock(&kvm->lock);
+ kvm_s390_vcpu_block_all(kvm);
+ r = kvm_s390_pv_destroy_vm(kvm);
+ if (!r)
+ kvm_s390_pv_dealloc_vm(kvm);
+ kvm_s390_vcpu_unblock_all(kvm);
+ mutex_unlock(&kvm->lock);
+ break;
+ }
+ case KVM_PV_VM_SET_SEC_PARMS: {
+ struct kvm_s390_pv_sec_parm parms = {};
+ void *hdr;
+
+ r = -EFAULT;
+ if (copy_from_user(&parms, argp, sizeof(parms)))
+ break;
+
+ /* Currently restricted to 8KB */
+ r = -EINVAL;
+ if (parms.length > PAGE_SIZE * 2)
+ break;
+
+ r = -ENOMEM;
+ hdr = vmalloc(parms.length);
+ if (!hdr)
+ break;
+
+ r = -EFAULT;
+ if (!copy_from_user(hdr, (void __user *)parms.origin,
+ parms.length))
+ r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length);
+
+ vfree(hdr);
+ break;
+ }
+ case KVM_PV_VM_UNPACK: {
+ struct kvm_s390_pv_unp unp = {};
+
+ r = -EFAULT;
+ if (copy_from_user(&unp, argp, sizeof(unp)))
+ break;
+
+ r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak);
+ break;
+ }
+ case KVM_PV_VM_VERIFY: {
+ u32 ret;
+
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = uv_cmd_nodata(kvm_s390_pv_handle(kvm),
+ UVC_CMD_VERIFY_IMG,
+ &ret);
+ VM_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x",
+ ret >> 16, ret & 0x0000ffff);
+ break;
+ }
+ default:
+ return -ENOTTY;
+ }
+ return r;
+}
+#endif
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -2254,6 +2351,22 @@ long kvm_arch_vm_ioctl(struct file *filp,
mutex_unlock(&kvm->slots_lock);
break;
}
+#ifdef CONFIG_KVM_S390_PROTECTED_VIRTUALIZATION_HOST
+ case KVM_S390_PV_COMMAND: {
+ struct kvm_pv_cmd args;
+
+ r = -EINVAL;
+ if (!is_prot_virt_host())
+ break;
+
+ r = -EFAULT;
+ if (copy_from_user(&args, argp, sizeof(args)))
+ break;
+
+ r = kvm_s390_handle_pv(kvm, &args);
+ break;
+ }
+#endif
default:
r = -ENOTTY;
}
@@ -2529,6 +2642,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
if (vcpu->kvm->arch.use_cmma)
kvm_s390_vcpu_unsetup_cmma(vcpu);
+ if (IS_ENABLED(CONFIG_KVM_S390_PROTECTED_VIRTUALIZATION_HOST) &&
+ kvm_s390_pv_handle_cpu(vcpu))
+ kvm_s390_pv_destroy_cpu(vcpu);
free_page((unsigned long)(vcpu->arch.sie_block));
kvm_vcpu_uninit(vcpu);
@@ -2555,8 +2671,13 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_free_vcpus(kvm);
sca_dispose(kvm);
- debug_unregister(kvm->arch.dbf);
kvm_s390_gisa_destroy(kvm);
+ if (IS_ENABLED(CONFIG_KVM_S390_PROTECTED_VIRTUALIZATION_HOST) &&
+ kvm_s390_pv_is_protected(kvm)) {
+ kvm_s390_pv_destroy_vm(kvm);
+ kvm_s390_pv_dealloc_vm(kvm);
+ }
+ debug_unregister(kvm->arch.dbf);
free_page((unsigned long)kvm->arch.sie_page2);
if (!kvm_is_ucontrol(kvm))
gmap_remove(kvm->arch.gmap);
@@ -2652,6 +2773,9 @@ static int sca_switch_to_extended(struct kvm *kvm)
unsigned int vcpu_idx;
u32 scaol, scaoh;
+ if (kvm->arch.use_esca)
+ return 0;
+
new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
if (!new_sca)
return -ENOMEM;
@@ -3073,6 +3197,15 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
rc = kvm_vcpu_init(vcpu, kvm, id);
if (rc)
goto out_free_sie_block;
+
+ if (kvm_s390_pv_is_protected(kvm)) {
+ rc = kvm_s390_pv_create_cpu(vcpu);
+ if (rc) {
+ kvm_vcpu_uninit(vcpu);
+ goto out_free_sie_block;
+ }
+ }
+
VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
vcpu->arch.sie_block);
trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
@@ -4338,6 +4471,28 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
return -ENOIOCTLCMD;
}
+#ifdef CONFIG_KVM_S390_PROTECTED_VIRTUALIZATION_HOST
+static int kvm_s390_handle_pv_vcpu(struct kvm_vcpu *vcpu,
+ struct kvm_pv_cmd *cmd)
+{
+ int r = 0;
+
+ switch (cmd->cmd) {
+ case KVM_PV_VCPU_CREATE: {
+ r = kvm_s390_pv_create_cpu(vcpu);
+ break;
+ }
+ case KVM_PV_VCPU_DESTROY: {
+ r = kvm_s390_pv_destroy_cpu(vcpu);
+ break;
+ }
+ default:
+ r = -ENOTTY;
+ }
+ return r;
+}
+#endif
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -4470,6 +4625,22 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
irq_state.len);
break;
}
+#ifdef CONFIG_KVM_S390_PROTECTED_VIRTUALIZATION_HOST
+ case KVM_S390_PV_COMMAND_VCPU: {
+ struct kvm_pv_cmd args;
+
+ r = -EINVAL;
+ if (!is_prot_virt_host())
+ break;
+
+ r = -EFAULT;
+ if (copy_from_user(&args, argp, sizeof(args)))
+ break;
+
+ r = kvm_s390_handle_pv_vcpu(vcpu, &args);
+ break;
+ }
+#endif
default:
r = -ENOTTY;
}
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 6d9448dbd052..0d61dcc51f0e 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -196,6 +196,53 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
return kvm->arch.user_cpu_state_ctrl != 0;
}
+#ifdef CONFIG_KVM_S390_PROTECTED_VIRTUALIZATION_HOST
+/* implemented in pv.c */
+void kvm_s390_pv_unpin(struct kvm *kvm);
+void kvm_s390_pv_dealloc_vm(struct kvm *kvm);
+int kvm_s390_pv_alloc_vm(struct kvm *kvm);
+int kvm_s390_pv_create_vm(struct kvm *kvm);
+int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu);
+int kvm_s390_pv_destroy_vm(struct kvm *kvm);
+int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu);
+int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length);
+int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
+ unsigned long tweak);
+int kvm_s390_pv_verify(struct kvm *kvm);
+
+static inline bool kvm_s390_pv_is_protected(struct kvm *kvm)
+{
+ return !!kvm->arch.pv.handle;
+}
+
+static inline u64 kvm_s390_pv_handle(struct kvm *kvm)
+{
+ return kvm->arch.pv.handle;
+}
+
+static inline u64 kvm_s390_pv_handle_cpu(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.pv.handle;
+}
+#else
+static inline void kvm_s390_pv_unpin(struct kvm *kvm) {}
+static inline void kvm_s390_pv_dealloc_vm(struct kvm *kvm) {}
+static inline int kvm_s390_pv_alloc_vm(struct kvm *kvm) { return 0; }
+static inline int kvm_s390_pv_create_vm(struct kvm *kvm) { return 0; }
+static inline int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu) { return 0; }
+static inline int kvm_s390_pv_destroy_vm(struct kvm *kvm) { return 0; }
+static inline int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu) { return 0; }
+static inline int kvm_s390_pv_set_sec_parms(struct kvm *kvm,
+ u64 origin, u64 length) { return 0; }
+static inline int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr,
+ unsigned long size, unsigned long tweak)
+{ return 0; }
+static inline int kvm_s390_pv_verify(struct kvm *kvm) { return 0; }
+static inline bool kvm_s390_pv_is_protected(struct kvm *kvm) { return 0; }
+static inline u64 kvm_s390_pv_handle(struct kvm *kvm) { return 0; }
+static inline u64 kvm_s390_pv_handle_cpu(struct kvm_vcpu *vcpu) { return 0; }
+#endif
+
/* implemented in interrupt.c */
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
new file mode 100644
index 000000000000..94cf16f40f25
--- /dev/null
+++ b/arch/s390/kvm/pv.c
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hosting Secure Execution virtual machines
+ *
+ * Copyright IBM Corp. 2019
+ * Author(s): Janosch Frank <frankja@xxxxxxxxxxxxx>
+ */
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/pagemap.h>
+#include <asm/pgalloc.h>
+#include <asm/gmap.h>
+#include <asm/uv.h>
+#include <asm/gmap.h>
+#include <asm/mman.h>
+#include "kvm-s390.h"
+
+void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
+{
+ vfree(kvm->arch.pv.stor_var);
+ free_pages(kvm->arch.pv.stor_base,
+ get_order(uv_info.guest_base_stor_len));
+ memset(&kvm->arch.pv, 0, sizeof(kvm->arch.pv));
+}
+
+int kvm_s390_pv_alloc_vm(struct kvm *kvm)
+{
+ unsigned long base = uv_info.guest_base_stor_len;
+ unsigned long virt = uv_info.guest_virt_var_stor_len;
+ unsigned long npages = 0, vlen = 0;
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+
+ kvm->arch.pv.stor_var = NULL;
+ kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL, get_order(base));
+ if (!kvm->arch.pv.stor_base)
+ return -ENOMEM;
+
+ /*
+ * Calculate current guest storage for allocation of the
+ * variable storage, which is based on the length in MB.
+ *
+ * Slots are sorted by GFN
+ */
+ mutex_lock(&kvm->slots_lock);
+ slots = kvm_memslots(kvm);
+ memslot = slots->memslots;
+ npages = memslot->base_gfn + memslot->npages;
What if
a) your guest has multiple memory slots
b) you hotplug memory and add memslots later
Do you fence that, and if so, how?
+
+ mutex_unlock(&kvm->slots_lock);
+ kvm->arch.pv.guest_len = npages * PAGE_SIZE;
+
+ /* Allocate variable storage */
+ vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
I get the feeling that prot virt mainly consumes memory ;)
+ vlen += uv_info.guest_virt_base_stor_len;
+ kvm->arch.pv.stor_var = vzalloc(vlen);
+ if (!kvm->arch.pv.stor_var) {
+ kvm_s390_pv_dealloc_vm(kvm);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+int kvm_s390_pv_destroy_vm(struct kvm *kvm)
+{
+ int rc;
+ u32 ret;
+
+ rc = uv_cmd_nodata(kvm_s390_pv_handle(kvm),
+ UVC_CMD_DESTROY_SEC_CONF, &ret);
+ VM_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x",
+ ret >> 16, ret & 0x0000ffff);
+ return rc;
+}
+
+int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu)
+{
+ int rc = 0;
+ u32 ret;
+
+ if (kvm_s390_pv_handle_cpu(vcpu)) {
+ rc = uv_cmd_nodata(kvm_s390_pv_handle_cpu(vcpu),
+ UVC_CMD_DESTROY_SEC_CPU,
+ &ret);
+
+ VCPU_EVENT(vcpu, 3, "PROTVIRT DESTROY VCPU: cpu %d rc %x rrc %x",
+ vcpu->vcpu_id, ret >> 16, ret & 0x0000ffff);
+ }
+
+ free_pages(vcpu->arch.pv.stor_base,
+ get_order(uv_info.guest_cpu_stor_len));
+ /* Clear cpu and vm handle */
+ memset(&vcpu->arch.sie_block->reserved10, 0,
+ sizeof(vcpu->arch.sie_block->reserved10));
+ memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
+ vcpu->arch.sie_block->sdf = 0;
+ return rc;
+}
+
+int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu)
+{
+ int rc;
+ struct uv_cb_csc uvcb = {
+ .header.cmd = UVC_CMD_CREATE_SEC_CPU,
+ .header.len = sizeof(uvcb),
+ };
+
+ /* EEXIST and ENOENT? */
+ if (kvm_s390_pv_handle_cpu(vcpu))
+ return -EINVAL;
+
+ vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL,
+ get_order(uv_info.guest_cpu_stor_len));
+ if (!vcpu->arch.pv.stor_base)
+ return -ENOMEM;
+
+ /* Input */
+ uvcb.guest_handle = kvm_s390_pv_handle(vcpu->kvm);
+ uvcb.num = vcpu->arch.sie_block->icpua;
+ uvcb.state_origin = (u64)vcpu->arch.sie_block;
+ uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base;
+
+ rc = uv_call(0, (u64)&uvcb);
+ VCPU_EVENT(vcpu, 3, "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x",
+ vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,
+ uvcb.header.rrc);
+
+ /* Output */
+ vcpu->arch.pv.handle = uvcb.cpu_handle;
+ vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle;
+ vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_handle(vcpu->kvm);
+ vcpu->arch.sie_block->sdf = 2;
+ if (!rc)
+ return 0;
+
+ kvm_s390_pv_destroy_cpu(vcpu);
+ return -EINVAL;
+}
+
+int kvm_s390_pv_create_vm(struct kvm *kvm)
+{
+ int rc;
+
+ struct uv_cb_cgc uvcb = {
+ .header.cmd = UVC_CMD_CREATE_SEC_CONF,
+ .header.len = sizeof(uvcb)
+ };
+
+ if (kvm_s390_pv_handle(kvm))
+ return -EINVAL;
+
+ /* Inputs */
+ uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
+ uvcb.guest_stor_len = kvm->arch.pv.guest_len;
+ uvcb.guest_asce = kvm->arch.gmap->asce;
+ uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base;
+ uvcb.conf_var_stor_origin = (u64)kvm->arch.pv.stor_var;
+
+ rc = uv_call(0, (u64)&uvcb);
+ VM_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x",
+ uvcb.guest_handle, uvcb.guest_stor_len, uvcb.header.rc,
+ uvcb.header.rrc);
+
+ /* Outputs */
+ kvm->arch.pv.handle = uvcb.guest_handle;
+
+ if (rc && (uvcb.header.rc & 0x8000)) {
+ kvm_s390_pv_destroy_vm(kvm);
+ kvm_s390_pv_dealloc_vm(kvm);
+ return -EINVAL;
+ }
+ return rc;
+}
+
+int kvm_s390_pv_set_sec_parms(struct kvm *kvm,
+ void *hdr, u64 length)
+{
+ int rc;
+ struct uv_cb_ssc uvcb = {
+ .header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
+ .header.len = sizeof(uvcb),
+ .sec_header_origin = (u64)hdr,
+ .sec_header_len = length,
+ .guest_handle = kvm_s390_pv_handle(kvm),
+ };
+
+ if (!kvm_s390_pv_handle(kvm))
+ return -EINVAL;
+
+ rc = uv_call(0, (u64)&uvcb);
+ VM_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
+ uvcb.header.rc, uvcb.header.rrc);
+ if (rc)
+ return -EINVAL;
+ return 0;
+}
+
+int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
+ unsigned long tweak)
+{
+ int i, rc = 0;
+ struct uv_cb_unp uvcb = {
+ .header.cmd = UVC_CMD_UNPACK_IMG,
+ .header.len = sizeof(uvcb),
+ .guest_handle = kvm_s390_pv_handle(kvm),
+ .tweak[0] = tweak
+ };
+
+ if (addr & ~PAGE_MASK || size & ~PAGE_MASK)
+ return -EINVAL;
+
+
+ VM_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
+ addr, size);
+ for (i = 0; i < size / PAGE_SIZE; i++) {
+ uvcb.gaddr = addr + i * PAGE_SIZE;
+ uvcb.tweak[1] = i * PAGE_SIZE;
+retry:
+ rc = uv_call(0, (u64)&uvcb);
+ if (!rc)
+ continue;
+ /* If not yet mapped fault and retry */
+ if (uvcb.header.rc == 0x10a) {
+ rc = gmap_fault(kvm->arch.gmap, uvcb.gaddr,
+ FAULT_FLAG_WRITE);
+ if (rc)
+ return rc;
+ goto retry;
+ }
+ VM_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx rc %x rrc %x",
+ uvcb.gaddr, uvcb.header.rc, uvcb.header.rrc);
+ break;
+ }
+ VM_EVENT(kvm, 3, "PROTVIRT VM UNPACK: finished with rc %x rrc %x",
+ uvcb.header.rc, uvcb.header.rrc);
+ return rc;
+}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 52641d8ca9e8..bb37d5710c89 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1000,6 +1000,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_PMU_EVENT_FILTER 173
#define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174
#define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175
+#define KVM_CAP_S390_PROTECTED 180
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1461,6 +1462,38 @@ struct kvm_enc_region {
/* Available with KVM_CAP_ARM_SVE */
#define KVM_ARM_VCPU_FINALIZE _IOW(KVMIO, 0xc2, int)
+struct kvm_s390_pv_sec_parm {
+ __u64 origin;
+ __u64 length;
+};
+
+struct kvm_s390_pv_unp {
+ __u64 addr;
+ __u64 size;
+ __u64 tweak;
+};
+
+enum pv_cmd_id {
+ KVM_PV_VM_CREATE,
+ KVM_PV_VM_DESTROY,
+ KVM_PV_VM_SET_SEC_PARMS,
+ KVM_PV_VM_UNPACK,
+ KVM_PV_VM_VERIFY,
+ KVM_PV_VCPU_CREATE,
+ KVM_PV_VCPU_DESTROY,
+};
+
+struct kvm_pv_cmd {
+ __u32 cmd;
+ __u16 rc;
+ __u16 rrc;
+ __u64 data;
+};
+
+/* Available with KVM_CAP_S390_SE */
+#define KVM_S390_PV_COMMAND _IOW(KVMIO, 0xc3, struct kvm_pv_cmd)
+#define KVM_S390_PV_COMMAND_VCPU _IOW(KVMIO, 0xc4, struct kvm_pv_cmd)
+
/* Secure Encrypted Virtualization command */
enum sev_cmd_id {
/* Guest initialization commands */
This is a lengthy patch and I haven't explored anything yet :)