[PATCH v3 34/49] i386/sev: Add KVM_EXIT_VMGEXIT handling for Page State Changes

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



When running SEV-SNP guests, the kernel may forward some subset of
VMGEXIT-based guest hypercalls to userspace. One of these is for Page
State Change requests, as documented by the GHCB specification[1].

Userspace does not directly have control over the SNP RMP table to
actually satisfy these requests, but will instead use the
kvm_convert_memory() interface, which issues the
KVM_SET_MEMORY_ATTRIBUTES ioctl to instruct KVM to map these
GPAs using private/shared memory and make the appropriate RMP changes
via the associated kernel hooks.

Add the basic infrastructure for handling KVM_EXIT_VMGEXIT events, and
then implement handling for Page State Change requests on top of that.

[1] https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/specifications/56421.pdf

Signed-off-by: Michael Roth <michael.roth@xxxxxxx>
---
 target/i386/kvm/kvm.c |   3 +
 target/i386/sev.c     | 152 ++++++++++++++++++++++++++++++++++++++++++
 target/i386/sev.h     |   2 +
 3 files changed, 157 insertions(+)

diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 59e9048e61..22eb21a2f3 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -5409,6 +5409,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
         ret = kvm_xen_handle_exit(cpu, &run->xen);
         break;
 #endif
+    case KVM_EXIT_VMGEXIT:
+        ret = kvm_handle_vmgexit(run);
+        break;
     default:
         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
         ret = -1;
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 0c8e4bdb4c..0c6a253138 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -1423,6 +1423,158 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
     return ret;
 }
 
+typedef struct __attribute__((__packed__)) PscHdr {
+    uint16_t cur_entry; /* index of the next entry to process */
+    uint16_t end_entry; /* index of the last entry, inclusive */
+    uint32_t reserved;
+} PscHdr;
+
+typedef struct __attribute__((__packed__)) PscEntry {
+    uint64_t cur_page    : 12, /* set to the entry's page count once handled */
+             gfn         : 40, /* guest frame number, in 0x1000-byte units */
+             operation   : 4,  /* 1 requests conversion to private */
+             pagesize    : 1,  /* set: entry spans 512 pages, clear: 1 page */
+             reserved    : 7;
+} PscEntry;
+
+#define VMGEXIT_PSC_MAX_ENTRY 253 /* capacity of SnpPscDesc.entries */
+
+typedef struct __attribute__((__packed__)) SnpPscDesc {
+    PscHdr hdr;
+    PscEntry entries[VMGEXIT_PSC_MAX_ENTRY];
+} SnpPscDesc;
+
+/* Gather the next run of adjacent PSC entries that convert the same way. */
+static int next_contig_gpa_range(SnpPscDesc *desc, uint16_t *entries_processed,
+                                 hwaddr *gfn_base, int *gfn_count,
+                                 bool *range_to_private)
+{
+    int idx = desc->hdr.cur_entry;
+
+    *range_to_private = false;
+    *gfn_count = 0;
+    *gfn_base = 0;
+    *entries_processed = 0;
+
+    /*
+     * Entries are marked as handled (cur_page set, entries_processed bumped)
+     * before the caller issues the conversion. Entry-specific
+     * PSC_ERROR_INVALID_ENTRY errors are not returned today, only the more
+     * general GENERIC/INVALID_HDR ones. If that changes, this bookkeeping
+     * must be deferred until the request succeeds or rewound after a failure.
+     * Guests do not currently act on entry-specific errors, so this is fine.
+     */
+    while (idx <= desc->hdr.end_entry) {
+        PscEntry *cur = &desc->entries[idx++];
+        bool cur_private = (cur->operation == 1);
+        int npages = 1;
+
+        if (cur->pagesize) {
+            npages = 512;
+        }
+
+        if (*gfn_count == 0) {
+            *gfn_base = cur->gfn;
+            *range_to_private = cur_private;
+        } else if (cur->gfn != *gfn_base + *gfn_count ||
+                   cur_private != *range_to_private) {
+            /* First non-adjacent entry: report the range gathered so far. */
+            return 0;
+        }
+
+        *gfn_count += npages;
+        cur->cur_page = npages;
+        *entries_processed += 1;
+    }
+
+    return *gfn_count ? 0 : -ENOENT;
+}
+
+#define GHCB_SHARED_BUF_SIZE    0x7f0
+#define PSC_ERROR_GENERIC       (0x100ULL << 32)
+#define PSC_ERROR_INVALID_HDR   ((0x1ULL << 32) | 1)
+#define PSC_ERROR_INVALID_ENTRY ((0x1ULL << 32) | 2)
+#define PSC_ENTRY_COUNT_MAX     VMGEXIT_PSC_MAX_ENTRY
+
+/*
+ * Handle a Page State Change request: copy the descriptor out of the GHCB
+ * shared buffer at @shared_gpa, convert each contiguous range, and copy it
+ * back. Errors go to the guest through @psc_ret; the return value is always 0.
+ */
+static int kvm_handle_vmgexit_psc(__u64 shared_gpa, __u64 *psc_ret)
+{
+    hwaddr len = GHCB_SHARED_BUF_SIZE;
+    MemTxAttrs attrs = { 0 };
+    uint8_t shared_buf[GHCB_SHARED_BUF_SIZE];
+    SnpPscDesc *desc = (SnpPscDesc *)shared_buf;
+    void *ghcb_shared_buf;
+    uint16_t entries_processed;
+    hwaddr gfn_base = 0;
+    int gfn_count = 0;
+    bool range_to_private;
+
+    *psc_ret = 0;
+    ghcb_shared_buf = address_space_map(&address_space_memory, shared_gpa,
+                                        &len, true, attrs);
+    if (len < GHCB_SHARED_BUF_SIZE) {
+        g_warning("unable to map entire shared GHCB buffer, mapped size %llu (expected %d)",
+                  (unsigned long long)len, GHCB_SHARED_BUF_SIZE);
+        *psc_ret = PSC_ERROR_GENERIC;
+        goto out_unmap;
+    }
+    memcpy(shared_buf, ghcb_shared_buf, GHCB_SHARED_BUF_SIZE);
+    address_space_unmap(&address_space_memory, ghcb_shared_buf, len, true, len);
+
+    if (desc->hdr.end_entry >= PSC_ENTRY_COUNT_MAX) {
+        *psc_ret = PSC_ERROR_INVALID_HDR;
+        return 0; /* buffer is already unmapped, so do not go to out_unmap */
+    }
+    if (desc->hdr.cur_entry > desc->hdr.end_entry) {
+        return 0; /* No more entries left to process. */
+    }
+    while (!next_contig_gpa_range(desc, &entries_processed,
+                                  &gfn_base, &gfn_count, &range_to_private)) {
+        int ret = kvm_convert_memory(gfn_base * 0x1000, gfn_count * 0x1000,
+                                     range_to_private);
+        if (ret) {
+            *psc_ret = PSC_ERROR_GENERIC; /* Indicate interrupted processing */
+            g_warning("error doing memory conversion: %d", ret);
+            break;
+        }
+        desc->hdr.cur_entry += entries_processed;
+    }
+
+    ghcb_shared_buf = address_space_map(&address_space_memory, shared_gpa,
+                                        &len, true, attrs);
+    if (len < GHCB_SHARED_BUF_SIZE) {
+        g_warning("unable to map entire shared GHCB buffer, mapped size %llu (expected %d)",
+                  (unsigned long long)len, GHCB_SHARED_BUF_SIZE);
+        *psc_ret = PSC_ERROR_GENERIC;
+        goto out_unmap;
+    }
+    memcpy(ghcb_shared_buf, shared_buf, GHCB_SHARED_BUF_SIZE);
+out_unmap:
+    if (ghcb_shared_buf) { /* address_space_map() may have returned NULL */
+        address_space_unmap(&address_space_memory, ghcb_shared_buf, len, true, len);
+    }
+    return 0;
+}
+
+/*
+ * Dispatch a KVM_EXIT_VMGEXIT exit according to its request type. Returns
+ * the PSC handler's result, or -1 for a type this code does not recognise.
+ */
+int kvm_handle_vmgexit(struct kvm_run *run)
+{
+    if (run->vmgexit.type != KVM_USER_VMGEXIT_PSC) {
+        warn_report("KVM: unknown vmgexit type: %d", run->vmgexit.type);
+        return -1;
+    }
+
+    return kvm_handle_vmgexit_psc(run->vmgexit.psc.shared_gpa,
+                                  &run->vmgexit.psc.ret);
+}
+
 static char *
 sev_common_get_sev_device(Object *obj, Error **errp)
 {
diff --git a/target/i386/sev.h b/target/i386/sev.h
index 5dc4767b1e..5cbfc3365b 100644
--- a/target/i386/sev.h
+++ b/target/i386/sev.h
@@ -66,4 +66,6 @@ int sev_inject_launch_secret(const char *hdr, const char *secret,
 int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size);
 void sev_es_set_reset_vector(CPUState *cpu);
 
+int kvm_handle_vmgexit(struct kvm_run *run);
+
 #endif
-- 
2.25.1





[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux