On 7/31/20 3:25 PM, Sean Christopherson wrote:
> On Fri, Jul 24, 2020 at 06:54:46PM -0500, eric van tassell wrote:
>> Improve SEV guest startup time from O(n) to a constant by deferring
>> guest page pinning until the pages are used to satisfy nested page
>> faults. Implement the code to do the pinning (sev_get_page) and the
>> notifier sev_set_spte_notify(). Track the pinned pages with xarray so
>> they can be released during guest termination.
>
> I like that SEV is trying to be a better citizen, but this is trading
> one hack for another.
>
>   - KVM goes through a lot of effort to ensure page faults don't need
>     to allocate memory, and this throws all that effort out the window.
can you elaborate on that?
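For concreteness, I read the allocation concern as pointing at the
xa_insert(..., GFP_ATOMIC) that sev_get_page() does from the nested page
fault path. If that's the issue, here's an untested sketch (helper names
made up, not part of this patch) of reserving the xarray slot from a
sleepable context so the fault path itself never has to allocate:

/*
 * Untested sketch: reserve the gfn's slot in a context where allocation
 * is allowed, i.e. somewhere outside the page fault path.
 */
static int sev_reserve_gfn(struct kvm_sev_info *sev, gfn_t gfn)
{
        return xa_reserve(&sev->pages_xarray, gfn, GFP_KERNEL_ACCOUNT);
}

/* The fault path then only fills the already-reserved slot. */
static int sev_pin_reserved_gfn(struct kvm_sev_info *sev, gfn_t gfn,
                                struct page *page)
{
        void *old = xa_store(&sev->pages_xarray, gfn, page, GFP_ATOMIC);

        if (xa_is_err(old))
                return xa_err(old);

        get_page(page);
        return 0;
}

The obvious trade-off is that reserving per-gfn up front brings back O(n)
work outside the fault path, so this only shows where the allocation could
move, not that it's free.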
>   - Tracking all gfns in a separate database (from the MMU) is wasteful.
>
>   - Having to wait to free pinned memory until the VM is destroyed is
>     less than ideal.
>
> More thoughts in the next patch.
>
>> Signed-off-by: eric van tassell <Eric.VanTassell@xxxxxxx>
>> ---
>>  arch/x86/kvm/svm/sev.c | 71 ++++++++++++++++++++++++++++++++++++++++++
>>  arch/x86/kvm/svm/svm.c |  2 ++
>>  arch/x86/kvm/svm/svm.h |  3 ++
>>  3 files changed, 76 insertions(+)
>>
>> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
>> index f7f1f4ecf08e..040ae4aa7c5a 100644
>> --- a/arch/x86/kvm/svm/sev.c
>> +++ b/arch/x86/kvm/svm/sev.c
>> @@ -184,6 +184,8 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
>>      sev->asid = asid;
>>      INIT_LIST_HEAD(&sev->regions_list);
>>
>> +    xa_init(&sev->pages_xarray);
>> +
>>      return 0;
>>
>>  e_free:
>> @@ -415,6 +417,42 @@ static unsigned long get_num_contig_pages(unsigned long idx,
>>      return pages;
>>  }
>>
>> +static int sev_get_page(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn)
>> +{
>> +    struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
>> +    struct xarray *xa = &sev->pages_xarray;
>> +    struct page *page = pfn_to_page(pfn);
>> +    int ret;
>> +
>> +    /* store page at index = gfn */
>> +    ret = xa_insert(xa, gfn, page, GFP_ATOMIC);
>> +    if (ret == -EBUSY) {
>> +        /*
>> +         * If xa_insert returned -EBUSY, the gfn was already associated
>> +         * with a struct page *.
>> +         */
>> +        struct page *cur_page;
>> +
>> +        cur_page = xa_load(xa, gfn);
>> +        /* If cur_page == page, no change is needed, so return 0 */
>> +        if (cur_page == page)
>> +            return 0;
>> +
>> +        /* Release the page that was stored at index = gfn */
>> +        put_page(cur_page);
>> +
>> +        /* Return result of attempting to store page at index = gfn */
>> +        ret = xa_err(xa_store(xa, gfn, page, GFP_ATOMIC));
>> +    }
>> +
>> +    if (ret)
>> +        return ret;
>> +
>> +    get_page(page);
>> +
>> +    return 0;
>> +}
>> +
>>  static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
>>  {
>>      unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
>> @@ -1085,6 +1123,8 @@ void sev_vm_destroy(struct kvm *kvm)
>>      struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
>>      struct list_head *head = &sev->regions_list;
>>      struct list_head *pos, *q;
>> +    XA_STATE(xas, &sev->pages_xarray, 0);
>> +    struct page *xa_page;
>>
>>      if (!sev_guest(kvm))
>>          return;
>> @@ -1109,6 +1149,12 @@ void sev_vm_destroy(struct kvm *kvm)
>>          }
>>      }
>>
>> +    /* Release each pinned page that SEV tracked in sev->pages_xarray. */
>> +    xas_for_each(&xas, xa_page, ULONG_MAX) {
>> +        put_page(xa_page);
>> +    }
>> +    xa_destroy(&sev->pages_xarray);
>> +
>>      mutex_unlock(&kvm->lock);
>>
>>      sev_unbind_asid(kvm, sev->handle);
>> @@ -1193,3 +1239,28 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
>>      svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
>>      vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
>>  }
>> +
>> +int sev_set_spte_notify(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn,
>> +                        int level, bool mmio, u64 *spte)
>> +{
>> +    int rc;
>> +
>> +    if (!sev_guest(vcpu->kvm))
>> +        return 0;
>> +
>> +    /* MMIO page contains the unencrypted data, no need to lock this page */
>> +    if (mmio)
>
> Rather than make this a generic set_spte() notify hook, I think it makes
> more sense to specifically have it be a "pin_spte" style hook. That way
> the caller can skip mmio PFNs as well as flows that can't possibly be
> relevant to SEV, e.g. the sync_page() flow.

Not sure I understand. We do ignore mmio here. Can you detail a bit more
what you see as problematic with the sync_page() flow?
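To make sure I'm reading "pin_spte style" the way you intend, is the idea
roughly the hook below (hypothetical signature, just for discussion), where
the MMU fault path only invokes it for non-mmio pfns and the sync_page()
flow never calls it, so the mmio parameter goes away on the SVM side?

        int (*pin_spte)(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn,
                        int level);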
>> +        return 0;
>> +
>> +    rc = sev_get_page(vcpu->kvm, gfn, pfn);
>> +    if (rc)
>> +        return rc;
>> +
>> +    /*
>> +     * Flush any cached lines of the page being added since "ownership" of
>> +     * it will be transferred from the host to an encrypted guest.
>> +     */
>> +    clflush_cache_range(__va(pfn << PAGE_SHIFT), page_level_size(level));
>> +
>> +    return 0;
>> +}
>> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
>> index 535ad311ad02..9b304c761a99 100644
>> --- a/arch/x86/kvm/svm/svm.c
>> +++ b/arch/x86/kvm/svm/svm.c
>> @@ -4130,6 +4130,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
>>      .need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
>>
>>      .apic_init_signal_blocked = svm_apic_init_signal_blocked,
>> +
>> +    .set_spte_notify = sev_set_spte_notify,
>>  };
>>
>>  static struct kvm_x86_init_ops svm_init_ops __initdata = {
>> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
>> index 121b198b51e9..8a5c01516c89 100644
>> --- a/arch/x86/kvm/svm/svm.h
>> +++ b/arch/x86/kvm/svm/svm.h
>> @@ -65,6 +65,7 @@ struct kvm_sev_info {
>>      int fd;                     /* SEV device fd */
>>      unsigned long pages_locked; /* Number of pages locked */
>>      struct list_head regions_list;  /* List of registered regions */
>> +    struct xarray pages_xarray; /* List of PFN locked */
>>  };
>>
>>  struct kvm_svm {
>> @@ -488,5 +489,7 @@ int svm_unregister_enc_region(struct kvm *kvm,
>>  void pre_sev_run(struct vcpu_svm *svm, int cpu);
>>  int __init sev_hardware_setup(void);
>>  void sev_hardware_teardown(void);
>> +int sev_set_spte_notify(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn,
>> +                        int level, bool mmio, u64 *spte);
>>
>>  #endif
>> --
>> 2.17.1