KVM allocates pages with get_user_pages_*(), which take FOLL_GET
references. For long-term pinning of guest pages, pin_user_pages_*()
(which take FOLL_PIN references) must be used instead. Add a flag to
hva_to_pfn*() to allocate pinned pages when the memslot represents
encrypted memory.

Suggested-by: David Hildenbrand <david@xxxxxxxxxx>
Signed-off-by: Nikunj A Dadhania <nikunj@xxxxxxx>
---
 include/linux/kvm_host.h |  6 ++++
 virt/kvm/kvm_main.c      | 63 ++++++++++++++++++++++++++++++----------
 virt/kvm/kvm_mm.h        |  2 +-
 virt/kvm/pfncache.c      |  2 +-
 4 files changed, 56 insertions(+), 17 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f11039944c08..c23022960d51 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -55,6 +55,7 @@
  * include/linux/kvm_h.
  */
 #define KVM_MEMSLOT_INVALID	(1UL << 16)
+#define KVM_MEMSLOT_ENCRYPTED	(1UL << 17)
 
 /*
  * Bit 63 of the memslot generation number is an "update in-progress flag",
@@ -583,6 +584,11 @@ static inline unsigned long *kvm_second_dirty_bitmap(struct kvm_memory_slot *mem
 	return memslot->dirty_bitmap + len / sizeof(*memslot->dirty_bitmap);
 }
 
+static inline bool memslot_is_encrypted(const struct kvm_memory_slot *slot)
+{
+	return slot && (slot->flags & KVM_MEMSLOT_ENCRYPTED);
+}
+
 #ifndef KVM_DIRTY_LOG_MANUAL_CAPS
 #define KVM_DIRTY_LOG_MANUAL_CAPS KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE
 #endif
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0afc016cc54d..c035fe6b39ec 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2381,9 +2381,10 @@ static inline int check_user_page_hwpoison(unsigned long addr)
  * only part that runs if we can in atomic context.
  */
 static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
-			    bool *writable, kvm_pfn_t *pfn)
+			    bool *writable, kvm_pfn_t *pfn, bool use_pin)
 {
 	struct page *page[1];
+	bool ret;
 
 	/*
 	 * Fast pin a writable pfn only if it is a write fault request
@@ -2393,7 +2394,12 @@ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
 	if (!(write_fault || writable))
 		return false;
 
-	if (get_user_page_fast_only(addr, FOLL_WRITE, page)) {
+	if (!use_pin)
+		ret = get_user_page_fast_only(addr, FOLL_WRITE, page);
+	else
+		ret = pin_user_pages_fast_only(addr, 1, FOLL_WRITE | FOLL_LONGTERM, page);
+
+	if (ret) {
 		*pfn = page_to_pfn(page[0]);
 
 		if (writable)
@@ -2409,9 +2415,9 @@ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
  * 1 indicates success, -errno is returned if error is detected.
  */
 static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
-			   bool *writable, kvm_pfn_t *pfn)
+			   bool *writable, kvm_pfn_t *pfn, bool use_pin)
 {
-	unsigned int flags = FOLL_HWPOISON;
+	unsigned int flags = 0;
 	struct page *page;
 	int npages = 0;
 
@@ -2422,20 +2428,41 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
 
 	if (write_fault)
 		flags |= FOLL_WRITE;
-	if (async)
-		flags |= FOLL_NOWAIT;
 
-	npages = get_user_pages_unlocked(addr, 1, &page, flags);
+	if (!use_pin) {
+		flags |= FOLL_HWPOISON;
+		if (async)
+			flags |= FOLL_NOWAIT;
+
+		npages = get_user_pages_unlocked(addr, 1, &page, flags);
+	} else {
+		/*
+		 * FOLL_LONGTERM is not supported in pin_user_pages_unlocked,
+		 * use *_fast instead.
+		 */
+		flags |= FOLL_LONGTERM;
+		npages = pin_user_pages_fast(addr, 1, flags, &page);
+	}
+
 	if (npages != 1)
 		return npages;
 
 	/* map read fault as writable if possible */
 	if (unlikely(!write_fault) && writable) {
 		struct page *wpage;
+		bool ret;
+
+		if (!use_pin)
+			ret = get_user_page_fast_only(addr, FOLL_WRITE, &wpage);
+		else
+			ret = pin_user_pages_fast_only(addr, 1, FOLL_WRITE | FOLL_LONGTERM, &wpage);
 
-		if (get_user_page_fast_only(addr, FOLL_WRITE, &wpage)) {
+		if (ret) {
 			*writable = true;
-			put_page(page);
+			if (!use_pin)
+				put_page(page);
+			else
+				unpin_user_page(page);
 			page = wpage;
 		}
 	}
@@ -2541,7 +2568,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
  * whether the mapping is writable.
  */
 kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
-		     bool write_fault, bool *writable)
+		     bool write_fault, bool *writable, bool use_pin)
 {
 	struct vm_area_struct *vma;
 	kvm_pfn_t pfn = 0;
@@ -2550,13 +2577,13 @@ kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
 	/* we can do it either atomically or asynchronously, not both */
 	BUG_ON(atomic && async);
 
-	if (hva_to_pfn_fast(addr, write_fault, writable, &pfn))
+	if (hva_to_pfn_fast(addr, write_fault, writable, &pfn, use_pin))
 		return pfn;
 
 	if (atomic)
 		return KVM_PFN_ERR_FAULT;
 
-	npages = hva_to_pfn_slow(addr, async, write_fault, writable, &pfn);
+	npages = hva_to_pfn_slow(addr, async, write_fault, writable, &pfn, use_pin);
 	if (npages == 1)
 		return pfn;
 
@@ -2616,7 +2643,7 @@ kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn,
 	}
 
 	return hva_to_pfn(addr, atomic, async, write_fault,
-			  writable);
+			  writable, memslot_is_encrypted(slot));
 }
 EXPORT_SYMBOL_GPL(__gfn_to_pfn_memslot);
 
@@ -2788,8 +2815,14 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean);
 
 void kvm_release_pfn_clean(kvm_pfn_t pfn)
 {
-	if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn))
-		put_page(pfn_to_page(pfn));
+	if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn)) {
+		struct page *page = pfn_to_page(pfn);
+
+		if (page_maybe_dma_pinned(page))
+			unpin_user_page(page);
+		else
+			put_page(page);
+	}
 }
 EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
 
diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
index 34ca40823260..b1a5e379949b 100644
--- a/virt/kvm/kvm_mm.h
+++ b/virt/kvm/kvm_mm.h
@@ -25,7 +25,7 @@
 #endif /* KVM_HAVE_MMU_RWLOCK */
 
 kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
-		     bool write_fault, bool *writable);
+		     bool write_fault, bool *writable, bool use_pin);
 
 #ifdef CONFIG_HAVE_KVM_PFNCACHE
 void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index ce878f4be4da..44384f06c81b 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -135,7 +135,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, unsigned long uhva)
 		smp_rmb();
 
 		/* We always request a writeable mapping */
-		new_pfn = hva_to_pfn(uhva, false, NULL, true, NULL);
+		new_pfn = hva_to_pfn(uhva, false, NULL, true, NULL, false);
 		if (is_error_noslot_pfn(new_pfn))
 			break;
-- 
2.32.0
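
For context, the FOLL_GET vs. FOLL_PIN distinction that motivates the patch can
be illustrated with a minimal sketch (not part of the patch; the demo_* helper
names are made up for illustration). It shows how each acquisition path must be
paired with its matching release primitive, which is the same pairing the
use_pin logic in hva_to_pfn_slow() and the page_maybe_dma_pinned() check in
kvm_release_pfn_clean() maintain:

#include <linux/errno.h>
#include <linux/mm.h>

/* Grab one reference on the page backing @hva. Illustrative helper only. */
static int demo_get_guest_page(unsigned long hva, bool long_term,
			       struct page **page)
{
	int npages;

	if (long_term)
		/* FOLL_PIN path: FOLL_LONGTERM requires pin_user_pages_*(). */
		npages = pin_user_pages_fast(hva, 1,
					     FOLL_WRITE | FOLL_LONGTERM, page);
	else
		/* FOLL_GET path: a plain, short-lived reference. */
		npages = get_user_pages_unlocked(hva, 1, page, FOLL_WRITE);

	return npages == 1 ? 0 : -EFAULT;
}

/* Release the reference taken above; the release must match the acquire. */
static void demo_put_guest_page(struct page *page, bool long_term)
{
	if (long_term)
		unpin_user_page(page);	/* pairs with pin_user_pages_*() */
	else
		put_page(page);		/* pairs with get_user_pages_*() */
}

Because a FOLL_PIN reference must be dropped with unpin_user_page() rather than
put_page(), the release path in kvm_release_pfn_clean() checks
page_maybe_dma_pinned() to pick the matching call, as done in the hunk above.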