We are currently cutting hva_to_pfn_fast short if we do not want an
immediate exit, which is represented by !async && !atomic.  However,
this is unnecessary, and __get_user_pages_fast is *much* faster
because the regular get_user_pages takes pmd_lock/pte_lock.  In fact,
when many CPUs take a nested vmexit at the same time the contention
on those locks is visible, and this patch removes about 25% (compared
to 4.18) from vmexit.flat on a 16 vCPU nested guest.

Suggested-by: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
 virt/kvm/kvm_main.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 861bb20e8451..0f26ff7ddedb 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1343,18 +1343,16 @@ static inline int check_user_page_hwpoison(unsigned long addr)
 }
 
 /*
- * The atomic path to get the writable pfn which will be stored in @pfn,
- * true indicates success, otherwise false is returned.
+ * The fast path to get the writable pfn which will be stored in @pfn,
+ * true indicates success, otherwise false is returned.  It's also the
+ * only part that runs if we are in atomic context.
  */
-static bool hva_to_pfn_fast(unsigned long addr, bool atomic, bool *async,
-			    bool write_fault, bool *writable, kvm_pfn_t *pfn)
+static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
+			    bool *writable, kvm_pfn_t *pfn)
 {
 	struct page *page[1];
 	int npages;
 
-	if (!(async || atomic))
-		return false;
-
 	/*
 	 * Fast pin a writable pfn only if it is a write fault request
 	 * or the caller allows to map a writable pfn for a read fault
@@ -1498,7 +1496,7 @@ static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
 	/* we can do it either atomically or asynchronously, not both */
 	BUG_ON(atomic && async);
 
-	if (hva_to_pfn_fast(addr, atomic, async, write_fault, writable, &pfn))
+	if (hva_to_pfn_fast(addr, write_fault, writable, &pfn))
		return pfn;
 
 	if (atomic)
-- 
1.8.3.1
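
[Editor's note] For readers who want the full picture, below is a sketch of
what hva_to_pfn_fast() looks like after this patch.  Only the signature, the
local declarations and the "Fast pin" comment appear in the quoted hunk; the
__get_user_pages_fast() call and the handling of its result are reconstructed
from how the surrounding code is usually written, so treat them as an
approximation rather than verbatim kernel source.

/*
 * Sketch (not verbatim): the fast path after this change.  Lines below
 * the "Fast pin" comment are assumed, as they fall outside the quoted hunk.
 */
static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
			    bool *writable, kvm_pfn_t *pfn)
{
	struct page *page[1];
	int npages;

	/*
	 * Fast pin a writable pfn only if it is a write fault request
	 * or the caller allows to map a writable pfn for a read fault
	 * request.
	 */
	if (!(write_fault || writable))
		return false;

	/* Lockless GUP: no pmd_lock/pte_lock, usable from atomic context. */
	npages = __get_user_pages_fast(addr, 1, 1, page);
	if (npages == 1) {
		*pfn = page_to_pfn(page[0]);
		if (writable)
			*writable = true;
		return true;
	}

	return false;
}

The point of the change is visible here: the lockless __get_user_pages_fast()
attempt is now made unconditionally, and only callers that are allowed to
sleep fall back to the slower get_user_pages-based path in hva_to_pfn().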