On Wed, Jul 5, 2023 at 12:10 PM Yu Zhang <yu.c.zhang@xxxxxxxxxxxxxxx> wrote: > > > @@ -2514,35 +2512,26 @@ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault, > > * The slow path to get the pfn of the specified host virtual address, > > * 1 indicates success, -errno is returned if error is detected. > > */ > > -static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, > > - bool interruptible, bool *writable, kvm_pfn_t *pfn) > > +static int hva_to_pfn_slow(struct kvm_follow_pfn *foll, kvm_pfn_t *pfn) > > { > > - unsigned int flags = FOLL_HWPOISON; > > + unsigned int flags = FOLL_HWPOISON | FOLL_GET | foll->flags; > > struct page *page; > > int npages; > > > > might_sleep(); > > > > - if (writable) > > - *writable = write_fault; > > - > > - if (write_fault) > > - flags |= FOLL_WRITE; > > - if (async) > > - flags |= FOLL_NOWAIT; > > - if (interruptible) > > - flags |= FOLL_INTERRUPTIBLE; > > - > > - npages = get_user_pages_unlocked(addr, 1, &page, flags); > > + npages = get_user_pages_unlocked(foll->hva, 1, &page, flags); > > if (npages != 1) > > return npages; > > > > + foll->writable = (foll->flags & FOLL_WRITE) && foll->allow_write_mapping; > > + > > /* map read fault as writable if possible */ > > - if (unlikely(!write_fault) && writable) { > > + if (unlikely(!foll->writable) && foll->allow_write_mapping) { > > I guess !foll->writable should be !(foll->flags & FOLL_WRITE) here. The two statements are logically equivalent, although I guess using !(foll->flags & FOLL_WRITE) may be a little clearer, if a little more verbose. > > struct page *wpage; > > > > - if (get_user_page_fast_only(addr, FOLL_WRITE, &wpage)) { > > - *writable = true; > > + if (get_user_page_fast_only(foll->hva, FOLL_WRITE, &wpage)) { > > + foll->writable = true; > > put_page(page); > > page = wpage; > > } > > @@ -2572,23 +2561,23 @@ static int kvm_try_get_pfn(kvm_pfn_t pfn) > > return get_page_unless_zero(page); > > } > > > ... 
> > > +kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn, > > + bool atomic, bool interruptible, bool *async, > > + bool write_fault, bool *writable, hva_t *hva) > > +{ > > + kvm_pfn_t pfn; > > + struct kvm_follow_pfn foll = { > > + .slot = slot, > > + .gfn = gfn, > > + .flags = 0, > > + .atomic = atomic, > > + .allow_write_mapping = !!writable, > > + }; > > + > > + if (write_fault) > > + foll.flags |= FOLL_WRITE; > > + if (async) > > + foll.flags |= FOLL_NOWAIT; > > + if (interruptible) > > + foll.flags |= FOLL_INTERRUPTIBLE; > > + > > + pfn = __kvm_follow_pfn(&foll); > > + if (pfn == KVM_PFN_ERR_NEEDS_IO) { > > Could we just use KVM_PFN_ERR_FAULT and foll.flags here? I.e., > if (pfn == KVM_PFN_ERR_FAULT && (foll.flags & FOLL_NOWAIT))? > Setting pfn to KVM_PFN_ERR_NEEDS_IO just to indicate an async fault > seems unnecessary. There are cases where the fault does not fall within a vma or when the target vma's flags don't support the fault's access permissions. In those cases, continuing to try to resolve the fault won't cause problems per se, but it's wasteful and a bit confusing. Having hva_to_pfn detect whether or not it may be possible to resolve the fault asynchronously and return KVM_PFN_ERR_NEEDS_IO if so seems like a good idea. It also matches what the existing code does. -David