У пн, 2023-09-11 у 11:16 +0900, David Stevens пише: > From: David Stevens <stevensd@xxxxxxxxxxxx> > > Migrate functions which need access to is_refcounted_page to > __kvm_follow_pfn. The functions which need this are __kvm_faultin_pfn > and reexecute_instruction. The former requires replacing the async > in/out parameter with FOLL_NOWAIT parameter and the KVM_PFN_ERR_NEEDS_IO > return value. Handling non-refcounted pages is complicated, so it will > be done in a followup. The latter is a straightforward refactor. > > APIC related callers do not need to migrate because KVM controls the > memslot, so it will always be regular memory. Prefetch related callers > do not need to be migrated because atomic gfn_to_pfn calls can never > make it to hva_to_pfn_remapped. > > Signed-off-by: David Stevens <stevensd@xxxxxxxxxxxx> > --- > arch/x86/kvm/mmu/mmu.c | 43 ++++++++++++++++++++++++++++++++---------- > arch/x86/kvm/x86.c | 12 ++++++++++-- > 2 files changed, 43 insertions(+), 12 deletions(-) > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c > index e1d011c67cc6..e1eca26215e2 100644 > --- a/arch/x86/kvm/mmu/mmu.c > +++ b/arch/x86/kvm/mmu/mmu.c > @@ -4254,7 +4254,14 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) > static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) > { > struct kvm_memory_slot *slot = fault->slot; > - bool async; > + struct kvm_follow_pfn foll = { > + .slot = slot, > + .gfn = fault->gfn, > + .flags = fault->write ? 
FOLL_WRITE : 0, > + .try_map_writable = true, > + .guarded_by_mmu_notifier = true, > + .allow_non_refcounted_struct_page = false, > + }; > > /* > * Retry the page fault if the gfn hit a memslot that is being deleted > @@ -4283,12 +4290,20 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault > return RET_PF_EMULATE; > } > > - async = false; > - fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, &async, > - fault->write, &fault->map_writable, > - &fault->hva); > - if (!async) > - return RET_PF_CONTINUE; /* *pfn has correct page already */ > + foll.flags |= FOLL_NOWAIT; > + fault->pfn = __kvm_follow_pfn(&foll); > + > + if (!is_error_noslot_pfn(fault->pfn)) > + goto success; Unrelated, but I can't say I like the 'is_error_noslot_pfn()' name; I wish it were called something like is_valid_pfn(). > + > + /* > + * If __kvm_follow_pfn() failed because I/O is needed to fault in the > + * page, then either set up an asynchronous #PF to do the I/O, or if > + * doing an async #PF isn't possible, retry __kvm_follow_pfn() with > + * I/O allowed. All other failures are fatal, i.e. retrying won't help. > + */ > + if (fault->pfn != KVM_PFN_ERR_NEEDS_IO) > + return RET_PF_CONTINUE; > > if (!fault->prefetch && kvm_can_do_async_pf(vcpu)) { > trace_kvm_try_async_get_page(fault->addr, fault->gfn); > @@ -4306,9 +4321,17 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault > * to wait for IO. Note, gup always bails if it is unable to quickly > * get a page and a fatal signal, i.e. SIGKILL, is pending. 
> */ > - fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, true, NULL, > - fault->write, &fault->map_writable, > - &fault->hva); > + foll.flags |= FOLL_INTERRUPTIBLE; > + foll.flags &= ~FOLL_NOWAIT; > + fault->pfn = __kvm_follow_pfn(&foll); > + > + if (!is_error_noslot_pfn(fault->pfn)) > + goto success; > + > + return RET_PF_CONTINUE; > +success: > + fault->hva = foll.hva; > + fault->map_writable = foll.writable; > return RET_PF_CONTINUE; > } > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 6c9c81e82e65..2011a7e47296 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -8556,6 +8556,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, > { > gpa_t gpa = cr2_or_gpa; > kvm_pfn_t pfn; > + struct kvm_follow_pfn foll; > > if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF)) > return false; > @@ -8585,7 +8586,13 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, > * retry instruction -> write #PF -> emulation fail -> retry > * instruction -> ... > */ > - pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); > + foll = (struct kvm_follow_pfn) { > + .slot = gfn_to_memslot(vcpu->kvm, gpa_to_gfn(gpa)), > + .gfn = gpa_to_gfn(gpa), > + .flags = FOLL_WRITE, > + .allow_non_refcounted_struct_page = true, > + }; > + pfn = __kvm_follow_pfn(&foll); > > /* > * If the instruction failed on the error pfn, it can not be fixed, > @@ -8594,7 +8601,8 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, > if (is_error_noslot_pfn(pfn)) > return false; > > - kvm_release_pfn_clean(pfn); > + if (foll.is_refcounted_page) > + kvm_release_page_clean(pfn_to_page(pfn)); > > /* The instructions are well-emulated on direct mmu. */ > if (vcpu->arch.mmu->root_role.direct) { Overall this looks good, though I might have missed something. Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx> Best regards, Maxim Levitsky