Current code just hold rmap lock to ensure parallel page table update is prevented. That is not sufficient. The kernel should also check whether a mmu_notifer callback was running in parallel. Cc: Alexey Kardashevskiy <aik@xxxxxxxxx> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxx> --- arch/powerpc/kvm/book3s_64_vio_hv.c | 30 +++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index ab6eeb8e753e..8a6bf12d2e88 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -438,8 +438,8 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, return H_SUCCESS; } -static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, - unsigned long ua, unsigned long *phpa) +static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq, + unsigned long ua, unsigned long *phpa) { pte_t *ptep, pte; unsigned shift = 0; @@ -453,10 +453,17 @@ static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, * to exit which will agains result in the below page table walk * to finish. */ - ptep = __find_linux_pte(vcpu->arch.pgdir, ua, NULL, &shift); - if (!ptep || !pte_present(*ptep)) + /* an rmap lock won't make it safe. because that just ensure hash + * page table entries are removed with rmap lock held. After that + * mmu notifier returns and we go ahead and removing ptes from Qemu page table. + */ + ptep = find_kvm_host_pte(vcpu->kvm, mmu_seq, ua, &shift); + if (!ptep) + return -ENXIO; + + pte = READ_ONCE(*ptep); + if (!pte_present(pte)) return -ENXIO; - pte = *ptep; if (!shift) shift = PAGE_SHIFT; @@ -478,10 +485,12 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce_list, unsigned long npages) { + struct kvm *kvm = vcpu->kvm; struct kvmppc_spapr_tce_table *stt; long i, ret = H_SUCCESS; unsigned long tces, entry, ua = 0; unsigned long *rmap = NULL; + unsigned long mmu_seq; bool prereg = false; struct kvmppc_spapr_tce_iommu_table *stit; @@ -489,6 +498,12 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, if (kvm_is_radix(vcpu->kvm)) return H_TOO_HARD; + /* + * used to check for invalidations in progress + */ + mmu_seq = kvm->mmu_notifier_seq; + smp_rmb(); + stt = kvmppc_find_table(vcpu->kvm, liobn); if (!stt) return H_TOO_HARD; @@ -548,7 +563,9 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, * real page. */ lock_rmap(rmap); - if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) { + + arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); + if (kvmppc_rm_ua_to_hpa(vcpu, mmu_seq, ua, &tces)) { ret = H_TOO_HARD; goto unlock_exit; } @@ -594,6 +611,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, if (rmap) unlock_rmap(rmap); + arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); return ret; } -- 2.24.1