On 2021-04-18 at 1:35 p.m., Philip Yang wrote:
> Retry fault interrupts may still be pending in the IH ring after the
> GPU page table is updated to recover a vm fault, because each page of
> the range generates a retry fault interrupt. There is a race if the
> application unmaps the range, removing and freeing it, before the retry
> fault work restore_pages handles the pending retry fault interrupt: the
> range cannot be found, so the vm fault cannot be recovered and an
> incorrect GPU vm fault is reported to the application.
>
> Before unmapping to remove and free the range, drain retry fault
> interrupts from IH ring1 to ensure no retry fault arrives after the
> range is removed.
>
> When draining retry fault interrupts, skip a range that is on the
> deferred list to be removed, or that is a child range split off by
> unmap, which is not yet added to svms and has no interval notifier.
>
> Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>

The series looks good to me. But the skip-recover changes and the
-EAGAIN handling in svm_range_restore_pages should be a separate patch.

Also, when we defer processing an interrupt (skip-recover or r ==
-EAGAIN) and wait for a subsequent retry interrupt, we may want to
remove that fault address from the gmc->fault_ring that's used by
amdgpu_gmc_filter_faults to filter out repeated page faults on the same
address. In the future we would also have to remove those addresses
from the IH CAM.
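Roughly what I have in mind for the fault_ring part, as an untested
sketch. The helper name svm_fault_filter_forget is made up, and I'm
assuming the (addr << 4 | pasid) key encoding and the hash-chain layout
that amdgpu_gmc_filter_faults uses (hash_64 is from <linux/hash.h>):

static void svm_fault_filter_forget(struct amdgpu_gmc *gmc,
				    uint64_t addr, uint16_t pasid)
{
	uint64_t key = addr << 4 | pasid;	/* assumed key encoding */
	struct amdgpu_gmc_fault *fault;
	uint32_t hash;
	int i;

	hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
	fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];

	/*
	 * Walk the bounded hash chain and expire the matching entry.
	 * A zeroed timestamp fails the filter's stamp check, so the
	 * next fault on this address is handled instead of dropped.
	 */
	for (i = 0; i < AMDGPU_GMC_FAULT_RING_SIZE; i++) {
		if (fault->key == key) {
			fault->timestamp = 0;
			break;
		}
		fault = &gmc->fault_ring[fault->next];
	}
}

Zeroing the timestamp also hides any older entries behind it on the
same chain, so in the worst case we handle one duplicate fault again,
which is harmless. Like the filter itself, this would have to run
serialized against the interrupt handler that fills the ring.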
Regards,
  Felix

> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 75 +++++++++++++++++++++++++++-
>  1 file changed, 74 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 0e0b4ffd20ab..45dd055118eb 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -1402,11 +1402,13 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>  	svm_range_lock(prange);
>  	if (!prange->actual_loc) {
>  		if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
> +			pr_debug("hmm update the range, need validate again\n");
>  			r = -EAGAIN;
>  			goto unlock_out;
>  		}
>  	}
>  	if (!list_empty(&prange->child_list)) {
> +		pr_debug("range split by unmap in parallel, validate again\n");
>  		r = -EAGAIN;
>  		goto unlock_out;
>  	}
> @@ -1828,6 +1830,28 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
>  	}
>  }
>
> +static void svm_range_drain_retry_fault(struct svm_range_list *svms)
> +{
> +	struct kfd_process_device *pdd;
> +	struct amdgpu_device *adev;
> +	struct kfd_process *p;
> +	uint32_t i;
> +
> +	p = container_of(svms, struct kfd_process, svms);
> +
> +	for (i = 0; i < p->n_pdds; i++) {
> +		pdd = p->pdds[i];
> +		if (!pdd)
> +			continue;
> +
> +		pr_debug("drain retry fault gpu %d svms %p\n", i, svms);
> +		adev = (struct amdgpu_device *)pdd->dev->kgd;
> +
> +		amdgpu_ih_wait_on_checkpoint_process(adev, &adev->irq.ih1);
> +		pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
> +	}
> +}
> +
>  static void svm_range_deferred_list_work(struct work_struct *work)
>  {
>  	struct svm_range_list *svms;
> @@ -1845,6 +1869,10 @@ static void svm_range_deferred_list_work(struct work_struct *work)
>  		pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange,
>  			 prange->start, prange->last, prange->work_item.op);
>
> +		/* Make sure no stale retry fault comes after the range is freed */
> +		if (prange->work_item.op == SVM_OP_UNMAP_RANGE)
> +			svm_range_drain_retry_fault(prange->svms);
> +
>  		mm = prange->work_item.mm;
>  		mmap_write_lock(mm);
>  		mutex_lock(&svms->lock);
> @@ -2152,6 +2180,44 @@ svm_range_best_restore_location(struct svm_range *prange,
>  	return -1;
>  }
>
> +/* svm_range_skip_recover - decide if prange can be recovered
> + * @prange: svm range structure
> + *
> + * The GPU vm retry fault handler skips recovering the range in these cases:
> + * 1. prange is on the deferred list to be removed after unmap: the fault is
> + *    stale; the deferred list work drains stale faults before freeing the
> + *    prange.
> + * 2. prange is on the deferred list to add an interval notifier after split,
> + *    or
> + * 3. prange is a child range split from a parent prange; recover it later,
> + *    after the interval notifier is added.
> + *
> + * Return: true to skip recovery, false to recover
> + */
> +static bool svm_range_skip_recover(struct svm_range *prange)
> +{
> +	struct svm_range_list *svms = prange->svms;
> +
> +	spin_lock(&svms->deferred_list_lock);
> +	if (list_empty(&prange->deferred_list) &&
> +	    list_empty(&prange->child_list)) {
> +		spin_unlock(&svms->deferred_list_lock);
> +		return false;
> +	}
> +	spin_unlock(&svms->deferred_list_lock);
> +
> +	if (prange->work_item.op == SVM_OP_UNMAP_RANGE) {
> +		pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] unmapped\n",
> +			 svms, prange, prange->start, prange->last);
> +		return true;
> +	}
> +	if (prange->work_item.op == SVM_OP_ADD_RANGE_AND_MAP ||
> +	    prange->work_item.op == SVM_OP_ADD_RANGE) {
> +		pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] not added yet\n",
> +			 svms, prange, prange->start, prange->last);
> +		return true;
> +	}
> +	return false;
> +}
> +
>  int
>  svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>  			uint64_t addr)
> @@ -2187,7 +2253,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>  	mmap_read_lock(mm);
>  	mutex_lock(&svms->lock);
>  	prange = svm_range_from_addr(svms, addr, NULL);
> -
>  	if (!prange) {
>  		pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
>  			 svms, addr);
> @@ -2196,6 +2261,10 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>  	}
>
>  	mutex_lock(&prange->migrate_mutex);
> +
> +	if (svm_range_skip_recover(prange))
> +		goto out_unlock_range;
> +
>  	timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp;
>  	/* skip duplicate vm fault on different pages of same range */
>  	if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
> @@ -2254,6 +2323,10 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>  out:
>  	kfd_unref_process(p);
>
> +	if (r == -EAGAIN) {
> +		pr_debug("recover vm fault later\n");
> +		r = 0;
> +	}
>  	return r;
>  }
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx