I think this patch is just a proof of concept for now. It should not be
submitted because there are still some known locking issues that need to
be solved, and we don't have the code yet that handles the recoverable
page faults resulting from this.
Regards,
Felix
On 2019-12-20 1:24, Alex Sierra wrote:
This is required for HMM functionality only on GFXv9 GPUs, which support
recoverable page faults.
[Why]
Instead of stopping all user mode queues during a userptr mapping,
the GFXv9 recoverable page fault is used to revalidate userptr mappings.
This revalidation is now done in the page fault handler.
[How]
Invalidate buffer objects that correspond to the specific address range
on the mmu notifier.
Change-Id: I94b8fee8d88ff240b619cba1c5458aba98b17736
Signed-off-by: Alex Sierra <alex.sierra@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 56 ++++++++++++++++++++++++--
1 file changed, 52 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 392300f77b13..06415d8ad3c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -187,6 +187,45 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
}
}
+/**
+ * amdgpu_mn_invalidate_bo - invalidate a BO
+ *
+ * @adev: amdgpu device pointer
+ * @bo: amdgpu buffer object to invalidate
+ *
+ * Walks the list that starts at @bo->vm_bo and, for each entry:
+ *   1. calls amdgpu_vm_bo_update() on the containing bo_va with the third
+ *      argument set to true (presumably the "clear" flag - confirm against
+ *      the amdgpu_vm_bo_update() prototype),
+ *   2. waits up to 100 ms for bo_va->last_pt_update to signal,
+ *   3. flushes the GPU TLB for the PASID of that entry's VM.
+ *
+ * The walk stops at the first failure.
+ *
+ * Returns 0 if every entry was processed (or the list is empty),
+ * -ETIMEDOUT if a fence wait returned 0, otherwise the non-zero value
+ * returned by the call that failed. The result is accumulated in a long
+ * and narrowed to int on return.
+ *
+ * NOTE(review): bo_va->last_pt_update is passed to dma_fence_wait_timeout()
+ * without a NULL check - confirm it is always set after the update call.
+ */
+static int amdgpu_mn_invalidate_bo(struct amdgpu_device *adev,
+ struct amdgpu_bo *bo)
+{
+ struct amdgpu_vm_bo_base *bo_base;
+ struct amdgpu_bo_va *bo_va;
+ struct kgd_dev *kgd = (struct kgd_dev *)adev; /* same pointer, viewed as the handle type the TLB flush call takes */
+ long r = 0;
+ long tmo;
+
+ tmo = msecs_to_jiffies(100); /* per-entry wait budget: 100 ms, in jiffies */
+ for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
+ bo_va = container_of(bo_base, struct amdgpu_bo_va, base);
+ r = amdgpu_vm_bo_update(adev, bo_va, true);
+ if (r)
+ break;
+
+ r = dma_fence_wait_timeout(bo_va->last_pt_update, false, tmo);
+ if (r <= 0) {
+ if (r == 0) /* a zero result is reported to the caller as a timeout */
+ r = -ETIMEDOUT;
+
+ break;
+ } else {
+ r = 0; /* drop the positive wait result so success returns 0 */
+ }
+
+ amdgpu_amdkfd_flush_gpu_tlb_pasid(kgd, bo_base->vm->pasid);
+ }
+ return r;
+}
+
/**
* amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change
*
@@ -250,6 +289,7 @@ amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
const struct mmu_notifier_range *update)
{
struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+ struct amdgpu_device *adev = amn->adev;
unsigned long start = update->start;
unsigned long end = update->end;
bool blockable = mmu_notifier_range_blockable(update);
@@ -275,11 +315,19 @@ amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
it = interval_tree_iter_next(it, start, end);
list_for_each_entry(bo, &node->bos, mn_list) {
- struct kgd_mem *mem = bo->kfd_bo;
- if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
- start, end))
- amdgpu_amdkfd_evict_userptr(mem, amn->mm);
+ if (amdgpu_ttm_tt_affect_userptr(
+ bo->tbo.ttm, start, end)){
+ if (!amdgpu_noretry &&
+ adev->family >= AMDGPU_FAMILY_AI) {
+ amdgpu_mn_invalidate_bo(adev, bo);
+ } else {
+ struct kgd_mem *mem = bo->kfd_bo;
+
+ amdgpu_amdkfd_evict_userptr(mem,
+ amn->mm);
+ }
+ }
}
}
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx