Am 21.12.22 um 00:27 schrieb Felix Kuehling:
This allows page table updates to be coordinated with interval notifiers
to avoid writing stale page table entries to the pabe table. Moving the
critical section inside the page table update avoids lock dependencies
with page table allocations under the notifier lock.
Suggested-by: Christian König <christian.koenig@xxxxxxx>
Signed-off-by: Felix Kuehling <Felix.Kuehling@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 27 ++++++-----
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 58 ++++++++++++++++++++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 6 ++-
drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 4 +-
4 files changed, 77 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a04f7aef4ca9..556d2e5d90e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -768,6 +768,7 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
* @vram_base: base for vram mappings
* @res: ttm_resource to map
* @pages_addr: DMA addresses to use for mapping
+ * @range: optional HMM range for coordination with interval notifier
* @fence: optional resulting fence
*
* Fill in the page table entries between @start and @last.
@@ -780,7 +781,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct dma_resv *resv, uint64_t start, uint64_t last,
uint64_t flags, uint64_t offset, uint64_t vram_base,
struct ttm_resource *res, dma_addr_t *pages_addr,
- struct dma_fence **fence)
+ struct hmm_range *range, struct dma_fence **fence)
{
struct amdgpu_vm_update_params params;
struct amdgpu_vm_tlb_seq_cb *tlb_cb;
@@ -794,7 +795,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL);
if (!tlb_cb) {
r = -ENOMEM;
- goto error_unlock;
+ goto error_dev_exit;
}
/* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache,
@@ -811,6 +812,9 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
memset(¶ms, 0, sizeof(params));
params.adev = adev;
params.vm = vm;
+#ifdef CONFIG_MMU_NOTIFIER
+ params.range = range;
+#endif
params.immediate = immediate;
params.pages_addr = pages_addr;
params.unlocked = unlocked;
@@ -823,12 +827,6 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
else
sync_mode = AMDGPU_SYNC_EXPLICIT;
- amdgpu_vm_eviction_lock(vm);
- if (vm->evicting) {
- r = -EBUSY;
- goto error_free;
- }
-
if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
struct dma_fence *tmp = dma_fence_get_stub();
@@ -893,7 +891,11 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
start = tmp;
}
+ r = amdgpu_vm_pts_lock(¶ms);
+ if (r)
+ goto error_free;
r = vm->update_funcs->commit(¶ms, fence);
+ amdgpu_vm_pts_unlock(¶ms);
This won't work. We need the lock for updates as well and not just for
committing them.
if (flush_tlb || params.table_freed) {
tlb_cb->vm = vm;
@@ -911,8 +913,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
error_free:
kfree(tlb_cb);
-error_unlock:
- amdgpu_vm_eviction_unlock(vm);
+error_dev_exit:
drm_dev_exit(idx);
return r;
}
@@ -1058,7 +1059,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
resv, mapping->start, mapping->last,
update_flags, mapping->offset,
vram_base, mem, pages_addr,
- last_update);
+ NULL, last_update);
if (r)
return r;
}
@@ -1253,7 +1254,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
r = amdgpu_vm_update_range(adev, vm, false, false, true, resv,
mapping->start, mapping->last,
init_pte_value, 0, 0, NULL, NULL,
- &f);
+ NULL, &f);
amdgpu_vm_free_mapping(adev, vm, mapping, f);
if (r) {
dma_fence_put(f);
@@ -2512,7 +2513,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
}
r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr,
- addr, flags, value, 0, NULL, NULL, NULL);
+ addr, flags, value, 0, NULL, NULL, NULL, NULL);
if (r)
goto error_unlock;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 07af80df812b..8fad51d66bce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -31,6 +31,8 @@
#include <drm/drm_file.h>
#include <drm/ttm/ttm_bo_driver.h>
#include <linux/sched/mm.h>
+#include <linux/hmm.h>
+#include <linux/mmu_notifier.h>
#include "amdgpu_sync.h"
#include "amdgpu_ring.h"
@@ -196,6 +198,13 @@ struct amdgpu_vm_update_params {
*/
struct amdgpu_vm *vm;
+#ifdef CONFIG_MMU_NOTIFIER
We should make this independent of CONFIG_MMU_NOTIFIER and just forward
declare the hmm_range structure here.
+ /**
+ * @range: optional HMM range for coordination with interval notifier
+ */
+ struct hmm_range *range;
+#endif
+
/**
* @immediate: if changes should be made immediately
*/
@@ -411,7 +420,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct dma_resv *resv, uint64_t start, uint64_t last,
uint64_t flags, uint64_t offset, uint64_t vram_base,
struct ttm_resource *res, dma_addr_t *pages_addr,
- struct dma_fence **fence);
+ struct hmm_range *range, struct dma_fence **fence);
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
bool clear);
@@ -535,4 +544,51 @@ static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
mutex_unlock(&vm->eviction_lock);
}
+/*
+ * Make page tables safe to access without a reservation and ensure that the
+ * ptes being written are still valid. This can fail if page tables are being
+ * evicted (-EBUSY) or an HMM range is being invalidated (-EAGAIN).
+ */
+static inline int amdgpu_vm_pts_lock(struct amdgpu_vm_update_params *params)
+{
+ int r = 0;
+
+#ifdef CONFIG_MMU_NOTIFIER
+ if (params->range) {
+ mutex_lock(params->vm->notifier_lock);
I really don't think having a separate lock here is a good idea.
Christian.
+ if (mmu_interval_read_retry(params->range->notifier,
+ params->range->notifier_seq)) {
+ r = -EAGAIN;
+ goto error_unlock_notifier;
+ }
+ }
+#endif
+ amdgpu_vm_eviction_lock(params->vm);
+ if (params->vm->evicting) {
+ r = -EBUSY;
+ goto error_unlock;
+ }
+
+ return 0;
+
+error_unlock:
+ amdgpu_vm_eviction_unlock(params->vm);
+#ifdef CONFIG_MMU_NOTIFIER
+error_unlock_notifier:
+ if (params->range)
+ mutex_unlock(params->vm->notifier_lock);
+#endif
+
+ return r;
+}
+
+static inline void amdgpu_vm_pts_unlock(struct amdgpu_vm_update_params *params)
+{
+ amdgpu_vm_eviction_unlock(params->vm);
+#ifdef CONFIG_MMU_NOTIFIER
+ if (params->range)
+ mutex_unlock(params->vm->notifier_lock);
+#endif
+}
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index b5f3bba851db..2891284eba1a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -597,9 +597,7 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
if (entry->bo)
return 0;
- amdgpu_vm_eviction_unlock(vm);
r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
- amdgpu_vm_eviction_lock(vm);
if (r)
return r;
@@ -960,6 +958,9 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
entry_end += cursor.pfn & ~(entry_end - 1);
entry_end = min(entry_end, end);
+ r = amdgpu_vm_pts_lock(params);
+ if (r)
+ return r;
do {
struct amdgpu_vm *vm = params->vm;
uint64_t upd_end = min(entry_end, frag_end);
@@ -992,6 +993,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
break;
}
} while (frag_start < entry_end);
+ amdgpu_vm_pts_unlock(params);
if (amdgpu_vm_pt_descendant(adev, &cursor)) {
/* Free all child entries.
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 2dc3b04064bd..cc46878901c1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1217,7 +1217,7 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
return amdgpu_vm_update_range(adev, vm, false, true, true, NULL, start,
last, init_pte_value, 0, 0, NULL, NULL,
- fence);
+ NULL, fence);
}
static int
@@ -1323,7 +1323,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
pte_flags,
(last_start - prange->start) << PAGE_SHIFT,
bo_adev ? bo_adev->vm_manager.vram_base_offset : 0,
- NULL, dma_addr, &vm->last_update);
+ NULL, dma_addr, NULL, &vm->last_update);
for (j = last_start - prange->start; j <= i; j++)
dma_addr[j] |= last_domain;