Am 27.08.22 um 01:16 schrieb Felix Kuehling:
BOs can be in a different location than was intended at allocation time,
for example when restoring fails after an eviction or BOs get pinned in
system memory. On some GPUs the MTYPE for coherent mappings depends on
the actual memory location.
Use the actual location to determine the pte_flags every time the page
tables are updated.
For a workaround ok, but looks a bit awkward. Basically we need
different MTYPE based on the location, right?
Christian.
Signed-off-by: Felix Kuehling <Felix.Kuehling@xxxxxxx>
---
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 9 ++++++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 19 +++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 1 +
3 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index cbd593f7d553..5dd89f5a032f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -405,6 +405,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
{
struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
+ bool is_vram = mem->bo->tbo.resource->mem_type == TTM_PL_VRAM;
bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
bool uncached = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED;
uint32_t mapping_flags;
@@ -420,7 +421,7 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
switch (adev->asic_type) {
case CHIP_ARCTURUS:
case CHIP_ALDEBARAN:
- if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+ if (is_vram) {
if (bo_adev == adev) {
if (uncached)
mapping_flags |= AMDGPU_VM_MTYPE_UC;
@@ -1236,12 +1237,18 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
{
struct amdgpu_bo_va *bo_va = entry->bo_va;
struct amdgpu_device *adev = entry->adev;
+ uint64_t pte_flags = get_pte_flags(adev, mem);
int ret;
ret = kfd_mem_dmamap_attachment(mem, entry);
if (ret)
return ret;
+ if (unlikely(entry->pte_flags != pte_flags)) {
+ amdgpu_vm_bo_update_flags(bo_va, pte_flags);
+ entry->pte_flags = pte_flags;
+ }
+
/* Update the page tables */
ret = amdgpu_vm_bo_update(adev, bo_va, false);
if (ret) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 59cac347baa3..954a40d5d828 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1862,6 +1862,25 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
}
}
+/**
+ * amdgpu_vm_bo_update_flags - Update mapping flags of invalid mappings
+ *
+ * @bo_va: identifies the BO and VM
+ * @flags: new mapping flags
+ *
+ * The update is only applied to invalid mappings. This allows updating the
+ * mapping flags after a migration to maintain the desired coherence. The next
+ * call to amdgpu_vm_bo_update() will apply the new @flags to the page table.
+ */
+void amdgpu_vm_bo_update_flags(struct amdgpu_bo_va *bo_va,
+ uint64_t flags)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+
+ list_for_each_entry(mapping, &bo_va->invalids, list)
+ mapping->flags = flags;
+}
+
/**
* amdgpu_vm_get_block_size - calculate VM page table size as power of two
*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 9ecb7f663e19..11793716cd8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -413,6 +413,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
struct amdgpu_bo *bo, bool evicted);
+void amdgpu_vm_bo_update_flags(struct amdgpu_bo_va *bo_va, uint64_t flags);
uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
struct amdgpu_bo *bo);