This is to report that, since kernel version 6.9-rc5, usually well-behaved applications have intermittently been failing with memory access violations. This past weekend I stumbled across a way to reliably reproduce the problem: a Skyrim save file that causes a crash shortly after loading the game on affected kernels. Things go back to running smoothly only if I revert one of the changes made by the 5th April patch "[PATCH] drm/amdgpu: fix visible VRAM handling during faults", as follows. The patch is against v6.9-rc7. It restores the check for partially CPU-visible memory in amdgpu_bo_fault_reserve_notify(). Things seem stable again with this change. -- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 ++++- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index f6d503432a9e..a6874aea7820 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1396,7 +1396,10 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) /* Remember that this BO was accessed by the CPU */ abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - if (amdgpu_res_cpu_visible(adev, bo->resource)) + if (bo->resource->mem_type != TTM_PL_VRAM) + return 0; + + if (amdgpu_bo_in_cpu_visible_vram(abo)) return 0; /* Can't move a pinned BO to visible VRAM */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index bc42ccbde659..0503af75dc26 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -250,6 +250,28 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo) return drm_vma_node_offset_addr(&bo->tbo.base.vma_node); } +/** + * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM + */ +static inline bool 
amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct amdgpu_res_cursor cursor; + + if (!bo->tbo.resource || bo->tbo.resource->mem_type != TTM_PL_VRAM) + return false; + + amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor); + while (cursor.remaining) { + if (cursor.start < adev->gmc.visible_vram_size) + return true; + + amdgpu_res_next(&cursor, cursor.size); + } + + return false; +} + /** * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced */