Because this time SDMA may be under GPU RESET, its ring->ready can be false (e.g. the IB test failed during GPU reset); just keep going and the GPU scheduler will reschedule this job if it fails. v2: treat a not-ready ring as an error only when the GPU is not doing a GPU reset; handle all places in amdgpu_ttm.c as well. Signed-off-by: Monk Liu <Monk.Liu at amd.com> Change-Id: I241036e0ba54c3aadc573d507c7bd615b8b978f9 --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index e38e6db..9c9e596 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -215,7 +215,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, case TTM_PL_VRAM: if (adev->mman.buffer_funcs && adev->mman.buffer_funcs_ring && - adev->mman.buffer_funcs_ring->ready == false) { + (adev->mman.buffer_funcs_ring->ready == false && !adev->in_gpu_reset) ) { amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); } else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { @@ -331,7 +331,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GPU_PAGE_SIZE); - if (!ring->ready) { + if (!ring->ready && !adev->in_gpu_reset) { DRM_ERROR("Trying to move memory with ring turned off.\n"); return -EINVAL; } @@ -579,7 +579,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, } if (adev->mman.buffer_funcs == NULL || adev->mman.buffer_funcs_ring == NULL || - !adev->mman.buffer_funcs_ring->ready) { + (!adev->mman.buffer_funcs_ring->ready && !adev->in_gpu_reset)) { /* use memcpy */ goto memcpy; } @@ -1656,6 +1656,11 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, amdgpu_ring_pad_ib(ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > 
num_dw); if (direct_submit) { + if (!ring->ready) { + r = -EINVAL; + goto error_free; + } + r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, NULL, fence); job->fence = dma_fence_get(*fence); @@ -1663,6 +1668,10 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, DRM_ERROR("Error scheduling IBs (%d)\n", r); amdgpu_job_free(job); } else { + if (!ring->ready && !adev->in_gpu_reset) { + r = -EINVAL; + goto error_free; + } r = amdgpu_job_submit(job, ring, &adev->mman.entity, AMDGPU_FENCE_OWNER_UNDEFINED, fence); if (r) @@ -1692,7 +1701,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, struct amdgpu_job *job; int r; - if (!ring->ready) { + if (!ring->ready && !adev->in_gpu_reset) { DRM_ERROR("Trying to clear memory with ring turned off.\n"); return -EINVAL; } -- 2.7.4