[PATCH 10/12] drm/amdgpu/sriov:implement guilty ctx for loose reset

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Change-Id: I7904f362aa0f578a5cbf5d40c7a242c2c6680a92
Signed-off-by: Monk Liu <Monk.Liu at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        | 16 +++++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c       |  1 +
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 22 ++++++++++++++++++++++
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.h |  1 +
 5 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b40d4ba..b63e602 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -737,6 +737,7 @@ struct amdgpu_ctx {
 	struct dma_fence	**fences;
 	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
 	bool preamble_presented;
+	bool guilty;
 };
 
 struct amdgpu_ctx_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 6a1515e..f92962e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -79,16 +79,19 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 	if (cs->in.num_chunks == 0)
 		return 0;
 
+	p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
+	if (!p->ctx)
+		return -EINVAL;
+
+	if (amdgpu_sriov_vf(p->adev) &&
+		amdgpu_sriov_reset_level == 0 &&
+		p->ctx->guilty)
+		return -ENODEV;
+
 	chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
 	if (!chunk_array)
 		return -ENOMEM;
 
-	p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
-	if (!p->ctx) {
-		ret = -EINVAL;
-		goto free_chunk;
-	}
-
 	/* get chunks */
 	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
 	if (copy_from_user(chunk_array, chunk_array_user,
@@ -184,7 +187,6 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 	p->nchunks = 0;
 put_ctx:
 	amdgpu_ctx_put(p->ctx);
-free_chunk:
 	kfree(chunk_array);
 
 	return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 75c933b..028e9f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -60,6 +60,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
 					  rq, amdgpu_sched_jobs);
 		if (r)
 			goto failed;
+		ctx->rings[i].entity.guilty = &ctx->guilty;
 	}
 
 	r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr);
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 12c3092..89b0573 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -493,10 +493,32 @@ void amd_sched_set_queue_hang(struct amd_gpu_scheduler *sched)
 void amd_sched_job_kickout(struct amd_sched_job *s_job)
 {
 	struct amd_gpu_scheduler *sched = s_job->sched;
+	struct amd_sched_entity *entity, *tmp;
+	struct amd_sched_rq *rq;
+	int i;
+	bool found;
 
 	spin_lock(&sched->job_list_lock);
 	list_del_init(&s_job->node);
 	spin_unlock(&sched->job_list_lock);
+
+	dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
+
+	for (i = AMD_SCHED_PRIORITY_MIN; i < AMD_SCHED_PRIORITY_KERNEL; i++) {
+		rq = &sched->sched_rq[i];
+
+		spin_lock(&rq->lock);
+		list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
+			if (s_job->s_entity == entity && entity->guilty) {
+				*entity->guilty = true;
+				found = true;
+				break;
+			}
+		}
+		spin_unlock(&rq->lock);
+		if (found)
+			break;
+	}
 }
 
 void amd_sched_job_recovery(struct amd_gpu_scheduler *sched)
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index f0242aa..16c2244 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -49,6 +49,7 @@ struct amd_sched_entity {
 
 	struct dma_fence		*dependency;
 	struct dma_fence_cb		cb;
+	bool *guilty; /* this points to ctx's guilty */
 };
 
 /**
-- 
2.7.4



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux