On 2021-12-20 2:16 a.m., Christian König wrote:
Am 17.12.21 um 23:27 schrieb Andrey Grodzovsky:
Before we initialize schedulers we must know which reset
domain we are in - for a single device there is a single
domain per device and so a single wq per device. For XGMI
the reset domain spans the entire XGMI hive and so the
reset wq is per hive.
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 45 ++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 34 ++--------------
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 +
3 files changed, 51 insertions(+), 30 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5f13195d23d1..b595e6d699b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2284,6 +2284,47 @@ static int amdgpu_device_fw_loading(struct
amdgpu_device *adev)
return r;
}
+static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
+{
+ long timeout;
+ int r, i;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ /* No need to setup the GPU scheduler for rings that don't
need it */
+ if (!ring || ring->no_scheduler)
+ continue;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ timeout = adev->gfx_timeout;
+ break;
+ case AMDGPU_RING_TYPE_COMPUTE:
+ timeout = adev->compute_timeout;
+ break;
+ case AMDGPU_RING_TYPE_SDMA:
+ timeout = adev->sdma_timeout;
+ break;
+ default:
+ timeout = adev->video_timeout;
+ break;
+ }
+
+ r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
+ ring->num_hw_submission, amdgpu_job_hang_limit,
+ timeout, adev->reset_domain.wq,
ring->sched_score, ring->name);
+ if (r) {
+ DRM_ERROR("Failed to create scheduler on ring %s.\n",
+ ring->name);
+ return r;
+ }
Maybe it would be better to put that into amdgpu_ring.c. But that's
not really a hard requirement, more of a gut feeling.
+ }
+
+ return 0;
+}
+
+
/**
* amdgpu_device_ip_init - run init for hardware IPs
*
@@ -2412,6 +2453,10 @@ static int amdgpu_device_ip_init(struct
amdgpu_device *adev)
}
}
+ r = amdgpu_device_init_schedulers(adev);
+ if (r)
+ goto init_failed;
+
/* Don't init kfd if whole hive need to be reset during init */
if (!adev->gmc.xgmi.pending_reset)
amdgpu_amdkfd_device_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 3b7e86ea7167..5527c68c51de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -456,8 +456,6 @@ int amdgpu_fence_driver_init_ring(struct
amdgpu_ring *ring,
atomic_t *sched_score)
{
struct amdgpu_device *adev = ring->adev;
- long timeout;
- int r;
if (!adev)
return -EINVAL;
@@ -477,36 +475,12 @@ int amdgpu_fence_driver_init_ring(struct
amdgpu_ring *ring,
spin_lock_init(&ring->fence_drv.lock);
ring->fence_drv.fences = kcalloc(num_hw_submission * 2,
sizeof(void *),
GFP_KERNEL);
- if (!ring->fence_drv.fences)
- return -ENOMEM;
- /* No need to setup the GPU scheduler for rings that don't
need it */
- if (ring->no_scheduler)
- return 0;
+ ring->num_hw_submission = num_hw_submission;
+ ring->sched_score = sched_score;
Probably better to set that in the caller and drop the parameters from
the amdgpu_fence_driver_init_ring() function completely.
Christian.
I noticed that at least num_hw_submission is validated within the
function, so I'm not sure we should then discard the parameters.
Andrey
- switch (ring->funcs->type) {
- case AMDGPU_RING_TYPE_GFX:
- timeout = adev->gfx_timeout;
- break;
- case AMDGPU_RING_TYPE_COMPUTE:
- timeout = adev->compute_timeout;
- break;
- case AMDGPU_RING_TYPE_SDMA:
- timeout = adev->sdma_timeout;
- break;
- default:
- timeout = adev->video_timeout;
- break;
- }
-
- r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
- num_hw_submission, amdgpu_job_hang_limit,
- timeout, NULL, sched_score, ring->name);
- if (r) {
- DRM_ERROR("Failed to create scheduler on ring %s.\n",
- ring->name);
- return r;
- }
+ if (!ring->fence_drv.fences)
+ return -ENOMEM;
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 4d380e79752c..a4b8279e3011 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -253,6 +253,8 @@ struct amdgpu_ring {
bool has_compute_vm_bug;
bool no_scheduler;
int hw_prio;
+ unsigned num_hw_submission;
+ atomic_t *sched_score;
};
#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p),
(ib)))