Am 20.12.21 um 22:51 schrieb Andrey Grodzovsky:
On 2021-12-20 2:16 a.m., Christian König wrote:
Am 17.12.21 um 23:27 schrieb Andrey Grodzovsky:
Before we initialize schedulers we must know which reset
domain we are in - for a single device there is a single
domain per device and so a single wq per device. For XGMI
the reset domain spans the entire XGMI hive and so the
reset wq is per hive.
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 45
++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 34 ++--------------
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 +
3 files changed, 51 insertions(+), 30 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5f13195d23d1..b595e6d699b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2284,6 +2284,47 @@ static int amdgpu_device_fw_loading(struct
amdgpu_device *adev)
return r;
}
+static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
+{
+ long timeout;
+ int r, i;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ /* No need to setup the GPU scheduler for rings that don't
need it */
+ if (!ring || ring->no_scheduler)
+ continue;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ timeout = adev->gfx_timeout;
+ break;
+ case AMDGPU_RING_TYPE_COMPUTE:
+ timeout = adev->compute_timeout;
+ break;
+ case AMDGPU_RING_TYPE_SDMA:
+ timeout = adev->sdma_timeout;
+ break;
+ default:
+ timeout = adev->video_timeout;
+ break;
+ }
+
+ r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
+ ring->num_hw_submission, amdgpu_job_hang_limit,
+ timeout, adev->reset_domain.wq,
ring->sched_score, ring->name);
+ if (r) {
+ DRM_ERROR("Failed to create scheduler on ring %s.\n",
+ ring->name);
+ return r;
+ }
Maybe better to put that into amdgpu_ring.c, but that's not really a hard
requirement, more of a gut feeling.
+ }
+
+ return 0;
+}
+
+
/**
* amdgpu_device_ip_init - run init for hardware IPs
*
@@ -2412,6 +2453,10 @@ static int amdgpu_device_ip_init(struct
amdgpu_device *adev)
}
}
+ r = amdgpu_device_init_schedulers(adev);
+ if (r)
+ goto init_failed;
+
/* Don't init kfd if whole hive need to be reset during init */
if (!adev->gmc.xgmi.pending_reset)
amdgpu_amdkfd_device_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 3b7e86ea7167..5527c68c51de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -456,8 +456,6 @@ int amdgpu_fence_driver_init_ring(struct
amdgpu_ring *ring,
atomic_t *sched_score)
{
struct amdgpu_device *adev = ring->adev;
- long timeout;
- int r;
if (!adev)
return -EINVAL;
@@ -477,36 +475,12 @@ int amdgpu_fence_driver_init_ring(struct
amdgpu_ring *ring,
spin_lock_init(&ring->fence_drv.lock);
ring->fence_drv.fences = kcalloc(num_hw_submission * 2,
sizeof(void *),
GFP_KERNEL);
- if (!ring->fence_drv.fences)
- return -ENOMEM;
- /* No need to setup the GPU scheduler for rings that don't
need it */
- if (ring->no_scheduler)
- return 0;
+ ring->num_hw_submission = num_hw_submission;
+ ring->sched_score = sched_score;
Probably better to set that in the caller and drop the parameters
from the amdgpu_fence_driver_init_ring() function completely.
Christian.
I noticed that at least num_hw_submission is validated within the
function so not sure we should then discard the parameters.
Good point. It also doesn't make sense to move this check up because the
power of two requirement comes from the fences, doesn't it?
Ok, in this case just keep it like it is.
Christian.
Andrey
- switch (ring->funcs->type) {
- case AMDGPU_RING_TYPE_GFX:
- timeout = adev->gfx_timeout;
- break;
- case AMDGPU_RING_TYPE_COMPUTE:
- timeout = adev->compute_timeout;
- break;
- case AMDGPU_RING_TYPE_SDMA:
- timeout = adev->sdma_timeout;
- break;
- default:
- timeout = adev->video_timeout;
- break;
- }
-
- r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
- num_hw_submission, amdgpu_job_hang_limit,
- timeout, NULL, sched_score, ring->name);
- if (r) {
- DRM_ERROR("Failed to create scheduler on ring %s.\n",
- ring->name);
- return r;
- }
+ if (!ring->fence_drv.fences)
+ return -ENOMEM;
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 4d380e79752c..a4b8279e3011 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -253,6 +253,8 @@ struct amdgpu_ring {
bool has_compute_vm_bug;
bool no_scheduler;
int hw_prio;
+ unsigned num_hw_submission;
+ atomic_t *sched_score;
};
#define amdgpu_ring_parse_cs(r, p, ib)
((r)->funcs->parse_cs((p), (ib)))