Hi Andrey, Will you port this patch into amd-staging-drm-next? on 2/10/22 2:06 AM, Andrey Grodzovsky wrote: > All comments are fixed and code pushed. Thanks for everyone > who helped reviewing. > > Andrey > > On 2022-02-09 02:53, Christian König wrote: >> Am 09.02.22 um 01:23 schrieb Andrey Grodzovsky: >>> Before we initialize schedulers we must know which reset >>> domain are we in - for single device there iis a single >>> domain per device and so single wq per device. For XGMI >>> the reset domain spans the entire XGMI hive and so the >>> reset wq is per hive. >>> >>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx> >> >> One more comment below, with that fixed Reviewed-by: Christian König <christian.koenig@xxxxxxx>. >> >>> --- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 45 ++++++++++++++++++++++ >>> drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 34 ++-------------- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 + >>> 3 files changed, 51 insertions(+), 30 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>> index 9704b0e1fd82..00123b0013d3 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>> @@ -2287,6 +2287,47 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev) >>> return r; >>> } >>> +static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) >>> +{ >>> + long timeout; >>> + int r, i; >>> + >>> + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { >>> + struct amdgpu_ring *ring = adev->rings[i]; >>> + >>> + /* No need to setup the GPU scheduler for rings that don't need it */ >>> + if (!ring || ring->no_scheduler) >>> + continue; >>> + >>> + switch (ring->funcs->type) { >>> + case AMDGPU_RING_TYPE_GFX: >>> + timeout = adev->gfx_timeout; >>> + break; >>> + case AMDGPU_RING_TYPE_COMPUTE: >>> + timeout = adev->compute_timeout; >>> + break; >>> + case AMDGPU_RING_TYPE_SDMA: >>> + timeout = adev->sdma_timeout; >>> + break; >>> + default: >>> + timeout = adev->video_timeout; >>> + break; >>> + } >>> + >>> + r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, >>> + ring->num_hw_submission, amdgpu_job_hang_limit, >>> + timeout, adev->reset_domain.wq, ring->sched_score, ring->name); >>> + if (r) { >>> + DRM_ERROR("Failed to create scheduler on ring %s.\n", >>> + ring->name); >>> + return r; >>> + } >>> + } >>> + >>> + return 0; >>> +} >>> + >>> + >>> /** >>> * amdgpu_device_ip_init - run init for hardware IPs >>> * >>> @@ -2419,6 +2460,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) >>> } >>> } >>> + r = amdgpu_device_init_schedulers(adev); >>> + if (r) >>> + goto init_failed; >>> + >>> /* Don't init kfd if whole hive need to be reset during init */ >>> if (!adev->gmc.xgmi.pending_reset) >>> amdgpu_amdkfd_device_init(adev); >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c >>> index 45977a72b5dd..fa302540c69a 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c >>> @@ -457,8 +457,6 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, >>> atomic_t *sched_score) >>> { >>> struct amdgpu_device *adev = ring->adev; >>> - long timeout; >>> - int r; >>> if (!adev) >>> return -EINVAL; >>> @@ -478,36 +476,12 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, >>> spin_lock_init(&ring->fence_drv.lock); >>> ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *), >>> GFP_KERNEL); >>> - if (!ring->fence_drv.fences) >>> - return -ENOMEM; >>> - /* No need to setup the GPU scheduler for rings that don't need it */ >>> - if (ring->no_scheduler) >>> - return 0; >>> + ring->num_hw_submission = num_hw_submission; >>> + ring->sched_score = sched_score; >> >> Let's move this into the caller and then use ring->num_hw_submission in the fence code as well. >> >> The maximum number of jobs on the ring is not really fence specific. >> >> Regards, >> Christian. >> >>> - switch (ring->funcs->type) { >>> - case AMDGPU_RING_TYPE_GFX: >>> - timeout = adev->gfx_timeout; >>> - break; >>> - case AMDGPU_RING_TYPE_COMPUTE: >>> - timeout = adev->compute_timeout; >>> - break; >>> - case AMDGPU_RING_TYPE_SDMA: >>> - timeout = adev->sdma_timeout; >>> - break; >>> - default: >>> - timeout = adev->video_timeout; >>> - break; >>> - } >>> - >>> - r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, >>> - num_hw_submission, amdgpu_job_hang_limit, >>> - timeout, NULL, sched_score, ring->name); >>> - if (r) { >>> - DRM_ERROR("Failed to create scheduler on ring %s.\n", >>> - ring->name); >>> - return r; >>> - } >>> + if (!ring->fence_drv.fences) >>> + return -ENOMEM; >>> return 0; >>> } >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >>> index fae7d185ad0d..7f20ce73a243 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >>> @@ -251,6 +251,8 @@ struct amdgpu_ring { >>> bool has_compute_vm_bug; >>> bool no_scheduler; >>> int hw_prio; >>> + unsigned num_hw_submission; >>> + atomic_t *sched_score; >>> }; >>> #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib))) >>