why we shouldn't return error code ? any particular reason ? if we wake up a thread waiting on some fence, and return 0 the UMD won't be aware that a gpu hang occurred just now ... for this strict mode reset the policy is aligned with vulkan spec: it defines that the vk runtime should return VK_DEVICELOST error to app on those waiting functions, and kernel should let the VK UMD be aware of it so kernel must return error to UMD in the cs_wait IOCTL UMD cannot invoke amdgpu_query_ctx for no reason, it should be notified by some error fence like cs_wait BR Monk -----Original Message----- From: Christian König [mailto:ckoenig.leichtzumerken@xxxxxxxxx] Sent: 2017年10月9日 16:20 To: Liu, Monk <Monk.Liu at amd.com>; amd-gfx at lists.freedesktop.org Subject: Re: [PATCH 07/12] drm/amdgpu/sriov:implement strict gpu reset Am 30.09.2017 um 08:03 schrieb Monk Liu: > changes: > 1)implement strict mode sriov gpu reset 2)always call > sriov_gpu_reset_strict if hypervisor notify FLR 3)in strict reset > mode, set error to all fences. > 4)change fence_wait/cs_wait functions to return -ENODEV if fence > signaled with error == -ETIME, > > Since after strict gpu reset we consider the VRAM were lost, and since > assuming VRAM lost there is little help to recover shadow BO because > all textures/resources/shaders cannot recovered (if they resident in > VRAM) NAK, we shouldn't return an error code on the wait function and instead handle the fence error code in amdgpu_ctx_query(). Regards, Christian. 
> > Change-Id: I50d9b8b5185ba92f137f07c9deeac19d740d753b > Signed-off-by: Monk Liu <Monk.Liu at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 25 ++++++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 90 +++++++++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 6 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 1 + > drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 4 +- > drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 4 +- > drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 60 ++++++++++++++++++ > drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 2 + > 10 files changed, 188 insertions(+), 9 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index de11527..de9c164 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -123,6 +123,7 @@ extern int amdgpu_cntl_sb_buf_per_se; > extern int amdgpu_param_buf_per_se; > extern int amdgpu_job_hang_limit; > extern int amdgpu_lbpw; > +extern int amdgpu_sriov_reset_level; > > #ifdef CONFIG_DRM_AMDGPU_SI > extern int amdgpu_si_support; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index c6a214f..9467cf6 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -1262,6 +1262,7 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, > struct amdgpu_ctx *ctx; > struct dma_fence *fence; > long r; > + int fence_err = 0; > > if (amdgpu_kms_vram_lost(adev, fpriv)) > return -ENODEV; > @@ -1283,6 +1284,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, > r = PTR_ERR(fence); > else if (fence) { > r = dma_fence_wait_timeout(fence, true, timeout); > + /* gpu hang and this fence is signaled by gpu reset if fence_err < 0 */ > + fence_err = dma_fence_get_status(fence); > dma_fence_put(fence); > } else > r = 1; > @@ -1292,7 
+1295,10 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, > return r; > > memset(wait, 0, sizeof(*wait)); > - wait->out.status = (r == 0); > + wait->out.status = (fence_err < 0); > + > + if (fence_err < 0) > + return -ENODEV; > > return 0; > } > @@ -1346,6 +1352,7 @@ static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev, > uint32_t fence_count = wait->in.fence_count; > unsigned int i; > long r = 1; > + int fence_err = 0; > > for (i = 0; i < fence_count; i++) { > struct dma_fence *fence; > @@ -1358,16 +1365,20 @@ static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev, > continue; > > r = dma_fence_wait_timeout(fence, true, timeout); > + fence_err = dma_fence_get_status(fence); > dma_fence_put(fence); > if (r < 0) > return r; > > - if (r == 0) > + if (r == 0 || fence_err < 0) > break; > } > > memset(wait, 0, sizeof(*wait)); > - wait->out.status = (r > 0); > + wait->out.status = (r > 0 && fence_err == 0); > + > + if (fence_err < 0) > + return -ENODEV; > > return 0; > } > @@ -1391,6 +1402,7 @@ static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev, > struct dma_fence **array; > unsigned int i; > long r; > + int fence_err = 0; > > /* Prepare the fence array */ > array = kcalloc(fence_count, sizeof(struct dma_fence *), > GFP_KERNEL); @@ -1418,10 +1430,12 @@ static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev, > &first); > if (r < 0) > goto err_free_fence_array; > + else > + fence_err = dma_fence_get_status(array[first]); > > out: > memset(wait, 0, sizeof(*wait)); > - wait->out.status = (r > 0); > + wait->out.status = (r > 0 && fence_err == 0); > wait->out.first_signaled = first; > /* set return value 0 to indicate success */ > r = 0; > @@ -1431,6 +1445,9 @@ static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev, > dma_fence_put(array[i]); > kfree(array); > > + if (fence_err < 0) > + return -ENODEV; > + > return r; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 9efbb33..122e2e1 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -2734,6 +2734,96 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev, > } > > /** > + * amdgpu_sriov_gpu_reset_strict - reset the asic under strict mode > + * > + * @adev: amdgpu device pointer > + * @job: which job trigger hang > + * > + * Attempt the reset the GPU if it has hung (all asics). > + * for SRIOV case. > + * Returns 0 for success or an error on failure. > + * > + * this function will deny all process/fence created before this > +reset, > + * and drop all jobs unfinished during this reset. > + * > + * Application should take the responsibility to re-open the FD to > +re-create > + * the VM page table and recover all resources as well > + * > + **/ > +int amdgpu_sriov_gpu_reset_strict(struct amdgpu_device *adev, struct > +amdgpu_job *job) { > + int i, r = 0; > + int resched; > + struct amdgpu_ring *ring; > + > + /* other thread is already into the gpu reset so just quit and come later */ > + if (!atomic_add_unless(&adev->in_sriov_reset, 1, 1)) > + return -EAGAIN; > + > + atomic_inc(&adev->gpu_reset_counter); > + > + /* block TTM */ > + resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); > + > + /* fake signal jobs already scheduled */ > + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { > + ring = adev->rings[i]; > + > + if (!ring || !ring->sched.thread) > + continue; > + > + kthread_park(ring->sched.thread); > + amd_sched_set_sched_hang(&ring->sched); > + amdgpu_fence_driver_force_completion_ring(ring); > + amd_sched_set_queue_hang(&ring->sched); > + } > + > + /* request to take full control of GPU before re-initialization */ > + if (job) > + amdgpu_virt_reset_gpu(adev); > + else > + amdgpu_virt_request_full_gpu(adev, true); > + > + /* Resume IP prior to SMC */ > + amdgpu_sriov_reinit_early(adev); > + > + /* we need recover gart prior to run SMC/CP/SDMA 
resume */ > + amdgpu_ttm_recover_gart(adev); > + > + /* now we are okay to resume SMC/CP/SDMA */ > + amdgpu_sriov_reinit_late(adev); > + > + /* resume IRQ status */ > + amdgpu_irq_gpu_reset_resume_helper(adev); > + > + if (amdgpu_ib_ring_tests(adev)) > + dev_err(adev->dev, "[GPU_RESET] ib ring test failed (%d).\n", r); > + > + /* release full control of GPU after ib test */ > + amdgpu_virt_release_full_gpu(adev, true); > + > + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { > + ring = adev->rings[i]; > + > + if (!ring || !ring->sched.thread) > + continue; > + > + kthread_unpark(ring->sched.thread); > + } > + > + drm_helper_resume_force_mode(adev->ddev); > + > + ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); > + if (r) > + dev_info(adev->dev, "Strict mode GPU reset failed\n"); > + else > + dev_info(adev->dev, "Strict mode GPU reset successed!\n"); > + > + atomic_set(&adev->in_sriov_reset, 0); > + return 0; > +} > + > +/** > * amdgpu_sriov_gpu_reset - reset the asic > * > * @adev: amdgpu device pointer > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > index 8f5211c..eee67dc 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > @@ -123,6 +123,7 @@ int amdgpu_cntl_sb_buf_per_se = 0; > int amdgpu_param_buf_per_se = 0; > int amdgpu_job_hang_limit = 0; > int amdgpu_lbpw = -1; > +int amdgpu_sriov_reset_level = 0; > > MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); > module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); @@ > -269,6 +270,9 @@ module_param_named(job_hang_limit, amdgpu_job_hang_limit, int ,0444); > MODULE_PARM_DESC(lbpw, "Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable, -1 = auto)"); > module_param_named(lbpw, amdgpu_lbpw, int, 0444); > > +MODULE_PARM_DESC(sriov_reset_level, "what level will gpu reset on, 0: > +loose, 1:strict, other:disable (default 0))"); > +module_param_named(sriov_reset_level, 
amdgpu_sriov_reset_level, int > +,0444); > + > #ifdef CONFIG_DRM_AMDGPU_SI > > #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > index 0db81a4..933823a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > @@ -41,7 +41,11 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job) > int r; > > try_again: > - r = amdgpu_sriov_gpu_reset(job->adev, job); > + if (amdgpu_sriov_reset_level == 1) > + r = amdgpu_sriov_gpu_reset_strict(job->adev, job); > + else > + r = amdgpu_sriov_gpu_reset(job->adev, job); > + > if (r == -EAGAIN) { > /* maye two different schedulers all have hang job, try later */ > schedule(); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > index a3cbd5a..5664a10 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > @@ -100,5 +100,6 @@ int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); > int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job); > int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev); > void amdgpu_virt_free_mm_table(struct amdgpu_device *adev); > +int amdgpu_sriov_gpu_reset_strict(struct amdgpu_device *adev, struct > +amdgpu_job *job); > > #endif > diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c > b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c > index 2812d88..00a9629 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c > +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c > @@ -247,8 +247,8 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) > return; > } > > - /* Trigger recovery due to world switch failure */ > - amdgpu_sriov_gpu_reset(adev, NULL); > + /* use strict mode if FLR triggered from hypervisor */ > + amdgpu_sriov_gpu_reset_strict(adev, NULL); > } > > static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, 
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c > b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c > index c25a831..c94b6e9 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c > +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c > @@ -513,8 +513,8 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) > return; > } > > - /* Trigger recovery due to world switch failure */ > - amdgpu_sriov_gpu_reset(adev, NULL); > + /* use strict mode if FLR triggered from hypervisor */ > + amdgpu_sriov_gpu_reset_strict(adev, NULL); > } > > static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev, > diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c > b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c > index 97c94f9..12c3092 100644 > --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c > +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c > @@ -430,6 +430,66 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched) > spin_unlock(&sched->job_list_lock); > } > > +/** > + * amd_sched_set_sched_hang > + * @sched: the scheduler need to set all pending jobs hang > + * > + * this routine set all unfinished jobs pending in the sched to > + * an error -ETIME statues > + * > + **/ > +void amd_sched_set_sched_hang(struct amd_gpu_scheduler *sched) { > + struct amd_sched_job *s_job; > + > + spin_lock(&sched->job_list_lock); > + list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) > + dma_fence_set_error(&s_job->s_fence->finished, -ETIME); > + > + spin_unlock(&sched->job_list_lock); > +} > + > +/** > + * amd_sched_set_queue_hang > + * @sched: the scheduler need to set all job in kfifo hang > + * > + * this routine set all jobs in the KFIFO of @sched to an error > + * -ETIME status and signal those jobs. 
> + * > + **/ > + > +void amd_sched_set_queue_hang(struct amd_gpu_scheduler *sched) { > + struct amd_sched_entity *entity, *tmp; > + struct amd_sched_job *s_job; > + struct amd_sched_rq *rq; > + int i; > + > + /* set HANG status on all jobs queued and fake signal them */ > + for (i = AMD_SCHED_PRIORITY_MIN; i < AMD_SCHED_PRIORITY_MAX; i++) { > + rq = &sched->sched_rq[i]; > + > + spin_lock(&rq->lock); > + list_for_each_entry_safe(entity, tmp, &rq->entities, list) { > + if (entity->dependency) { > + dma_fence_remove_callback(entity->dependency, &entity->cb); > + dma_fence_put(entity->dependency); > + entity->dependency = NULL; > + } > + > + spin_lock(&entity->queue_lock); > + while(kfifo_out(&entity->job_queue, &s_job, sizeof(s_job)) == sizeof(s_job)) { > + dma_fence_set_error(&s_job->s_fence->finished, -ETIME); > + amd_sched_fence_scheduled(s_job->s_fence); > + amd_sched_fence_finished(s_job->s_fence); > + } > + spin_unlock(&entity->queue_lock); > + } > + spin_unlock(&rq->lock); > + } > + wake_up(&sched->job_scheduled); > +} > + > void amd_sched_job_kickout(struct amd_sched_job *s_job) > { > struct amd_gpu_scheduler *sched = s_job->sched; diff --git > a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h > b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h > index f9d8f28..f0242aa 100644 > --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h > +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h > @@ -167,4 +167,6 @@ void amd_sched_job_recovery(struct amd_gpu_scheduler *sched); > bool amd_sched_dependency_optimized(struct dma_fence* fence, > struct amd_sched_entity *entity); > void amd_sched_job_kickout(struct amd_sched_job *s_job); > +void amd_sched_set_queue_hang(struct amd_gpu_scheduler *sched); void > +amd_sched_set_sched_hang(struct amd_gpu_scheduler *sched); > #endif