On Thu, May 21, 2020 at 5:39 PM Bhawanpreet Lakha
<Bhawanpreet.Lakha@xxxxxxx> wrote:
>
> [Why]
> Previously we used the s3 codepath for gpu reset. This can lead to issues in
> certain cases where we end up waiting for fences which will never come (because
> parts of the hw are off due to gpu reset), and we end up waiting forever,
> causing a deadlock.
>
> [How]
> Handle GPU reset separately from the normal s3 case. We essentially need to
> redo everything we do in s3, but avoid any drm calls.
>
> For the GPU reset case:
>
> suspend:
> -Acquire DC lock
> -Cache current dc_state
> -Commit 0 streams/planes to dc (this puts dc into a state where it can be
>  powered off)
> -Disable interrupts
> resume:
> -Edit cached state to force a full update
> -Commit cached state from suspend
> -Build stream and plane updates from the cached state
> -Commit stream/plane updates
> -Enable interrupts
> -Release DC lock
>
> v2:
> -Formatting
> -Release dc_state
>
> Signed-off-by: Bhawanpreet Lakha <Bhawanpreet.Lakha@xxxxxxx>

Acked-by: Alex Deucher <alexander.deucher@xxxxxxx>
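
For anyone skimming the thread, the suspend/resume sequence described above
maps onto the new helpers roughly as follows. This is only a condensed sketch
pulled from the diff below (error handling, the per-plane force_full_update
loop and the regular S3 paths are omitted), not the literal patch code:

        /* dm_suspend(), adev->in_gpu_reset == true */
        mutex_lock(&dm->dc_lock);                      /* held until resume */
        dm->cached_dc_state = dc_copy_state(dm->dc->current_state);
        dm_gpureset_toggle_interrupts(adev, dm->cached_dc_state, false);
        amdgpu_dm_commit_zero_streams(dm->dc);         /* DC can power off  */
        amdgpu_dm_irq_suspend(adev);

        /* dm_resume(), adev->in_gpu_reset == true */
        dm_dmub_hw_init(adev);
        dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
        dc_resume(dm->dc);
        amdgpu_dm_irq_resume_early(adev);
        /* mark cached streams/planes for a full update, then replay them */
        WARN_ON(!dc_commit_state(dm->dc, dm->cached_dc_state));
        dm_gpureset_commit_state(dm->cached_dc_state, dm);
        dm_gpureset_toggle_interrupts(adev, dm->cached_dc_state, true);
        dc_release_state(dm->cached_dc_state);
        dm->cached_dc_state = NULL;
        amdgpu_dm_irq_resume_late(adev);
        mutex_unlock(&dm->dc_lock);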
"enable" : "disable"); > + > + if (enable) { > + rc = dm_enable_vblank(&acrtc->base); > + if (rc) > + DRM_WARN("Failed to enable vblank interrupts\n"); > + } else { > + dm_disable_vblank(&acrtc->base); > + } > + > + } > + } > + > +} > + > +enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc) > +{ > + struct dc_state *context = NULL; > + enum dc_status res = DC_ERROR_UNEXPECTED; > + int i; > + struct dc_stream_state *del_streams[MAX_PIPES]; > + int del_streams_count = 0; > + > + memset(del_streams, 0, sizeof(del_streams)); > + > + context = dc_create_state(dc); > + if (context == NULL) > + goto context_alloc_fail; > + > + dc_resource_state_copy_construct_current(dc, context); > + > + /* First remove from context all streams */ > + for (i = 0; i < context->stream_count; i++) { > + struct dc_stream_state *stream = context->streams[i]; > + > + del_streams[del_streams_count++] = stream; > + } > + > + /* Remove all planes for removed streams and then remove the streams */ > + for (i = 0; i < del_streams_count; i++) { > + if (!dc_rem_all_planes_for_stream(dc, del_streams[i], context)) { > + res = DC_FAIL_DETACH_SURFACES; > + goto fail; > + } > + > + res = dc_remove_stream_from_ctx(dc, context, del_streams[i]); > + if (res != DC_OK) > + goto fail; > + } > + > + > + res = dc_validate_global_state(dc, context, false); > + > + if (res != DC_OK) { > + DRM_ERROR("%s:resource validation failed, dc_status:%d\n", __func__, res); > + goto fail; > + } > + > + res = dc_commit_state(dc, context); > + > +fail: > + dc_release_state(context); > + > +context_alloc_fail: > + return res; > +} > + > static int dm_suspend(void *handle) > { > struct amdgpu_device *adev = handle; > struct amdgpu_display_manager *dm = &adev->dm; > + int ret = 0; > + > + if (adev->in_gpu_reset) { > + mutex_lock(&dm->dc_lock); > + dm->cached_dc_state = dc_copy_state(dm->dc->current_state); > + > + dm_gpureset_toggle_interrupts(adev, dm->cached_dc_state, false); > + > + amdgpu_dm_commit_zero_streams(dm->dc); > + > + amdgpu_dm_irq_suspend(adev); > + > + return ret; > + } > > WARN_ON(adev->dm.cached_state); > adev->dm.cached_state = drm_atomic_helper_suspend(adev->ddev); > @@ -1640,6 +1744,46 @@ static void emulated_link_detect(struct dc_link *link) > > } > > +static void dm_gpureset_commit_state(struct dc_state *dc_state, > + struct amdgpu_display_manager *dm) > +{ > + struct { > + struct dc_surface_update surface_updates[MAX_SURFACES]; > + struct dc_plane_info plane_infos[MAX_SURFACES]; > + struct dc_scaling_info scaling_infos[MAX_SURFACES]; > + struct dc_flip_addrs flip_addrs[MAX_SURFACES]; > + struct dc_stream_update stream_update; > + } * bundle; > + int k, m; > + > + bundle = kzalloc(sizeof(*bundle), GFP_KERNEL); > + > + if (!bundle) { > + dm_error("Failed to allocate update bundle\n"); > + goto cleanup; > + } > + > + for (k = 0; k < dc_state->stream_count; k++) { > + bundle->stream_update.stream = dc_state->streams[k]; > + > + for (m = 0; m < dc_state->stream_status->plane_count; m++) { > + bundle->surface_updates[m].surface = > + dc_state->stream_status->plane_states[m]; > + bundle->surface_updates[m].surface->force_full_update = > + true; > + } > + dc_commit_updates_for_stream( > + dm->dc, bundle->surface_updates, > + dc_state->stream_status->plane_count, > + dc_state->streams[k], &bundle->stream_update, dc_state); > + } > + > +cleanup: > + kfree(bundle); > + > + return; > +} > + > static int dm_resume(void *handle) > { > struct amdgpu_device *adev = handle; > @@ -1656,8 +1800,44 @@ static int dm_resume(void *handle) > struct 
>         struct dm_plane_state *dm_new_plane_state;
>         struct dm_atomic_state *dm_state = to_dm_atomic_state(dm->atomic_obj.state);
>         enum dc_connection_type new_connection_type = dc_connection_none;
> -       int i, r;
> +       struct dc_state *dc_state;
> +       int i, r, j;
> +
> +       if (adev->in_gpu_reset) {
> +               dc_state = dm->cached_dc_state;
>
> +               r = dm_dmub_hw_init(adev);
> +               if (r)
> +                       DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r);
> +
> +               dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
> +               dc_resume(dm->dc);
> +
> +               amdgpu_dm_irq_resume_early(adev);
> +
> +               for (i = 0; i < dc_state->stream_count; i++) {
> +                       dc_state->streams[i]->mode_changed = true;
> +                       for (j = 0; j < dc_state->stream_status->plane_count; j++) {
> +                               dc_state->stream_status->plane_states[j]->update_flags.raw
> +                                       = 0xffffffff;
> +                       }
> +               }
> +
> +               WARN_ON(!dc_commit_state(dm->dc, dc_state));
> +
> +               dm_gpureset_commit_state(dm->cached_dc_state, dm);
> +
> +               dm_gpureset_toggle_interrupts(adev, dm->cached_dc_state, true);
> +
> +               dc_release_state(dm->cached_dc_state);
> +               dm->cached_dc_state = NULL;
> +
> +               amdgpu_dm_irq_resume_late(adev);
> +
> +               mutex_unlock(&dm->dc_lock);
> +
> +               return 0;
> +       }
>         /* Recreate dc_state - DC invalidates it when setting power state to S3. */
>         dc_release_state(dm_state->context);
>         dm_state->context = dc_create_state(dm->dc);
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> index 3f0c6298b588..40c912e0bf0c 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> @@ -315,6 +315,7 @@ struct amdgpu_display_manager {
>  #endif
>
>         struct drm_atomic_state *cached_state;
> +       struct dc_state *cached_dc_state;
>
>         struct dm_comressor_info compressor;
>
> --
> 2.17.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx