On Wed, May 10, 2017 at 5:20 AM, S, Shirish <Shirish.S at amd.com> wrote: > From: Shirish S <shirish.s at amd.com> > > amdgpu_device_resume() has a high time consuming > call of amdgpu_late_init() which sets the clock_gating > state of all IP blocks and is blocking. > This patch defers only this setting of clock gating state > operation to post resume of amdgpu driver but ideally before > the UI comes up or in some cases post ui as well. > > With this change the resume time of amdgpu_device comes down > from 1.299s to 0.199s which further helps in reducing the overall > system resume time. > > TEST:(For ChromiumOS on STONEY only) > * UI comes up > * S3 works multiple times > * Resume time is consistent and lower > * amdgpu_late_init() call gets called consistently and no errors reported. > > Signed-off-by: Shirish S <shirish.s at amd.com> > Reviewed-by: Huang Rui <ray.huang at amd.com> Please make sure this doesn't cause problems with S4. A few comments below. > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 57 +++++++++++++++++++++++------- > 2 files changed, 47 insertions(+), 13 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 1be8aed..addb204 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -1615,6 +1615,9 @@ struct amdgpu_device { > /* amdkfd interface */ > struct kfd_dev *kfd; > > + /* delayed work_func for deferring clockgating during resume */ > + struct delayed_work late_init_work; > + > struct amdgpu_virt virt; > > /* link all shadow bo */ > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index cfc650c..a6850cf 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -57,6 +57,8 @@ > > MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); > > +#define AMDGPU_RESUME_MS 2000 > + > static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev); > static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev); > > @@ -1655,22 +1657,11 @@ static int amdgpu_init(struct amdgpu_device *adev) > return 0; > } > > -static int amdgpu_late_init(struct amdgpu_device *adev) > +static int amdgpu_late_set_cg_state(struct amdgpu_device *adev) > { > int i = 0, r; > > for (i = 0; i < adev->num_ip_blocks; i++) { > - if (!adev->ip_blocks[i].status.valid) > - continue; > - if (adev->ip_blocks[i].version->funcs->late_init) { > - r = adev->ip_blocks[i].version->funcs->late_init((void *)adev); > - if (r) { > - DRM_ERROR("late_init of IP block <%s> failed %d\n", > - adev->ip_blocks[i].version->funcs->name, r); > - return r; > - } > - adev->ip_blocks[i].status.late_initialized = true; > - } > /* skip CG for VCE/UVD, it's handled specially */ > if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && > adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) { > @@ -1688,6 +1679,27 @@ static int amdgpu_late_init(struct amdgpu_device *adev) > return 0; > } > > +static int amdgpu_late_init(struct amdgpu_device *adev) > +{ > + int i = 0, r; > + > + for (i = 0; i < adev->num_ip_blocks; i++) { > + if (!adev->ip_blocks[i].status.valid) > + continue; > + if (adev->ip_blocks[i].version->funcs->late_init) { > + r = adev->ip_blocks[i].version->funcs->late_init((void *)adev); > + if (r) { > + DRM_ERROR("late_init of IP block <%s> failed %d\n", > + adev->ip_blocks[i].version->funcs->name, r); > + return r; > + } > + adev->ip_blocks[i].status.late_initialized = true; > + } > + } > + > + return 0; > +} > + > static int amdgpu_fini(struct amdgpu_device *adev) > { > int i, r; > @@ -1775,6 +1787,13 @@ static int amdgpu_fini(struct amdgpu_device *adev) > return 0; > } > > +static void amdgpu_late_init_func_handler(struct work_struct *work) > +{ > + struct amdgpu_device *adev = > + container_of(work, struct amdgpu_device, late_init_work.work); > + amdgpu_late_set_cg_state(adev); > +} > + > int amdgpu_suspend(struct amdgpu_device *adev) > { > int i, r; > @@ -2074,6 +2093,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, > INIT_LIST_HEAD(&adev->gtt_list); > spin_lock_init(&adev->gtt_list_lock); > > + INIT_DELAYED_WORK(&adev->late_init_work, amdgpu_late_init_func_handler); > + > if (adev->asic_type >= CHIP_BONAIRE) { > adev->rmmio_base = pci_resource_start(adev->pdev, 5); > adev->rmmio_size = pci_resource_len(adev->pdev, 5); > @@ -2239,6 +2260,12 @@ int amdgpu_device_init(struct amdgpu_device *adev, > goto failed; > } > > + r = amdgpu_late_set_cg_state(adev); > + if (r) { > + dev_err(adev->dev, "amdgpu_late_set_cg_state failed\n"); > + goto failed; > + } I think we can probably use the delayed work thread here as well to speed up initial driver load. > + > return 0; > > failed: > @@ -2270,6 +2297,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev) > amdgpu_fbdev_fini(adev); > r = amdgpu_fini(adev); > adev->accel_working = false; > + cancel_delayed_work_sync(&adev->late_init_work); > /* free i2c buses */ > if (!amdgpu_device_has_dc_support(adev)) > amdgpu_i2c_fini(adev); > @@ -2449,11 +2477,14 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) > if (r) > DRM_ERROR("ib ring test failed (%d).\n", r); > } > - > + /* defer the cg state setting to post resume */ > r = amdgpu_late_init(adev); > if (r) > goto unlock; > > + mod_delayed_work(system_wq, &adev->late_init_work, > + msecs_to_jiffies(AMDGPU_RESUME_MS)); > + > /* pin cursors */ > list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { > struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); > -- > 2.7.4 > > _______________________________________________ > amd-gfx mailing list > amd-gfx at lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx