> -----Original Message----- > From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf > Of Christian König > Sent: Wednesday, May 10, 2017 2:09 PM > To: amd-gfx at lists.freedesktop.org > Subject: [PATCH] drm/amdgpu: fix fundamental suspend/resume issue > > From: Christian König <christian.koenig at amd.com> > > Reinitializing the VM manager during suspend/resume is a very very bad > idea since all the VMs are still active and kicking. > > This can lead to random VM faults after resume when new processes > become the same client ID assigned. > > Signed-off-by: Christian König <christian.koenig at amd.com> Reviewed-by: Alex Deucher <alexander.deucher at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 22 > +++++++++++++++++++++- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 1 + > drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 15 ++------------- > drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 15 ++------------- > drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 15 ++------------- > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 ++-------------- > 6 files changed, 30 insertions(+), 54 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index ed97a2e..9803392 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -790,6 +790,7 @@ void amdgpu_vm_reset_id(struct amdgpu_device > *adev, unsigned vmhub, > struct amdgpu_vm_id_manager *id_mgr = &adev- > >vm_manager.id_mgr[vmhub]; > struct amdgpu_vm_id *id = &id_mgr->ids[vmid]; > > + atomic64_set(&id->owner, 0); > id->gds_base = 0; > id->gds_size = 0; > id->gws_base = 0; > @@ -799,6 +800,26 @@ void amdgpu_vm_reset_id(struct amdgpu_device > *adev, unsigned vmhub, > } > > /** > + * amdgpu_vm_reset_all_id - reset VMID to zero > + * > + * @adev: amdgpu device structure > + * > + * Reset VMID to force flush on next use > + */ > +void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev) > +{ > + unsigned i, j; > + > + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { > + struct amdgpu_vm_id_manager *id_mgr = > + &adev->vm_manager.id_mgr[i]; > + > + for (j = 1; j < id_mgr->num_ids; ++j) > + amdgpu_vm_reset_id(adev, i, j); > + } > +} > + > +/** > * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo > * > * @vm: requested vm > @@ -2393,7 +2414,6 @@ void amdgpu_vm_manager_init(struct > amdgpu_device *adev) > for (i = 0; i < AMDGPU_MAX_RINGS; ++i) > adev->vm_manager.seqno[i] = 0; > > - > atomic_set(&adev->vm_manager.vm_pte_next_ring, 0); > atomic64_set(&adev->vm_manager.client_counter, 0); > spin_lock_init(&adev->vm_manager.prt_lock); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > index abc0bab..d62547d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > @@ -209,6 +209,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, > struct amdgpu_ring *ring, > int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job); > void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, > unsigned vmid); > +void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev); > int amdgpu_vm_update_directories(struct amdgpu_device *adev, > struct amdgpu_vm *vm); > int amdgpu_vm_clear_freed(struct amdgpu_device *adev, > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c > index a572979..d860939 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c > @@ -950,10 +950,6 @@ static int gmc_v6_0_suspend(void *handle) > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > > - if (adev->vm_manager.enabled) { > - gmc_v6_0_vm_fini(adev); > - adev->vm_manager.enabled = false; > - } > gmc_v6_0_hw_fini(adev); > > return 0; > @@ -968,16 +964,9 @@ static int gmc_v6_0_resume(void *handle) > if (r) > return r; > > - if (!adev->vm_manager.enabled) { > - r = gmc_v6_0_vm_init(adev); > - if (r) { > - dev_err(adev->dev, "vm manager initialization failed > (%d).\n", r); > - return r; > - } > - adev->vm_manager.enabled = true; > - } > + amdgpu_vm_reset_all_ids(adev); > > - return r; > + return 0; > } > > static bool gmc_v6_0_is_idle(void *handle) > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > index a9083a1..2750e5c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > @@ -1117,10 +1117,6 @@ static int gmc_v7_0_suspend(void *handle) > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > > - if (adev->vm_manager.enabled) { > - gmc_v7_0_vm_fini(adev); > - adev->vm_manager.enabled = false; > - } > gmc_v7_0_hw_fini(adev); > > return 0; > @@ -1135,16 +1131,9 @@ static int gmc_v7_0_resume(void *handle) > if (r) > return r; > > - if (!adev->vm_manager.enabled) { > - r = gmc_v7_0_vm_init(adev); > - if (r) { > - dev_err(adev->dev, "vm manager initialization failed > (%d).\n", r); > - return r; > - } > - adev->vm_manager.enabled = true; > - } > + amdgpu_vm_reset_all_ids(adev); > > - return r; > + return 0; > } > > static bool gmc_v7_0_is_idle(void *handle) > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > index 4ac9978..f56b408 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > @@ -1209,10 +1209,6 @@ static int gmc_v8_0_suspend(void *handle) > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > > - if (adev->vm_manager.enabled) { > - gmc_v8_0_vm_fini(adev); > - adev->vm_manager.enabled = false; > - } > gmc_v8_0_hw_fini(adev); > > return 0; > @@ -1227,16 +1223,9 @@ static int gmc_v8_0_resume(void *handle) > if (r) > return r; > > - if (!adev->vm_manager.enabled) { > - r = gmc_v8_0_vm_init(adev); > - if (r) { > - dev_err(adev->dev, "vm manager initialization failed > (%d).\n", r); > - return r; > - } > - adev->vm_manager.enabled = true; > - } > + amdgpu_vm_reset_all_ids(adev); > > - return r; > + return 0; > } > > static bool gmc_v8_0_is_idle(void *handle) > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > index b9f11fa..ae8fd91 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > @@ -797,10 +797,6 @@ static int gmc_v9_0_suspend(void *handle) > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > > - if (adev->vm_manager.enabled) { > - gmc_v9_0_vm_fini(adev); > - adev->vm_manager.enabled = false; > - } > gmc_v9_0_hw_fini(adev); > > return 0; > @@ -815,17 +811,9 @@ static int gmc_v9_0_resume(void *handle) > if (r) > return r; > > - if (!adev->vm_manager.enabled) { > - r = gmc_v9_0_vm_init(adev); > - if (r) { > - dev_err(adev->dev, > - "vm manager initialization failed (%d).\n", r); > - return r; > - } > - adev->vm_manager.enabled = true; > - } > + amdgpu_vm_reset_all_ids(adev); > > - return r; > + return 0; > } > > static bool gmc_v9_0_is_idle(void *handle) > -- > 2.7.4 > > _______________________________________________ > amd-gfx mailing list > amd-gfx at lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx