Am 25.01.24 um 07:18 schrieb Le Ma:
This patch eliminates the interrupt warning below:
"[drm] Fence fallback timer expired on ring sdma0.0".
An early VM page table (PT) clearing job is sent to SDMA before interrupts are
enabled; this behavior was introduced by the patch below:
- drm/amdkfd: Export DMABufs from KFD using GEM handles
Relocating the DRM client creation to after drm_dev_register() also
looks like a more proper flow.
In general this sounds like a good idea to me; the question is whether we
shouldn't call amdgpu_amdkfd_device_init() a bit later in general now.
The KFD device can't work without the DRM render nodes any more, so
enabling it too early could cause trouble in userspace as well.
Regards,
Christian.
Change-Id: I0fece177b78345187068f92a823d96b3b7581140
Signed-off-by: Le Ma <le.ma@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 13 +------------
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 ++
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 11 +++++++++++
3 files changed, 14 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index add315644773..69eb0f5574d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -139,14 +139,13 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}
-static const struct drm_client_funcs kfd_client_funcs = {
+const struct drm_client_funcs kfd_client_funcs = {
.unregister = drm_client_release,
};
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
int i;
int last_valid_bit;
- int ret;
amdgpu_amdkfd_gpuvm_init_mem_limits();
@@ -165,12 +164,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
.enable_mes = adev->enable_mes,
};
- ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", &kfd_client_funcs);
- if (ret) {
- dev_err(adev->dev, "Failed to init DRM client: %d\n", ret);
- return;
- }
-
/* this is going to have a few of the MSBs set that we need to
* clear
*/
@@ -209,10 +202,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
&gpu_resources);
- if (adev->kfd.init_complete)
- drm_client_register(&adev->kfd.client);
- else
- drm_client_release(&adev->kfd.client);
amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 00eed8c10cd4..b2c6f2b3c0fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -40,6 +40,8 @@
extern uint64_t amdgpu_amdkfd_total_mem_size;
+extern const struct drm_client_funcs kfd_client_funcs;
+
enum TLB_FLUSH_TYPE {
TLB_FLUSH_LEGACY = 0,
TLB_FLUSH_LIGHTWEIGHT,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 0d0aa4b798ac..d0b98343481d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2293,6 +2293,17 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
drm_fbdev_generic_setup(adev_to_drm(adev), 32);
}
+ if (adev->kfd.init_complete) {
+ ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd",
+ &kfd_client_funcs);
+ if (ret) {
+ dev_err(adev->dev, "Failed to init DRM client: %d\n",
+ ret);
+ goto err_pci;
+ }
+ drm_client_register(&adev->kfd.client);
+ }
+
ret = amdgpu_debugfs_init(adev);
if (ret)
DRM_ERROR("Creating debugfs files failed (%d).\n", ret);