On 1/25/2024 1:37 PM, Le Ma wrote: > This patch is to eliminate interrupt warning below: > > "[drm] Fence fallback timer expired on ring sdma0.0". > > An early vm pt clearing job is sent to SDMA ahead of interrupt enabled, > introduced by patch below: > > - drm/amdkfd: Export DMABufs from KFD using GEM handles > The drm client creation logic in this patch won't work well for dynamic partition switch cases. Better add a ''Fixes' tag also. This looks fine to me, needs to be checked by Felix anyway. Thanks, Lijo > And re-locating the drm client creation following after drm_dev_register > looks like a more proper flow. > > v2: wrap the drm client creation > > Signed-off-by: Le Ma <le.ma@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 ++++++++++++++-------- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 +++ > 3 files changed, 27 insertions(+), 11 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c > index 77e263660288..41db030ddc4e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c > @@ -141,11 +141,31 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work) > static const struct drm_client_funcs kfd_client_funcs = { > .unregister = drm_client_release, > }; > + > +int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev) > +{ > + int ret; > + > + if (!adev->kfd.init_complete) > + return 0; > + > + ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", > + &kfd_client_funcs); > + if (ret) { > + dev_err(adev->dev, "Failed to init DRM client: %d\n", > + ret); > + return ret; > + } > + > + drm_client_register(&adev->kfd.client); > + > + return 0; > +} > + > void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) > { > int i; > int last_valid_bit; > - int ret; > > amdgpu_amdkfd_gpuvm_init_mem_limits(); > > @@ -164,12 +184,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) > .enable_mes = adev->enable_mes, > }; > > - ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", &kfd_client_funcs); > - if (ret) { > - dev_err(adev->dev, "Failed to init DRM client: %d\n", ret); > - return; > - } > - > /* this is going to have a few of the MSBs set that we need to > * clear > */ > @@ -208,10 +222,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) > > adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev, > &gpu_resources); > - if (adev->kfd.init_complete) > - drm_client_register(&adev->kfd.client); > - else > - drm_client_release(&adev->kfd.client); > > amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > index 584a0cea5572..da175c384adf 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > @@ -182,6 +182,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, > struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, > struct mm_struct *mm, > struct svm_range_bo *svm_bo); > + > +int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev); > #if defined(CONFIG_DEBUG_FS) > int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data); > #endif > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > index 475bd59c9ac2..91d5d9435067 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > @@ -2255,6 +2255,10 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, > if (ret) > goto err_pci; > > + ret = amdgpu_amdkfd_drm_client_create(adev); > + if (ret) > + goto err_pci; > + > /* > * 1. don't init fbdev on hw without DCE > * 2. don't init fbdev if there are no connectors