On 1/5/2025 8:15 AM, Jiang Liu wrote: > Introduce new interface amdgpu_xcp_drm_dev_free() to free a specific > drm_device crreated by amdgpu_xcp_drm_dev_alloc(), which will be used > to do error recovery. > > Signed-off-by: Jiang Liu <gerry@xxxxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c | 11 +++- > drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h | 1 + > drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c | 70 +++++++++++++++++---- > drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h | 1 + > 4 files changed, 70 insertions(+), 13 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c > index e209b5e101df..401fbaa0b6b8 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c > @@ -359,6 +359,8 @@ int amdgpu_xcp_dev_register(struct amdgpu_device *adev, > ret = drm_dev_register(adev->xcp_mgr->xcp[i].ddev, ent->driver_data); > if (ret) > return ret; > + > + adev->xcp_mgr->xcp[i].registered = true; > } > > return 0; > @@ -376,12 +378,19 @@ void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev) > if (!adev->xcp_mgr->xcp[i].ddev) > break; > > + // Restore and free the original drm_device. > p_ddev = adev->xcp_mgr->xcp[i].ddev; > - drm_dev_unplug(p_ddev); > + if (adev->xcp_mgr->xcp[i].registered) { > + drm_dev_unplug(p_ddev); > + adev->xcp_mgr->xcp[i].registered = false; > + } > p_ddev->render->dev = adev->xcp_mgr->xcp[i].rdev; > p_ddev->primary->dev = adev->xcp_mgr->xcp[i].pdev; > p_ddev->driver = adev->xcp_mgr->xcp[i].driver; > p_ddev->vma_offset_manager = adev->xcp_mgr->xcp[i].vma_offset_manager; > + amdgpu_xcp_drm_dev_free(p_ddev); > + > + adev->xcp_mgr->xcp[i].ddev = NULL; > } > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h > index b63f53242c57..cd06a4a232be 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h > @@ -101,6 +101,7 @@ struct amdgpu_xcp { > uint8_t id; > uint8_t mem_id; > bool valid; > + bool registered; > atomic_t ref_cnt; > struct drm_device *ddev; > struct drm_device *rdev; > diff --git a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c > index faed84172dd4..9058d71b4756 100644 > --- a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c > +++ b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c > @@ -45,18 +45,26 @@ static const struct drm_driver amdgpu_xcp_driver = { > > static int8_t pdev_num; > static struct xcp_device *xcp_dev[MAX_XCP_PLATFORM_DEVICE]; > +static struct mutex xcp_mutex; I think this needs to be static DEFINE_MUTEX(xcp_mutex). > > int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev) > { > struct platform_device *pdev; > struct xcp_device *pxcp_dev; > char dev_name[20]; > - int ret; > + int ret, index; > > + mutex_lock(&xcp_mutex); > + ret = -ENODEV; Preference would be do this inside the below if() to associate the error with the condition. > if (pdev_num >= MAX_XCP_PLATFORM_DEVICE) > - return -ENODEV; > + goto out_unlock; > > - snprintf(dev_name, sizeof(dev_name), "amdgpu_xcp_%d", pdev_num); > + for (index = 0; index < MAX_XCP_PLATFORM_DEVICE; index++) { > + if (!xcp_dev[index]) > + break; > + } > + > + snprintf(dev_name, sizeof(dev_name), "amdgpu_xcp_%d", index); > pdev = platform_device_register_simple(dev_name, -1, NULL, 0); > if (IS_ERR(pdev)) > return PTR_ERR(pdev); Seems mutex is left locked here. > @@ -72,10 +80,11 @@ int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev) > goto out_devres; > } > > - xcp_dev[pdev_num] = pxcp_dev; > - xcp_dev[pdev_num]->pdev = pdev; > + xcp_dev[index] = pxcp_dev; > + xcp_dev[index]->pdev = pdev; > *ddev = &pxcp_dev->drm; > pdev_num++; > + mutex_unlock(&xcp_mutex); > > return 0; > > @@ -83,21 +92,58 @@ int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev) > devres_release_group(&pdev->dev, NULL); > out_unregister: > platform_device_unregister(pdev); > +out_unlock: > + mutex_unlock(&xcp_mutex); > > return ret; > } > EXPORT_SYMBOL(amdgpu_xcp_drm_dev_alloc); > > -void amdgpu_xcp_drv_release(void) > +static void amdgpu_xcp_drm_dev_destroy(int index) > +{ > + struct platform_device *pdev; > + > + pdev = xcp_dev[index]->pdev; > + devres_release_group(&pdev->dev, NULL); > + platform_device_unregister(pdev); > + xcp_dev[index] = NULL; > + pdev_num--; > +} > + > +void amdgpu_xcp_drm_dev_free(struct drm_device *ddev) > { > - for (--pdev_num; pdev_num >= 0; --pdev_num) { > - struct platform_device *pdev = xcp_dev[pdev_num]->pdev; > + int index; > + struct xcp_device *pxcp_dev; > + > + if (ddev == NULL) > + return; > > - devres_release_group(&pdev->dev, NULL); > - platform_device_unregister(pdev); > - xcp_dev[pdev_num] = NULL; > + pxcp_dev = container_of(ddev, struct xcp_device, drm); > + > + mutex_lock(&xcp_mutex); > + for (index = 0; index < MAX_XCP_PLATFORM_DEVICE; index++) { > + if (xcp_dev[index] == pxcp_dev) { > + amdgpu_xcp_drm_dev_destroy(index); > + break; > + } > + } > + mutex_unlock(&xcp_mutex); > +} > +EXPORT_SYMBOL(amdgpu_xcp_drm_dev_free); > + > +void amdgpu_xcp_drv_release(void) > +{ > + int index; > + > + mutex_lock(&xcp_mutex); > + for (index = 0; index < MAX_XCP_PLATFORM_DEVICE; index++) { > + if (xcp_dev[index]) { > + WARN_ON(xcp_dev[index]); Why is this WARN check needed? There is already a if() check for valid index. Also, would suggest to separate out amdgpu_xcp.c from xcp_drv.c. xcp_drv introducing a new interface may be kept in a separate patch. Thanks, Lijo > + amdgpu_xcp_drm_dev_destroy(index); > + } > } > - pdev_num = 0; > + WARN_ON(pdev_num != 0); > + mutex_unlock(&xcp_mutex); > } > EXPORT_SYMBOL(amdgpu_xcp_drv_release); > > diff --git a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h > index c1c4b679bf95..580a1602c8e3 100644 > --- a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h > +++ b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h > @@ -25,5 +25,6 @@ > #define _AMDGPU_XCP_DRV_H_ > > int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev); > +void amdgpu_xcp_drm_dev_free(struct drm_device *ddev); > void amdgpu_xcp_drv_release(void); > #endif /* _AMDGPU_XCP_DRV_H_ */