On 1/5/2025 8:15 AM, Jiang Liu wrote:Introduce new interface amdgpu_xcp_drm_dev_free() to free a specific drm_device crreated by amdgpu_xcp_drm_dev_alloc(), which will be used to do error recovery.
Signed-off-by: Jiang Liu <gerry@xxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c | 11 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h | 1 + drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c | 70 +++++++++++++++++---- drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h | 1 + 4 files changed, 70 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c index e209b5e101df..401fbaa0b6b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c @@ -359,6 +359,8 @@ int amdgpu_xcp_dev_register(struct amdgpu_device *adev, ret = drm_dev_register(adev->xcp_mgr->xcp[i].ddev, ent->driver_data); if (ret) return ret; + + adev->xcp_mgr->xcp[i].registered = true; }
return 0; @@ -376,12 +378,19 @@ void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev) if (!adev->xcp_mgr->xcp[i].ddev) break;
+ // Restore and free the original drm_device. p_ddev = adev->xcp_mgr->xcp[i].ddev; - drm_dev_unplug(p_ddev); + if (adev->xcp_mgr->xcp[i].registered) { + drm_dev_unplug(p_ddev); + adev->xcp_mgr->xcp[i].registered = false; + } p_ddev->render->dev = adev->xcp_mgr->xcp[i].rdev; p_ddev->primary->dev = adev->xcp_mgr->xcp[i].pdev; p_ddev->driver = adev->xcp_mgr->xcp[i].driver; p_ddev->vma_offset_manager = adev->xcp_mgr->xcp[i].vma_offset_manager; + amdgpu_xcp_drm_dev_free(p_ddev); + + adev->xcp_mgr->xcp[i].ddev = NULL; } }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h index b63f53242c57..cd06a4a232be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h @@ -101,6 +101,7 @@ struct amdgpu_xcp { uint8_t id; uint8_t mem_id; bool valid; + bool registered; atomic_t ref_cnt; struct drm_device *ddev; struct drm_device *rdev; diff --git a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c index faed84172dd4..9058d71b4756 100644 --- a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c +++ b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c @@ -45,18 +45,26 @@ static const struct drm_driver amdgpu_xcp_driver = {
static int8_t pdev_num; static struct xcp_device *xcp_dev[MAX_XCP_PLATFORM_DEVICE]; +static struct mutex xcp_mutex;
I think this needs to be static DEFINE_MUTEX(xcp_mutex). int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev) { struct platform_device *pdev; struct xcp_device *pxcp_dev; char dev_name[20]; - int ret; + int ret, index;
+ mutex_lock(&xcp_mutex); + ret = -ENODEV;
Preference would be do this inside the below if() to associate the errorwith the condition. if (pdev_num >= MAX_XCP_PLATFORM_DEVICE) - return -ENODEV; + goto out_unlock;
- snprintf(dev_name, sizeof(dev_name), "amdgpu_xcp_%d", pdev_num); + for (index = 0; index < MAX_XCP_PLATFORM_DEVICE; index++) { + if (!xcp_dev[index]) + break; + } + + snprintf(dev_name, sizeof(dev_name), "amdgpu_xcp_%d", index); pdev = platform_device_register_simple(dev_name, -1, NULL, 0); if (IS_ERR(pdev)) return PTR_ERR(pdev);
Seems mutex is left locked here.
Will fixed in next version:) @@ -72,10 +80,11 @@ int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev) goto out_devres; }
- xcp_dev[pdev_num] = pxcp_dev; - xcp_dev[pdev_num]->pdev = pdev; + xcp_dev[index] = pxcp_dev; + xcp_dev[index]->pdev = pdev; *ddev = &pxcp_dev->drm; pdev_num++; + mutex_unlock(&xcp_mutex);
return 0;
@@ -83,21 +92,58 @@ int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev) devres_release_group(&pdev->dev, NULL); out_unregister: platform_device_unregister(pdev); +out_unlock: + mutex_unlock(&xcp_mutex);
return ret; } EXPORT_SYMBOL(amdgpu_xcp_drm_dev_alloc);
-void amdgpu_xcp_drv_release(void) +static void amdgpu_xcp_drm_dev_destroy(int index) +{ + struct platform_device *pdev; + + pdev = xcp_dev[index]->pdev; + devres_release_group(&pdev->dev, NULL); + platform_device_unregister(pdev); + xcp_dev[index] = NULL; + pdev_num--; +} + +void amdgpu_xcp_drm_dev_free(struct drm_device *ddev) { - for (--pdev_num; pdev_num >= 0; --pdev_num) { - struct platform_device *pdev = xcp_dev[pdev_num]->pdev; + int index; + struct xcp_device *pxcp_dev; + + if (ddev == NULL) + return;
- devres_release_group(&pdev->dev, NULL); - platform_device_unregister(pdev); - xcp_dev[pdev_num] = NULL; + pxcp_dev = container_of(ddev, struct xcp_device, drm); + + mutex_lock(&xcp_mutex); + for (index = 0; index < MAX_XCP_PLATFORM_DEVICE; index++) { + if (xcp_dev[index] == pxcp_dev) { + amdgpu_xcp_drm_dev_destroy(index); + break; + } + } + mutex_unlock(&xcp_mutex); +} +EXPORT_SYMBOL(amdgpu_xcp_drm_dev_free); + +void amdgpu_xcp_drv_release(void) +{ + int index; + + mutex_lock(&xcp_mutex); + for (index = 0; index < MAX_XCP_PLATFORM_DEVICE; index++) { + if (xcp_dev[index]) { + WARN_ON(xcp_dev[index]);
Why is this WARN check needed? There is already a if() check for validindex.Also, would suggest to separate out amdgpu_xcp.c from xcp_drv.c. xcp_drvintroducing a new interface may be kept in a separate patch.
With new implementation, all xcp devices should have already be removed when amdgpu_xcp_drv_release() gets called, So hope to verify whether it works as expected.
Thanks! Thanks,Lijo+ amdgpu_xcp_drm_dev_destroy(index); + } } - pdev_num = 0; + WARN_ON(pdev_num != 0); + mutex_unlock(&xcp_mutex); } EXPORT_SYMBOL(amdgpu_xcp_drv_release);
diff --git a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h index c1c4b679bf95..580a1602c8e3 100644 --- a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h +++ b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h @@ -25,5 +25,6 @@ #define _AMDGPU_XCP_DRV_H_
int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev); +void amdgpu_xcp_drm_dev_free(struct drm_device *ddev); void amdgpu_xcp_drv_release(void); #endif /* _AMDGPU_XCP_DRV_H_ */
|