On 8/7/2023 10:20, Alex Deucher wrote:
On Tue, Aug 1, 2023 at 4:15 PM Mario Limonciello
<mario.limonciello@xxxxxxx> wrote:
Accessing the blob for amdgpu discovery from debugfs triggers:
[ 1924.487667] kernel BUG at mm/usercopy.c:102!
usercopy_abort() explains that it needs to be solved by creating
a cache to store the data.
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2748#note_2023519
Signed-off-by: Mario Limonciello <mario.limonciello@xxxxxxx>
Reviewed-by: Alex Deucher <alexander.deucher@xxxxxxx>
Although this avoids the issue, I've concluded it's an inappropriate fix
and will abandon it. It turns out that it spits out 64k discovery blobs
that should have been 8k.
6.5-rc and ASDN have already picked up a better solution:
db3b5cb64a9c ("drm/amdgpu: Use apt name for FW reserved region")
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +++++--
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 17 ++++++++++++++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++
3 files changed, 23 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a3b86b86dc477..66a2251bdeba4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -791,8 +791,11 @@ struct amdgpu_device {
bool accel_working;
struct notifier_block acpi_nb;
struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS];
- struct debugfs_blob_wrapper debugfs_vbios_blob;
- struct debugfs_blob_wrapper debugfs_discovery_blob;
+#if defined(CONFIG_DEBUG_FS)
+ struct debugfs_blob_wrapper debugfs_vbios_blob;
+ struct debugfs_blob_wrapper debugfs_discovery_blob;
+ struct kmem_cache *discovery_blob_cache;
+#endif
struct mutex srbm_mutex;
/* GRBM index mutex. Protects concurrent access to GRBM index */
struct mutex grbm_idx_mutex;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 56e89e76ff179..55ea5be14b188 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -2180,7 +2180,15 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
debugfs_create_blob("amdgpu_vbios", 0444, root,
&adev->debugfs_vbios_blob);
- adev->debugfs_discovery_blob.data = adev->mman.discovery_bin;
+
+ adev->discovery_blob_cache = kmem_cache_create_usercopy("amdgpu_discovery",
+ adev->mman.discovery_tmr_size,
+ 0, 0, 0,
+ adev->mman.discovery_tmr_size,
+ NULL);
+ adev->debugfs_discovery_blob.data = kmem_cache_alloc(adev->discovery_blob_cache, GFP_KERNEL);
+ memcpy(adev->debugfs_discovery_blob.data, adev->mman.discovery_bin,
+ adev->mman.discovery_tmr_size);
adev->debugfs_discovery_blob.size = adev->mman.discovery_tmr_size;
debugfs_create_blob("amdgpu_discovery", 0444, root,
&adev->debugfs_discovery_blob);
@@ -2188,6 +2196,12 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
return 0;
}
+void amdgpu_debugfs_fini(struct amdgpu_device *adev)
+{
+ kmem_cache_free(adev->discovery_blob_cache, adev->debugfs_discovery_blob.data);
+ kmem_cache_destroy(adev->discovery_blob_cache);
+}
+
#else
int amdgpu_debugfs_init(struct amdgpu_device *adev)
{
@@ -2197,4 +2211,5 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
{
return 0;
}
+inline void amdgpu_debugfs_fini(struct amdgpu_device *adev) {}
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 0593ef8fe0a63..1a3b30dff5171 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2276,6 +2276,8 @@ amdgpu_pci_remove(struct pci_dev *pdev)
struct drm_device *dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = drm_to_adev(dev);
+ amdgpu_debugfs_fini(adev);
+
amdgpu_xcp_dev_unplug(adev);
drm_dev_unplug(dev);
--
2.34.1