Am 21.08.2018 um 23:23 schrieb Andrey Grodzovsky: > Problem: > When executing echo 1 > /sys/class/drm/card0/device/remove kasan warning > as below and page fault happen because adev->gart.pages is already freed by the > time amdgpu_gart_unbind is called. > > BUG: KASAN: user-memory-access in amdgpu_gart_unbind+0x98/0x180 [amdgpu] > Write of size 8 at addr 0000000000003648 by task bash/1828 > CPU: 2 PID: 1828 Comm: bash Tainted: G W O 4.18.0-rc1-dev+ #29 > Hardware name: Gigabyte Technology Co., Ltd. AX370-Gaming/AX370-Gaming-CF, BIOS F3 06/19/2017 > Call Trace: > dump_stack+0x71/0xab > kasan_report+0x109/0x390 > amdgpu_gart_unbind+0x98/0x180 [amdgpu] > ttm_tt_unbind+0x43/0x60 [ttm] > ttm_bo_move_ttm+0x83/0x1c0 [ttm] > ttm_bo_handle_move_mem+0xb97/0xd00 [ttm] > ttm_bo_evict+0x273/0x530 [ttm] > ttm_mem_evict_first+0x29c/0x360 [ttm] > ttm_bo_force_list_clean+0xfc/0x210 [ttm] > ttm_bo_clean_mm+0xe7/0x160 [ttm] > amdgpu_ttm_fini+0xda/0x1d0 [amdgpu] > amdgpu_bo_fini+0xf/0x60 [amdgpu] > gmc_v8_0_sw_fini+0x36/0x70 [amdgpu] > amdgpu_device_fini+0x2d0/0x7d0 [amdgpu] > amdgpu_driver_unload_kms+0x6a/0xd0 [amdgpu] > drm_dev_unregister+0x79/0x180 [drm] > amdgpu_pci_remove+0x2a/0x60 [amdgpu] > pci_device_remove+0x5b/0x100 > device_release_driver_internal+0x236/0x360 > pci_stop_bus_device+0xbf/0xf0 > pci_stop_and_remove_bus_device_locked+0x16/0x30 > remove_store+0xda/0xf0 > kernfs_fop_write+0x186/0x220 > __vfs_write+0xcc/0x330 > vfs_write+0xe6/0x250 > ksys_write+0xb1/0x140 > do_syscall_64+0x77/0x1e0 > entry_SYSCALL_64_after_hwframe+0x44/0xa9 > RIP: 0033:0x7f66ebbb32c0 > > Fix: > Split gmc_v{6,7,8,9}_0_gart_fini to postpone amdgpu_gart_fini until after > memory managers are shut down since gart unbind happens > as part of this procedure. 
> > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com> > --- > 1 | 0 > drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 9 ++------- > drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 16 ++-------------- > drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 16 ++-------------- > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 ++-------------- > 5 files changed, 8 insertions(+), 49 deletions(-) > create mode 100644 1 > > diff --git a/1 b/1 > new file mode 100644 > index 0000000..e69de29 Good cleanup, but what the heck is that? Christian. > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c > index c14cf1c..0a0a4dc 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c > @@ -633,12 +633,6 @@ static void gmc_v6_0_gart_disable(struct amdgpu_device *adev) > amdgpu_gart_table_vram_unpin(adev); > } > > -static void gmc_v6_0_gart_fini(struct amdgpu_device *adev) > -{ > - amdgpu_gart_table_vram_free(adev); > - amdgpu_gart_fini(adev); > -} > - > static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev, > u32 status, u32 addr, u32 mc_client) > { > @@ -936,8 +930,9 @@ static int gmc_v6_0_sw_fini(void *handle) > > amdgpu_gem_force_release(adev); > amdgpu_vm_manager_fini(adev); > - gmc_v6_0_gart_fini(adev); > + amdgpu_gart_table_vram_free(adev); > amdgpu_bo_fini(adev); > + amdgpu_gart_fini(adev); > release_firmware(adev->gmc.fw); > adev->gmc.fw = NULL; > > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > index 0c3a161..afbadfc 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > @@ -750,19 +750,6 @@ static void gmc_v7_0_gart_disable(struct amdgpu_device *adev) > } > > /** > - * gmc_v7_0_gart_fini - vm fini callback > - * > - * @adev: amdgpu_device pointer > - * > - * Tears down the driver GART/VM setup (CIK). 
> - */ > -static void gmc_v7_0_gart_fini(struct amdgpu_device *adev) > -{ > - amdgpu_gart_table_vram_free(adev); > - amdgpu_gart_fini(adev); > -} > - > -/** > * gmc_v7_0_vm_decode_fault - print human readable fault info > * > * @adev: amdgpu_device pointer > @@ -1091,8 +1078,9 @@ static int gmc_v7_0_sw_fini(void *handle) > > amdgpu_gem_force_release(adev); > amdgpu_vm_manager_fini(adev); > - gmc_v7_0_gart_fini(adev); > + amdgpu_gart_table_vram_free(adev); > amdgpu_bo_fini(adev); > + amdgpu_gart_fini(adev); > release_firmware(adev->gmc.fw); > adev->gmc.fw = NULL; > > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > index 274c932..d871dae 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > @@ -969,19 +969,6 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev) > } > > /** > - * gmc_v8_0_gart_fini - vm fini callback > - * > - * @adev: amdgpu_device pointer > - * > - * Tears down the driver GART/VM setup (CIK). 
> - */ > -static void gmc_v8_0_gart_fini(struct amdgpu_device *adev) > -{ > - amdgpu_gart_table_vram_free(adev); > - amdgpu_gart_fini(adev); > -} > - > -/** > * gmc_v8_0_vm_decode_fault - print human readable fault info > * > * @adev: amdgpu_device pointer > @@ -1192,8 +1179,9 @@ static int gmc_v8_0_sw_fini(void *handle) > > amdgpu_gem_force_release(adev); > amdgpu_vm_manager_fini(adev); > - gmc_v8_0_gart_fini(adev); > + amdgpu_gart_table_vram_free(adev); > amdgpu_bo_fini(adev); > + amdgpu_gart_fini(adev); > release_firmware(adev->gmc.fw); > adev->gmc.fw = NULL; > > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > index 0bf8439..46183c7 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > @@ -1003,26 +1003,12 @@ static int gmc_v9_0_sw_init(void *handle) > return 0; > } > > -/** > - * gmc_v9_0_gart_fini - vm fini callback > - * > - * @adev: amdgpu_device pointer > - * > - * Tears down the driver GART/VM setup (CIK). > - */ > -static void gmc_v9_0_gart_fini(struct amdgpu_device *adev) > -{ > - amdgpu_gart_table_vram_free(adev); > - amdgpu_gart_fini(adev); > -} > - > static int gmc_v9_0_sw_fini(void *handle) > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > > amdgpu_gem_force_release(adev); > amdgpu_vm_manager_fini(adev); > - gmc_v9_0_gart_fini(adev); > > /* > * TODO: > @@ -1035,7 +1021,9 @@ static int gmc_v9_0_sw_fini(void *handle) > */ > amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); > > + amdgpu_gart_table_vram_free(adev); > amdgpu_bo_fini(adev); > + amdgpu_gart_fini(adev); > > return 0; > }