[AMD Official Use Only - AMD Internal Distribution Only] Hi Sunil Khatri, -----Original Message----- From: Khatri, Sunil <Sunil.Khatri@xxxxxxx> Sent: Monday, November 11, 2024 2:47 PM To: Zhang, Jesse(Jie) <Jesse.Zhang@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx Cc: Deucher, Alexander <Alexander.Deucher@xxxxxxx>; Koenig, Christian <Christian.Koenig@xxxxxxx>; Prosyak, Vitaly <Vitaly.Prosyak@xxxxxxx>; Huang, Tim <Tim.Huang@xxxxxxx> Subject: Re: [PATCH] drm/amdgpu: fix double free vcn ip_dump This isnt needed any more as the issue is introduced with per ip changes in VCN. Changes to accomodate per ip changes for ip dump is already reviewed and in progress to be merged. With that each IP of VCN have it own memory which is freed only once. We dont need this change anymore as we need per vcn ip/instance memory which is a design change and is taken care with changes in merge pipeline. Thanks, I see your patch and will drop it. Regards Jesse Regards Sunil Khatri On 11/11/2024 12:00 PM, Jesse.zhang@xxxxxxx wrote: > [ 90.441868] ------------[ cut here ]------------ > [ 90.441873] kernel BUG at mm/slub.c:553! > [ 90.441885] Oops: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI > [ 90.441892] CPU: 0 PID: 1523 Comm: amd_pci_unplug Tainted: G E 6.10.0+ #47 > [ 90.441900] Hardware name: AMD Splinter/Splinter-PHX2, BIOS TS41102C_925 01/05/2024 > [ 90.441907] RIP: 0010:__slab_free+0x1ce/0x320 > [ 90.441916] Code: f7 c3 00 02 00 00 0f 84 6c ff ff ff fb 0f 1f 44 00 00 e9 61 ff ff ff 41 f7 46 08 87 04 00 00 0f 85 d6 fe ff ff e9 ca fe ff ff <0f> 0b 49 3b 5c 24 28 75 bd 48 8b 44 24 28 49 89 4c 24 28 ba 01 00 > [ 90.441927] RSP: 0018:ffffb9c801cefcb0 EFLAGS: 00010246 > [ 90.441934] RAX: ffff8cdb481dcf00 RBX: 0000000000200012 RCX: ffff8cdb481dce00 > [ 90.441940] RDX: ffff8cdb481dce00 RSI: ffffe3f904207700 RDI: ffff8cdb40042a00 > [ 90.441945] RBP: ffffb9c801cefd50 R08: 0000000000000001 R09: ffffffffc149f632 > [ 90.441950] R10: ffff8cdb481dce00 R11: ffff8ce26e621e18 R12: ffffe3f904207700 > [ 90.441956] R13: ffff8cdb481dce00 R14: ffff8cdb40042a00 R15: ffff8cdb481dce00 > [ 90.441962] FS: 00007f0a4f3fec40(0000) GS:ffff8ce26e600000(0000) knlGS:0000000000000000 > [ 90.441968] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [ 90.441974] CR2: 000055bf74ba8930 CR3: 0000000108f48000 CR4: 0000000000750ef0 > [ 90.441979] PKRU: 55555554 > [ 90.441983] Call Trace: > [ 90.441987] <TASK> > [ 90.441991] ? show_regs+0x6b/0x80 > [ 90.441999] ? __die_body+0x24/0x70 > [ 90.442005] ? die+0x42/0x70 > [ 90.442011] ? do_trap+0xda/0xf0 > [ 90.442018] ? do_error_trap+0x76/0xa0 > [ 90.442023] ? __slab_free+0x1ce/0x320 > [ 90.442030] ? exc_invalid_op+0x57/0x80 > [ 90.442036] ? __slab_free+0x1ce/0x320 > [ 90.442042] ? asm_exc_invalid_op+0x1f/0x30 > [ 90.442053] ? vcn_v4_0_sw_fini+0xc2/0x110 [amdgpu] > [ 90.442308] ? __slab_free+0x1ce/0x320 > [ 90.442316] ? release_firmware.part.0+0x2e/0x50 > [ 90.442323] ? srso_alias_return_thunk+0x5/0xfbef5 > [ 90.442332] ? srso_alias_return_thunk+0x5/0xfbef5 > [ 90.442338] ? vcn_v4_0_sw_fini+0xc2/0x110 [amdgpu] > [ 90.442496] kfree+0x23e/0x2f0 > [ 90.442502] vcn_v4_0_sw_fini+0xc2/0x110 [amdgpu] > [ 90.442653] amdgpu_device_fini_sw+0x133/0x700 [amdgpu] > [ 90.442835] amdgpu_driver_release_kms+0x1a/0x30 [amdgpu] > [ 90.442981] drm_dev_release+0x2d/0x50 [drm] > [ 90.443003] drm_minor_release+0x3d/0x60 [drm] > [ 90.443026] drm_release+0x90/0xd0 [drm] > [ 90.443052] __fput+0xfa/0x2f0 > [ 90.443059] __fput_sync+0x1e/0x30 > [ 90.443064] __x64_sys_close+0x42/0x90 > [ 90.443071] x64_sys_call+0x18f6/0x20d0 > [ 90.443076] do_syscall_64+0x6f/0x110 > [ 90.443083] ? do_syscall_64+0x7b/0x110 > [ 90.443089] entry_SYSCALL_64_after_hwframe+0x76/0x7e > [ 90.443096] RIP: 0033:0x7f0a51514f67 > [ 90.443102] Code: ff e8 0d 16 02 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 41 c3 48 83 ec 18 89 7c 24 0c e8 73 ba f7 ff > [ 90.443113] RSP: 002b:00007ffec29b16e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000003 > [ 90.443121] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f0a51514f67 > [ 90.443127] RDX: 0000000000000001 RSI: 00007f0a51776485 RDI: 0000000000000003 > [ 90.443132] RBP: 00007ffec29b1770 R08: 000055f942381170 R09: 0000000000000000 > [ 90.443138] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 > [ 90.443143] R13: 000055f90a2a41c0 R14: 0000000000000000 R15: 0000000000000001 > [ 90.443152] </TASK> > > Set ip_dump to null after releasing vcn ip_dump. > > Signed-off-by: Jesse Zhang <jesse.zhang@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 5 ++++- > drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 5 ++++- > drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 5 ++++- > drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 5 ++++- > drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 5 ++++- > drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 5 ++++- > 6 files changed, 24 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c > b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c > index a327c3bf84f2..b23b55539b43 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c > @@ -263,7 +263,10 @@ static int vcn_v2_0_sw_fini(struct > amdgpu_ip_block *ip_block) > > r = amdgpu_vcn_sw_fini(adev, inst); > > - kfree(adev->vcn.ip_dump); > + if (adev->vcn.ip_dump) { > + kfree(adev->vcn.ip_dump); > + adev->vcn.ip_dump = NULL; > + } > > return r; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c > b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c > index b78c6da0a3cd..df3855e7b5c1 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c > @@ -331,7 +331,10 @@ static int vcn_v3_0_sw_fini(struct > amdgpu_ip_block *ip_block) > > r = amdgpu_vcn_sw_fini(adev, inst); > > - kfree(adev->vcn.ip_dump); > + if (adev->vcn.ip_dump) { > + kfree(adev->vcn.ip_dump); > + adev->vcn.ip_dump = NULL; > + } > return r; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c > b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c > index 5c3b718ebdfa..d8b46f440ba5 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c > @@ -295,7 +295,10 @@ static int vcn_v4_0_sw_fini(struct amdgpu_ip_block *ip_block) > amdgpu_vcn_sysfs_reset_mask_fini(adev); > r = amdgpu_vcn_sw_fini(adev, inst); > > - kfree(adev->vcn.ip_dump); > + if (adev->vcn.ip_dump) { > + kfree(adev->vcn.ip_dump); > + adev->vcn.ip_dump = NULL; > + } > > return r; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c > b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c > index aa06b2fdeb7a..c71793431433 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c > +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c > @@ -257,7 +257,10 @@ static int vcn_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block) > amdgpu_vcn_sysfs_reset_mask_fini(adev); > r = amdgpu_vcn_sw_fini(adev, inst); > > - kfree(adev->vcn.ip_dump); > + if (adev->vcn.ip_dump) { > + kfree(adev->vcn.ip_dump); > + adev->vcn.ip_dump = NULL; > + } > > return r; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c > b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c > index 4e7da56a9f34..4c4b8a711b99 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c > +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c > @@ -252,7 +252,10 @@ static int vcn_v4_0_5_sw_fini(struct > amdgpu_ip_block *ip_block) > > r = amdgpu_vcn_sw_fini(adev, inst); > > - kfree(adev->vcn.ip_dump); > + if (adev->vcn.ip_dump) { > + kfree(adev->vcn.ip_dump); > + adev->vcn.ip_dump = NULL; > + } > > return r; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c > b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c > index a72de204f130..71f2cb75e91f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c > @@ -226,7 +226,10 @@ static int vcn_v5_0_0_sw_fini(struct amdgpu_ip_block *ip_block) > amdgpu_vcn_sysfs_reset_mask_fini(adev); > r = amdgpu_vcn_sw_fini(adev, inst); > > - kfree(adev->vcn.ip_dump); > + if (adev->vcn.ip_dump) { > + kfree(adev->vcn.ip_dump); > + adev->vcn.ip_dump = NULL; > + } > > return r; > }