On 17.03.25 02:22, Srinivasan Shanmugam wrote:
> The `amdgpu_enforce_isolation_wq` is allocated with the WQ_MEM_RECLAIM
> flag to mitigate workqueue flushing related to memory reclamation & to
> ensure proper memory handling during deferred work execution.
>
> Fixes the below:
>
> [ 2333.852549] workqueue: WQ_MEM_RECLAIM gfx_0.0.0:drm_sched_run_job_work [gpu_sched] is flushing !WQ_MEM_RECLAIM events:amdgpu_gfx_enforce_isolation_handler [amdgpu]
> [ 2333.853008] WARNING: CPU: 10 PID: 6250 at kernel/workqueue.c:3704 check_flush_dependency+0x124/0x130
> [ 2333.853020] Modules linked in: amdgpu(OE) amdxcp drm_exec gpu_sched drm_buddy drm_ttm_helper ttm drm_suballoc_helper drm_client_lib drm_display_helper cec rc_core drm_kms_helper rfcomm nf_conntrack_netlink xfrm_user xfrm_algo xt_addrtype br_netfilter xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp nft_compat nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables libcrc32c nfnetlink bridge stp llc overlay cmac algif_hash algif_skcipher af_alg bnep intel_rapl_msr amd_atl intel_rapl_common snd_hda_codec_realtek binfmt_misc snd_hda_codec_generic snd_hda_scodec_component snd_hda_codec_hdmi snd_hda_intel snd_intel_dspcfg snd_intel_sdw_acpi edac_mce_amd snd_hda_codec snd_hda_core snd_hwdep kvm_amd snd_pcm kvm iwlmvm crct10dif_pclmul snd_seq_midi polyval_clmulni nls_iso8859_1 snd_seq_midi_event mac80211 polyval_generic snd_rawmidi ghash_clmulni_intel libarc4 sha512_ssse3 joydev sha256_ssse3 snd_seq btusb sha1_ssse3 snd_seq_device aesni_intel input_leds btrtl crypto_simd cryptd
> [ 2333.853178] btintel snd_timer iwlwifi rapl btbcm btmtk gigabyte_wmi mxm_wmi snd wmi_bmof k10temp bluetooth ccp cfg80211 soundcore mac_hid sch_fq_codel msr parport_pc nfsd ppdev lp parport auth_rpcgss nfs_acl drm lockd grace efi_pstore sunrpc ip_tables x_tables autofs4 hid_generic crc32_pclmul nvme i2c_piix4 usbhid i2c_smbus hid nvme_core ahci igb libahci dca i2c_algo_bit video wmi
> [ 2333.853274] CPU: 10 UID: 0 PID: 6250 Comm: kworker/u48:2 Tainted: G U W OE 6.12.0-amdrelease6dot4forleftoverlocals #27
> [ 2333.853282] Tainted: [U]=USER, [W]=WARN, [O]=OOT_MODULE, [E]=UNSIGNED_MODULE
> [ 2333.853286] Hardware name: Gigabyte Technology Co., Ltd. X570 AORUS PRO WIFI/X570 AORUS PRO WIFI, BIOS F36a 02/16/2022
> [ 2333.853291] Workqueue: gfx_0.0.0 drm_sched_run_job_work [gpu_sched]
> [ 2333.853302] RIP: 0010:check_flush_dependency+0x124/0x130
> [ 2333.853307] Code: 55 18 4d 89 e0 48 8d 8b 90 01 00 00 48 c7 c7 10 98 3e 8e c6 05 9e 30 7b 02 01 48 8b 70 08 48 81 c6 90 01 00 00 e8 9c 21 fd ff <0f> 0b e9 ff fe ff ff e9 98 d9 0c 01 90 90 90 90 90 90 90 90 90 90
> [ 2333.853313] RSP: 0018:ffff9fdd82c07ac0 EFLAGS: 00010086
> [ 2333.853319] RAX: 0000000000000000 RBX: ffff89b840050e00 RCX: 0000000000000027
> [ 2333.853323] RDX: ffff89bb6dbf1a88 RSI: 0000000000000001 RDI: ffff89bb6dbf1a80
> [ 2333.853327] RBP: ffff9fdd82c07ae8 R08: 0000000000000003 R09: 0000000000000001
> [ 2333.853331] R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffc1864470
> [ 2333.853335] R13: ffff89b856e58d80 R14: 0000000000000000 R15: ffff89bb6d808900
> [ 2333.853340] FS: 0000000000000000(0000) GS:ffff89bb6da00000(0000) knlGS:0000000000000000
> [ 2333.853344] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 2333.853348] CR2: 00005613aff86098 CR3: 0000000337e5c000 CR4: 0000000000350ef0
> [ 2333.853353] Call Trace:
> [ 2333.853358] <TASK>
> [ 2333.853365] ? show_regs+0x69/0x80
> [ 2333.853373] ? __warn+0x93/0x1a0
> [ 2333.853382] ? check_flush_dependency+0x124/0x130
> [ 2333.853389] ? report_bug+0x18f/0x1a0
> [ 2333.853399] ? handle_bug+0x63/0xa0
> [ 2333.853407] ? exc_invalid_op+0x19/0x70
> [ 2333.853414] ? asm_exc_invalid_op+0x1b/0x20
> [ 2333.853422] ? __pfx_amdgpu_gfx_enforce_isolation_handler+0x10/0x10 [amdgpu]
> [ 2333.853749] ? check_flush_dependency+0x124/0x130
> [ 2333.853759] __flush_work+0xee/0x600
> [ 2333.853766] ? srso_return_thunk+0x5/0x5f
> [ 2333.853778] ? srso_return_thunk+0x5/0x5f
> [ 2333.853783] ? __mutex_lock+0xc08/0xe20
> [ 2333.853792] ? srso_return_thunk+0x5/0x5f
> [ 2333.853798] ? trace_hardirqs_on+0x1e/0xd0
> [ 2333.853804] ? srso_return_thunk+0x5/0x5f
> [ 2333.853815] cancel_delayed_work_sync+0x71/0x80

That cancel_delayed_work_sync() triggers this warning at all was a bug in
the upstream code.

The question is whether we ever call flush_work() on the work item.
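For reference, the rule behind check_flush_dependency() is that a worker
running on a WQ_MEM_RECLAIM workqueue may only wait synchronously
(flush_work(), cancel_delayed_work_sync(), ...) on work items that are
themselves queued on a WQ_MEM_RECLAIM workqueue, because only such queues
have a rescuer thread available under memory pressure. A minimal sketch of
the pattern the patch switches to (purely illustrative, made-up names, not
the actual driver code):

#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;      /* hypothetical */
static struct delayed_work example_dwork;        /* hypothetical */

static void example_handler(struct work_struct *work)
{
        /* deferred work body */
}

static int example_init(void)
{
        /*
         * WQ_MEM_RECLAIM gives the queue its own rescuer thread, so
         * waiting on this work from another WQ_MEM_RECLAIM worker no
         * longer trips check_flush_dependency().
         */
        example_wq = alloc_workqueue("example_wq", WQ_MEM_RECLAIM, 0);
        if (!example_wq)
                return -ENOMEM;

        INIT_DELAYED_WORK(&example_dwork, example_handler);
        queue_delayed_work(example_wq, &example_dwork, msecs_to_jiffies(250));
        return 0;
}

static void example_fini(void)
{
        /*
         * Synchronous cancel is safe even from a reclaim-tainted context,
         * because the work now lives on a WQ_MEM_RECLAIM queue.
         */
        cancel_delayed_work_sync(&example_dwork);
        destroy_workqueue(example_wq);
}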
Regards,
Christian.

> [ 2333.853823] amdgpu_gfx_kfd_sch_ctrl+0x14f/0x290 [amdgpu]
> [ 2333.854090] amdgpu_gfx_enforce_isolation_ring_begin_use+0x1d4/0x3e0 [amdgpu]
> [ 2333.854347] ? cancel_delayed_work_sync+0x4f/0x80
> [ 2333.854356] gfx_v12_0_ring_begin_use+0x1b/0x30 [amdgpu]
> [ 2333.854618] amdgpu_ring_alloc+0x48/0x70 [amdgpu]
> [ 2333.854854] amdgpu_ib_schedule+0x16f/0x8a0 [amdgpu]
> [ 2333.855098] ? srso_return_thunk+0x5/0x5f
> [ 2333.855105] amdgpu_job_run+0xad/0x260 [amdgpu]
> [ 2333.855405] drm_sched_run_job_work+0x258/0x440 [gpu_sched]
> [ 2333.855415] process_one_work+0x21e/0x680
> [ 2333.855427] worker_thread+0x190/0x330
> [ 2333.855434] ? __pfx_worker_thread+0x10/0x10
> [ 2333.855439] kthread+0xe7/0x120
> [ 2333.855444] ? __pfx_kthread+0x10/0x10
> [ 2333.855450] ret_from_fork+0x3c/0x60
> [ 2333.855455] ? __pfx_kthread+0x10/0x10
> [ 2333.855460] ret_from_fork_asm+0x1a/0x30
> [ 2333.855474] </TASK>
> [ 2333.855477] irq event stamp: 103430
> [ 2333.855480] hardirqs last enabled at (103429): [<ffffffff8da9237a>] irqentry_exit+0x3a/0x90
> [ 2333.855486] hardirqs last disabled at (103430): [<ffffffff8da99e54>] __schedule+0xf84/0x1b00
> [ 2333.855490] softirqs last enabled at (85926): [<ffffffff8c8fdabd>] __irq_exit_rcu+0x7d/0xa0
> [ 2333.855495] softirqs last disabled at (85919): [<ffffffff8c8fdabd>] __irq_exit_rcu+0x7d/0xa0
>
> Fixes: afefd6f24502 ("drm/amdgpu: Implement Enforce Isolation Handler for KGD/KFD serialization")
> Cc: Christian König <christian.koenig@xxxxxxx>
> Cc: Alex Deucher <alexander.deucher@xxxxxxx>
> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  2 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 17 +++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 10 ++++++----
>  3 files changed, 25 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 87062c1adcdf..cbd31c164f9f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -123,6 +123,8 @@
>
>  #define GFX_SLICE_PERIOD_MS 250
>
> +extern struct workqueue_struct *amdgpu_enforce_isolation_wq;
> +
>  struct amdgpu_gpu_instance {
>          struct amdgpu_device            *adev;
>          int                             mgpu_fan_enabled;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 7ca2ebdd3c95..562304d703a7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -105,6 +105,8 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
>  #define AMDGPU_VBIOS_SKIP (1U << 0)
>  #define AMDGPU_VBIOS_OPTIONAL (1U << 1)
> +struct workqueue_struct *amdgpu_enforce_isolation_wq;
> +
>  static const struct drm_driver amdgpu_kms_driver;
>
>  const char *amdgpu_asic_name[] = {
> @@ -4323,6 +4325,14 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>                  adev->gfx.enforce_isolation[i].xcp_id = i;
>          }
>
> +        /* Allocate the enforce isolation workqueue with WQ_MEM_RECLAIM */
> +        amdgpu_enforce_isolation_wq = alloc_workqueue("amdgpu_enforce_isolation_wq",
> +                                                      WQ_MEM_RECLAIM, 0);
> +        if (!amdgpu_enforce_isolation_wq) {
> +                dev_err(adev->dev, "Failed to allocate enforcement isolation workqueue\n");
> +                return -ENOMEM;
> +        }
> +
>          INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
>
>          adev->gfx.gfx_off_req_count = 1;
> @@ -4821,6 +4831,13 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
>          if (adev->mman.discovery_bin)
>                  amdgpu_discovery_fini(adev);
>
> +        /* Clean up the enforcement isolation workqueue */
> +        if (amdgpu_enforce_isolation_wq) {
> +                flush_workqueue(amdgpu_enforce_isolation_wq);
> +                destroy_workqueue(amdgpu_enforce_isolation_wq);
> +                amdgpu_enforce_isolation_wq = NULL;
> +        }
> +
>          amdgpu_reset_put_reset_domain(adev->reset_domain);
>          adev->reset_domain = NULL;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 984e6ff6e463..0dabffe395bd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -1938,8 +1938,9 @@ static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx,
>
>                  if (adev->gfx.kfd_sch_req_count[idx] == 0 &&
>                      adev->gfx.kfd_sch_inactive[idx]) {
> -                        schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
> -                                              msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx]));
> +                        queue_delayed_work(amdgpu_enforce_isolation_wq,
> +                                           &adev->gfx.enforce_isolation[idx].work,
> +                                           msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx]));
>                  }
>          } else {
>                  if (adev->gfx.kfd_sch_req_count[idx] == 0) {
> @@ -1995,8 +1996,9 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work)
>          }
>          if (fences) {
>                  /* we've already had our timeslice, so let's wrap this up */
> -                schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
> -                                      msecs_to_jiffies(1));
> +                queue_delayed_work(amdgpu_enforce_isolation_wq,
> +                                   &adev->gfx.enforce_isolation[idx].work,
> +                                   msecs_to_jiffies(1));
>          } else {
>                  /* Tell KFD to resume the runqueue */
>                  if (adev->kfd.init_complete) {