On 2023-11-10 14:08, Yat Sin, David
wrote:
[JZ]kfd_pc_sample_release is not called from kfd_ioctl_pc_sample. It is in process quit process.[AMD Official Use Only - General]-----Original Message----- From: Zhu, James <James.Zhu@xxxxxxx> Sent: Friday, November 3, 2023 9:12 AM To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx Cc: Kuehling, Felix <Felix.Kuehling@xxxxxxx>; Greathouse, Joseph <Joseph.Greathouse@xxxxxxx>; Yat Sin, David <David.YatSin@xxxxxxx>; Zhu, James <James.Zhu@xxxxxxx> Subject: [PATCH 22/24] drm/amdkfd: add pc sampling release when process release Add pc sampling release when process release, it will force to stop all activate sessions with this process. Signed-off-by: James Zhu <James.Zhu@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c | 26 ++++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 3 +++ 3 files changed, 30 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c index 66670cdb813a..00d8d3f400a9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c @@ -274,6 +274,32 @@ static int kfd_pc_sample_destroy(struct kfd_process_device *pdd, uint32_t trace_ return 0; } +void kfd_pc_sample_release(struct kfd_process_device *pdd) { + struct pc_sampling_entry *pcs_entry; + struct idr *idp; + uint32_t id; + + if (sched_policy == KFD_SCHED_POLICY_NO_HWS) { + pr_err("PC Sampling does not support sched_policy %i", sched_policy); + return; + }You do not need to check the sched_policy here, already checked in kfd_ioctl_pc_sample(..) , so cannot have a hosttrap session if policy is NO_HWS.
+ + /* force to release all PC sampling task for this process */ + idp = &pdd->dev->pcs_data.hosttrap_entry.base.pc_sampling_idr; + mutex_lock(&pdd->dev->pcs_data.mutex); + idr_for_each_entry(idp, pcs_entry, id) { + if (pcs_entry->pdd != pdd) + continue; + mutex_unlock(&pdd->dev->pcs_data.mutex);Can we not release the mutex here and just tell the worker thread to exit by setting the stop_enable bit. I find we have a lot of places where we are acquiring/releasing the mutex within loops and this results in a lot of extra states that we have to account for during the start/stop/destroy calls.+ if (pcs_entry->enabled) + kfd_pc_sample_stop(pdd, pcs_entry); + kfd_pc_sample_destroy(pdd, id, pcs_entry); + mutex_lock(&pdd->dev->pcs_data.mutex); + } + mutex_unlock(&pdd->dev->pcs_data.mutex); +} + int kfd_pc_sample(struct kfd_process_device *pdd, struct kfd_ioctl_pc_sample_args __user *args) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h index cb93909e6bd3..4ea064fdaa98 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h @@ -30,6 +30,7 @@ int kfd_pc_sample(struct kfd_process_device *pdd, struct kfd_ioctl_pc_sample_args __user *args); +void kfd_pc_sample_release(struct kfd_process_device *pdd); void kfd_pc_sample_handler(struct work_struct *work); #endif /* KFD_PC_SAMPLING_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index d22d804f180d..54f3db7eaae2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -43,6 +43,7 @@ struct mm_struct; #include "kfd_svm.h" #include "kfd_smi_events.h" #include "kfd_debug.h" +#include "kfd_pc_sampling.h" /* * List of struct kfd_process (field kfd_process). @@ -1020,6 +1021,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n", pdd->dev->id, p->pasid); + kfd_pc_sample_release(pdd); + kfd_process_device_destroy_cwsr_dgpu(pdd); kfd_process_device_destroy_ib_mem(pdd); -- 2.25.1