From: Mukul Joshi <mukul.joshi@xxxxxxx> Currently, even if kfd_locked is set, a process is first created and then removed to work around a race condition in updating the kfd_locked flag. Rework kfd_locked handling to ensure no process is created if kfd_locked is set. This is achieved by updating kfd_locked under kfd_processes_mutex. With this change, there is no need for kfd_locked to be an atomic counter. Instead, it can be a regular integer. Signed-off-by: Mukul Joshi <mukul.joshi@xxxxxxx> Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7 ------- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 21 ++++++++++++++++----- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 ++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 8 +++++++- 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index cf1a97583901..82749405be79 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -146,13 +146,6 @@ static int kfd_open(struct inode *inode, struct file *filep) if (IS_ERR(process)) return PTR_ERR(process); - if (kfd_is_locked()) { - dev_dbg(kfd_device, "kfd is locked!\n" - "process %d unreferenced", process->pasid); - kfd_unref_process(process); - return -EAGAIN; - } - /* filep now owns the reference returned by kfd_create_process */ filep->private_data = process; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 1a482f0855d2..7aa4f4ee7752 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -42,7 +42,7 @@ * once locked, kfd driver will stop any further GPU execution. * create process (open) will return -EAGAIN. 
*/ -static atomic_t kfd_locked = ATOMIC_INIT(0); +static int kfd_locked; #ifdef CONFIG_DRM_AMDGPU_CIK extern const struct kfd2kgd_calls gfx_v7_kfd2kgd; @@ -872,7 +872,9 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd) return ret; } - atomic_dec(&kfd_locked); + mutex_lock(&kfd_processes_mutex); + --kfd_locked; + mutex_unlock(&kfd_processes_mutex); for (i = 0; i < kfd->num_nodes; i++) { node = kfd->nodes[i]; @@ -885,21 +887,27 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd) bool kfd_is_locked(void) { - return (atomic_read(&kfd_locked) > 0); + lockdep_assert_held(&kfd_processes_mutex); + return (kfd_locked > 0); } void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) { struct kfd_node *node; int i; + int count; if (!kfd->init_complete) return; /* for runtime suspend, skip locking kfd */ if (!run_pm) { + mutex_lock(&kfd_processes_mutex); + count = ++kfd_locked; + mutex_unlock(&kfd_processes_mutex); + /* For first KFD device suspend all the KFD processes */ - if (atomic_inc_return(&kfd_locked) == 1) + if (count == 1) kfd_suspend_all_processes(); } @@ -925,7 +933,10 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) /* for runtime resume, skip unlocking kfd */ if (!run_pm) { - count = atomic_dec_return(&kfd_locked); + mutex_lock(&kfd_processes_mutex); + count = --kfd_locked; + mutex_unlock(&kfd_processes_mutex); + WARN_ONCE(count < 0, "KFD suspend / resume ref. 
error"); if (count == 0) ret = kfd_resume_all_processes(); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 5cfebcc8b305..400b4dcbdf05 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -201,6 +201,8 @@ extern int amdgpu_no_queue_eviction_on_vm_fault; /* Enable eviction debug messages */ extern bool debug_evictions; +extern struct mutex kfd_processes_mutex; + enum cache_policy { cache_policy_coherent, cache_policy_noncoherent diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 9e667fa38df1..75cd4eafaa44 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -50,7 +50,7 @@ struct mm_struct; * Unique/indexed by mm_struct* */ DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); -static DEFINE_MUTEX(kfd_processes_mutex); +DEFINE_MUTEX(kfd_processes_mutex); DEFINE_SRCU(kfd_processes_srcu); @@ -818,6 +818,12 @@ struct kfd_process *kfd_create_process(struct file *filep) */ mutex_lock(&kfd_processes_mutex); + if (kfd_is_locked()) { + mutex_unlock(&kfd_processes_mutex); + pr_debug("KFD is locked! Cannot create process"); + return ERR_PTR(-EINVAL); + } + /* A prior open of /dev/kfd could have already created the process. */ process = find_process(thread, false); if (process) { -- 2.39.2