If one thread holds the mmap read lock while a second thread is waiting
to acquire the write lock, further threads cannot take the read lock
until the stalled writer gets its turn. This leads to the deadlock case
below:

thread 1: prefetch range, migrate to VRAM, takes mmap read lock
thread 2: svm_range_evict_svm_bo_worker, migrate to RAM, takes mmap
          read lock
thread 3: svm_range_restore_work, xnack off case only, takes mmap write
          lock to flush the deferred list

To avoid the deadlock, use mmap_write_trylock, go to sleep on lock
contention, then try the lock again.

Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index b71d47afd243..1983849c4070 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1608,22 +1608,25 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
  * @svms: the svm range list
  * @mm: the mm structure
  *
- * Context: Returns with mmap write lock held, pending deferred work flushed
+ * Context: Non-atomic context, may sleep
  *
+ * Returns with mmap write lock held, pending deferred work flushed
  */
 void
 svm_range_list_lock_and_flush_work(struct svm_range_list *svms,
 				   struct mm_struct *mm)
 {
-retry_flush_work:
-	flush_work(&svms->deferred_list_work);
-	mmap_write_lock(mm);
-
-	if (list_empty(&svms->deferred_range_list))
-		return;
-	mmap_write_unlock(mm);
-	pr_debug("retry flush\n");
-	goto retry_flush_work;
+	while (true) {
+		flush_work(&svms->deferred_list_work);
+		if (!mmap_write_trylock(mm)) {
+			usleep_range(1000, 2000);
+			continue;
+		}
+		if (list_empty(&svms->deferred_range_list))
+			return;
+		mmap_write_unlock(mm);
+		pr_debug("retry flush\n");
+	}
 }
 
 static void svm_range_restore_work(struct work_struct *work)
-- 
2.17.1
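
For illustration, below is a minimal userspace sketch of the same
trylock-and-retry pattern the hunk introduces. It is not part of the
patch: pthread_rwlock_trywrlock() and usleep() stand in for
mmap_write_trylock() and usleep_range(), and the flush_deferred_work()
and lock_and_flush() names are made up for the example. The point is
that on write-lock contention the caller backs off and retries instead
of blocking, so it never stalls readers the way a sleeping
mmap_write_lock() would.

/*
 * Userspace analogue of the trylock-and-retry pattern (illustrative only,
 * not kernel code): try the write lock, sleep briefly on contention, and
 * retry; only return once the lock is held and the "deferred work" is done.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_rwlock_t map_lock = PTHREAD_RWLOCK_INITIALIZER;
static bool deferred_list_empty;

/* Stand-in for flush_work(&svms->deferred_list_work). */
static void flush_deferred_work(void)
{
	deferred_list_empty = true;
}

/* Returns with the write lock held and the deferred work flushed. */
static void lock_and_flush(void)
{
	while (true) {
		flush_deferred_work();
		if (pthread_rwlock_trywrlock(&map_lock) != 0) {
			/* Lock contention: back off instead of blocking. */
			usleep(1000);
			continue;
		}
		if (deferred_list_empty)
			return;
		/* More work was queued meanwhile: drop the lock and retry. */
		pthread_rwlock_unlock(&map_lock);
	}
}

int main(void)
{
	lock_and_flush();
	printf("write lock held, deferred work flushed\n");
	pthread_rwlock_unlock(&map_lock);
	return 0;
}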