On 2017-10-28 10:44 AM, Christian König wrote: > Am 28.10.2017 um 01:35 schrieb Felix Kuehling: >> The kfd_process doesn't own a reference to the mm_struct, so it can >> disappear without warning even while the kfd_process still exists. >> >> Therefore, avoid dereferencing the kfd_process.mm pointer and make >> it opaque. Use get_task_mm to get a temporary reference to the mm >> when it's needed. >> >> v2: removed unnecessary WARN_ON >> >> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com> >> Reviewed-by: Oded Gabbay <oded.gabbay at gmail.com> (v1) >> --- >>  drivers/gpu/drm/amd/amdkfd/kfd_events.c | 19 +++++++++++++++---- >>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h   | 7 ++++++- >>  drivers/gpu/drm/amd/amdkfd/kfd_process.c | 1 - >>  3 files changed, 21 insertions(+), 6 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c >> b/drivers/gpu/drm/amd/amdkfd/kfd_events.c >> index 944abfa..61ce547 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c >> @@ -24,8 +24,8 @@ >>  #include <linux/slab.h> >>  #include <linux/types.h> >>  #include <linux/sched/signal.h> >> +#include <linux/sched/mm.h> >>  #include <linux/uaccess.h> >> -#include <linux/mm.h> >>  #include <linux/mman.h> >>  #include <linux/memory.h> >>  #include "kfd_priv.h" >> @@ -904,14 +904,24 @@ void kfd_signal_iommu_event(struct kfd_dev >> *dev, unsigned int pasid, >>       * running so the lookup function returns a locked process. >>       */ >>      struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); >> +   struct mm_struct *mm; >>       if (!p) >>          return; /* Presumably process exited. */ >>  +   /* Take a safe reference to the mm_struct, which may otherwise >> +    * disappear even while the kfd_process is still referenced. >> +    */ >> +   mm = get_task_mm(p->lead_thread); >> +   if (!mm) { >> +       mutex_unlock(&p->mutex); >> +       return; /* Process is exiting */ >> +   } >> + >>      memset(&memory_exception_data, 0, sizeof(memory_exception_data)); >>  -   down_read(&p->mm->mmap_sem); >> -   vma = find_vma(p->mm, address); >> +   down_read(&mm->mmap_sem); >> +   vma = find_vma(mm, address); >>       memory_exception_data.gpu_id = dev->id; >>      memory_exception_data.va = address; >> @@ -937,7 +947,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, >> unsigned int pasid, >>          } >>      } >>  -   up_read(&p->mm->mmap_sem); >> +   up_read(&mm->mmap_sem); >> +   mmput(mm); >>       mutex_lock(&p->event_mutex); >>  diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >> index 7d86ec9..1a483a7 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >> @@ -494,7 +494,12 @@ struct kfd_process { >>       */ >>      struct hlist_node kfd_processes; >>  -   struct mm_struct *mm; >> +   /* >> +    * Opaque pointer to mm_struct. We don't hold a reference to >> +    * it so it should never be dereferenced from here. This is >> +    * only used for looking up processes by their mm. >> +    */ >> +   void *mm; > > Using a void* here still doesn't look like the right thing to do. For added context, this is how it's used: static struct kfd_process *create_process(const struct task_struct *thread) { ... process->mm = thread->mm; ... hash_add_rcu(kfd_processes_table, &process->kfd_processes, (uintptr_t)process->mm); ... } static struct kfd_process *find_process_by_mm(const struct mm_struct *mm) { struct kfd_process *process; hash_for_each_possible_rcu(kfd_processes_table, process, kfd_processes, (uintptr_t)mm) if (process->mm == mm) return process; return NULL; } > >>       struct mutex mutex; >>  diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c >> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c >> index 3ccb3b5..695fa2a 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c >> @@ -200,7 +200,6 @@ static void kfd_process_destroy_delayed(struct >> rcu_head *rcu) >>      struct kfd_process *p; >>       p = container_of(rcu, struct kfd_process, rcu); >> -   WARN_ON(atomic_read(&p->mm->mm_count) <= 0); >>       mmdrop(p->mm); > > If you are concerned that p->mm is used after this just set it to NULL. I'm not concerned about it. Regards,  Felix > > Regards, > Christian. > >>  > >