On 2019-10-28 4:10 p.m., Jason Gunthorpe wrote: > From: Jason Gunthorpe <jgg@xxxxxxxxxxxx> > > find_vma() must be called under the mmap_sem, reorganize this code to > do the vma check after entering the lock. > > Further, fix the unlocked use of struct task_struct's mm, instead use > the mm from hmm_mirror which has an active mm_grab. Also the mm_grab > must be converted to a mm_get before acquiring mmap_sem or calling > find_vma(). > > Fixes: 66c45500bfdc ("drm/amdgpu: use new HMM APIs and helpers") > Fixes: 0919195f2b0d ("drm/amdgpu: Enable amdgpu_ttm_tt_get_user_pages in worker threads") > Cc: Alex Deucher <alexander.deucher@xxxxxxx> > Cc: Christian König <christian.koenig@xxxxxxx> > Cc: David (ChunMing) Zhou <David1.Zhou@xxxxxxx> > Cc: amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxxxx> One question inline to confirm my understanding. Otherwise this patch is Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 37 ++++++++++++++----------- > 1 file changed, 21 insertions(+), 16 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index dff41d0a85fe96..c0e41f1f0c2365 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -35,6 +35,7 @@ > #include <linux/hmm.h> > #include <linux/pagemap.h> > #include <linux/sched/task.h> > +#include <linux/sched/mm.h> > #include <linux/seq_file.h> > #include <linux/slab.h> > #include <linux/swap.h> > @@ -788,7 +789,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) > struct hmm_mirror *mirror = bo->mn ? &bo->mn->mirror : NULL; > struct ttm_tt *ttm = bo->tbo.ttm; > struct amdgpu_ttm_tt *gtt = (void *)ttm; > - struct mm_struct *mm = gtt->usertask->mm; > + struct mm_struct *mm; > unsigned long start = gtt->userptr; > struct vm_area_struct *vma; > struct hmm_range *range; > @@ -796,25 +797,14 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) > uint64_t *pfns; > int r = 0; > > - if (!mm) /* Happens during process shutdown */ > - return -ESRCH; > - > if (unlikely(!mirror)) { > DRM_DEBUG_DRIVER("Failed to get hmm_mirror\n"); > - r = -EFAULT; > - goto out; > + return -EFAULT; > } > > - vma = find_vma(mm, start); > - if (unlikely(!vma || start < vma->vm_start)) { > - r = -EFAULT; > - goto out; > - } > - if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && > - vma->vm_file)) { > - r = -EPERM; > - goto out; > - } > + mm = mirror->hmm->mmu_notifier.mm; > + if (!mmget_not_zero(mm)) /* Happens during process shutdown */ This works because mirror->hmm->mmu_notifier holds an mmgrab reference to the mm? So the MM will not just go away, but if the mmget refcount is 0, it means the mm is marked for destruction and shouldn't be used any more. > + return -ESRCH; > > range = kzalloc(sizeof(*range), GFP_KERNEL); > if (unlikely(!range)) { > @@ -847,6 +837,17 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) > hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT); > > down_read(&mm->mmap_sem); > + vma = find_vma(mm, start); > + if (unlikely(!vma || start < vma->vm_start)) { > + r = -EFAULT; > + goto out_unlock; > + } > + if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && > + vma->vm_file)) { > + r = -EPERM; > + goto out_unlock; > + } > + > r = hmm_range_fault(range, 0); > up_read(&mm->mmap_sem); > > @@ -865,15 +866,19 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) > } > > gtt->range = range; > + mmput(mm); > > return 0; > > +out_unlock: > + up_read(&mm->mmap_sem); > out_free_pfns: > hmm_range_unregister(range); > kvfree(pfns); > out_free_ranges: > kfree(range); > out: > + mmput(mm); > return r; > } >