Re: [PATCH] drm/amdgpu: handle userptr corner cases with HMM path

"Yang, Philip" <Philip.Yang@xxxxxxx> · Mon, 4 Mar 2019 16:48:48 +0000

Hi Felix,

I will split this and submit two patches for review.

I didn't realize mprotect can have userptr with more than 2 vmas. For 
malloc, it will always be one or two vmas. I will loop over all VMAs to 
call hmm_vma_fault to get userptr pages.

Thanks,
Philip

On 2019-03-01 7:46 p.m., Kuehling, Felix wrote:
> Since you're addressing two distinct bugs, please split this into two patches.
> 
> For the multiple VMAs, should we generalize that to handle any number of VMAs? It's not a typical case, but you could easily construct situations with mprotect where different parts of the same buffer have different VMAs and then register that as a single user pointer. Or you could user MAP_FIXED to map multiple files to adjacent virtual addresses.
> 
> There may be two ways to handle this:
> 1. If the userptr address range spans more than one VMA, fail
> 2. Loop over all the VMAs in the address range
> 
> Thanks,
>    Felix
> 
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Yang, Philip
> Sent: Friday, March 01, 2019 12:30 PM
> To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> Cc: Yang, Philip <Philip.Yang@xxxxxxx>
> Subject: [PATCH] drm/amdgpu: handle userptr corner cases with HMM path
> 
> Those corner cases are found by kfdtest.KFDIPCTest.
> 
> userptr may cross two vmas if the forked child process (not call exec
> after fork) malloc buffer, then free it, and then malloc larger size
> buf, kerenl will create new vma adjacent to old vma which was cloned
> from parent process, some pages of userptr are in the first vma, the
> rest pages are in the second vma. HMM expects range only have one vma,
> we have to use two ranges to handle this case. See is_mergeable_anon_vma
> in mm/mmap.c for details.
> 
> kfd userptr restore may have concurrent userptr invalidation, reschedule
> to restore and then needs call hmm_vma_range_done to remove range from
> hmm->ranges list, otherwise hmm_vma_fault add same range to the list
> will cause loop in the list because range->next point to range itself.
> 
> Change-Id: I641ba7406c32bd8b7ae715f52bd896d53fe56801
> Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
> ---
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 28 +++++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 73 +++++++++++++------
>   2 files changed, 71 insertions(+), 30 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index f8104760f1e6..179af9d3ab19 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1738,6 +1738,23 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
>   	return 0;
>   }
>   
> +/* Untrack invalid userptr BOs
> + *
> + * Stop HMM track the userptr update
> + */
> +static void untrack_invalid_user_pages(struct amdkfd_process_info *process_info)
> +{
> +	struct kgd_mem *mem, *tmp_mem;
> +	struct amdgpu_bo *bo;
> +
> +	list_for_each_entry_safe(mem, tmp_mem,
> +				 &process_info->userptr_inval_list,
> +				 validate_list.head) {
> +		bo = mem->bo;
> +		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
> +	}
> +}
> +
>   /* Validate invalid userptr BOs
>    *
>    * Validates BOs on the userptr_inval_list, and moves them back to the
> @@ -1855,12 +1872,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>   out_free:
>   	kfree(pd_bo_list_entries);
>   out_no_mem:
> -	list_for_each_entry_safe(mem, tmp_mem,
> -				 &process_info->userptr_inval_list,
> -				 validate_list.head) {
> -		bo = mem->bo;
> -		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
> -	}
> +	untrack_invalid_user_pages(process_info);
>   
>   	return ret;
>   }
> @@ -1904,8 +1916,10 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
>   	 * and we can just restart the queues.
>   	 */
>   	if (!list_empty(&process_info->userptr_inval_list)) {
> -		if (atomic_read(&process_info->evicted_bos) != evicted_bos)
> +		if (atomic_read(&process_info->evicted_bos) != evicted_bos) {
> +			untrack_invalid_user_pages(process_info);
>   			goto unlock_out; /* Concurrent eviction, try again */
> +		}
>   
>   		if (validate_invalid_user_pages(process_info))
>   			goto unlock_out;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index cd0ccfbbcb84..e5736225f513 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -711,7 +711,7 @@ struct amdgpu_ttm_tt {
>   	struct task_struct	*usertask;
>   	uint32_t		userflags;
>   #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
> -	struct hmm_range	range;
> +	struct hmm_range	range, range2;
>   #endif
>   };
>   
> @@ -727,58 +727,81 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>   {
>   	struct amdgpu_ttm_tt *gtt = (void *)ttm;
>   	struct mm_struct *mm = gtt->usertask->mm;
> -	unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
> +	unsigned long start = gtt->userptr;
> +	unsigned long end = start + ttm->num_pages * PAGE_SIZE;
>   	struct hmm_range *range = &gtt->range;
> +	struct hmm_range *range2 = &gtt->range2;
> +	struct vm_area_struct *vma, *vma_next = NULL;
> +	uint64_t *pfns, f;
>   	int r = 0, i;
>   
>   	if (!mm) /* Happens during process shutdown */
>   		return -ESRCH;
>   
> -	amdgpu_hmm_init_range(range);
> -
>   	down_read(&mm->mmap_sem);
>   
> -	range->vma = find_vma(mm, gtt->userptr);
> -	if (!range_in_vma(range->vma, gtt->userptr, end))
> -		r = -EFAULT;
> -	else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
> -		range->vma->vm_file)
> +	/* user pages may cross vma bound */
> +	vma = find_vma(mm, start);
> +	if (unlikely(!range_in_vma(vma, start, end))) {
> +		vma_next = find_extend_vma(mm, end);
> +		if (unlikely(!vma_next)) {
> +			r = -EFAULT;
> +			goto out;
> +		}
> +		amdgpu_hmm_init_range(range2);
> +	}
> +	amdgpu_hmm_init_range(range);
> +
> +	if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
> +		vma->vm_file)) {
>   		r = -EPERM;
> -	if (r)
>   		goto out;
> +	}
>   
> -	range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
> -				     GFP_KERNEL);
> -	if (range->pfns == NULL) {
> +	pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t), GFP_KERNEL);
> +	if (unlikely(!pfns)) {
>   		r = -ENOMEM;
>   		goto out;
>   	}
> -	range->start = gtt->userptr;
> -	range->end = end;
>   
> -	range->pfns[0] = range->flags[HMM_PFN_VALID];
> -	range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ?
> -				0 : range->flags[HMM_PFN_WRITE];
> -	for (i = 1; i < ttm->num_pages; i++)
> -		range->pfns[i] = range->pfns[0];
> +	f = range->flags[HMM_PFN_VALID];
> +	f |= amdgpu_ttm_tt_is_readonly(ttm) ? 0 : range->flags[HMM_PFN_WRITE];
> +	memset64(pfns, f, ttm->num_pages);
>   
> +	range->pfns = pfns;
> +	range->vma = vma;
> +	range->start = start;
> +	range->end = min(end, vma->vm_end);
>   	/* This may trigger page table update */
>   	r = hmm_vma_fault(range, true);
> -	if (r)
> +	if (unlikely(r))
>   		goto out_free_pfns;
>   
> +	if (unlikely(vma_next)) {
> +		range2->pfns = pfns + (range->end - range->start) / PAGE_SIZE;
> +		range2->vma = vma_next;
> +		range2->start = range->end;
> +		range2->end = end;
> +		r = hmm_vma_fault(range2, true);
> +		if (unlikely(r))
> +			goto out_free_pfns;
> +	}
> +
>   	up_read(&mm->mmap_sem);
>   
>   	for (i = 0; i < ttm->num_pages; i++)
> -		pages[i] = hmm_pfn_to_page(range, range->pfns[i]);
> +		pages[i] = hmm_pfn_to_page(range, pfns[i]);
>   
>   	return 0;
>   
>   out_free_pfns:
> -	kvfree(range->pfns);
> +	kvfree(pfns);
>   	range->pfns = NULL;
> +	if (unlikely(vma_next))
> +		range2->pfns = NULL;
>   out:
>   	up_read(&mm->mmap_sem);
> +
>   	return r;
>   }
>   
> @@ -802,6 +825,10 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
>   		kvfree(gtt->range.pfns);
>   		gtt->range.pfns = NULL;
>   	}
> +	if (gtt->range2.pfns) {
> +		r = hmm_vma_range_done(&gtt->range2);
> +		gtt->range2.pfns = NULL;
> +	}
>   
>   	return r;
>   }
> 
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx