> -----Original Message----- > From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf > Of Monk Liu > Sent: Friday, March 24, 2017 6:39 AM > To: amd-gfx at lists.freedesktop.org > Cc: Liu, Monk > Subject: [PATCH 12/13] drm/amdgpu:changes in gfx DMAframe scheme > > 1) Adapt to vulkan: > Now use double SWITCH BUFFER to replace the 128 nops w/a, > because when vulkan introduced, umd can insert 7 ~ 16 IBs > per submit which makes 256 DW size cannot hold the whole > DMAframe (if we still insert those 128 nops), CP team suggests > use double SWITCH_BUFFERs, instead of tricky 128 NOPs w/a. > > 2) To fix the CE VM fault issue when MCBP introduced: > Need one more COND_EXEC wrapping IB part (original one us > for VM switch part). > > this change can fix vm fault issue caused by below scenario > without this change: > > >CE passed original COND_EXEC (no MCBP issued this moment), > proceed as normal. > > >DE catch up to this COND_EXEC, but this time MCBP issued, > thus DE treats all following packages as NOP. The following > VM switch packages now looks just as NOP to DE, so DE > dosen't do VM flush at all. > > >Now CE proceeds to the first IBc, and triggers VM fault, > because DE didn't do VM flush for this DMAframe. > > 3) change estimated alloc size for gfx9. > with new DMAframe scheme, we need modify emit_frame_size > for gfx9 > > with above changes, no more 128 NOPs w/a after VM flush > > Change-Id: Ib3f92d9d5a81bfff0369a00f23e1e5891797089a > Signed-off-by: Monk Liu <Monk.Liu at amd.com> I haven't really kept up on this whole saga with the DE and CE so assuming there are no regressions on bare metal, patches 12, 13 are: Acked-by: Alex Deucher <alexander.deucher at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 8 ++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 77 > +++++++++++++++++++++------------- > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 29 ++++++++----- > 3 files changed, 69 insertions(+), 45 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > index d103270..b300929 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > @@ -167,9 +167,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, > unsigned num_ibs, > return r; > } > > - if (ring->funcs->init_cond_exec) > - patch_offset = amdgpu_ring_init_cond_exec(ring); > - > if (vm) { > amdgpu_ring_insert_nop(ring, extra_nop); /* prevent CE go > too fast than DE */ > > @@ -180,7 +177,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, > unsigned num_ibs, > } > } > > - if (ring->funcs->emit_hdp_flush > + if (ring->funcs->init_cond_exec) > + patch_offset = amdgpu_ring_init_cond_exec(ring); > + > + if (ring->funcs->emit_hdp_flush > #ifdef CONFIG_X86_64 > && !(adev->flags & AMD_IS_APU) > #endif > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index 9ff445c..74be4fa 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -483,42 +483,59 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, > struct amdgpu_job *job) > id->oa_size != job->oa_size); > int r; > > - if (ring->funcs->emit_pipeline_sync && ( > - job->vm_needs_flush || gds_switch_needed || > - amdgpu_vm_ring_has_compute_vm_bug(ring))) > - amdgpu_ring_emit_pipeline_sync(ring); > + if (job->vm_needs_flush || gds_switch_needed || > + amdgpu_vm_is_gpu_reset(adev, id) || > + amdgpu_vm_ring_has_compute_vm_bug(ring)) { > + unsigned patch_offset = 0; > > - if 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  8 ++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 77 +++++++++++++++++++++-------------
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 29 ++++++++-----
>  3 files changed, 69 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index d103270..b300929 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -167,9 +167,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>                  return r;
>          }
>
> -        if (ring->funcs->init_cond_exec)
> -                patch_offset = amdgpu_ring_init_cond_exec(ring);
> -
>          if (vm) {
>                  amdgpu_ring_insert_nop(ring, extra_nop); /* prevent CE go too fast than DE */
>
> @@ -180,7 +177,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>                  }
>          }
>
> -        if (ring->funcs->emit_hdp_flush
> +        if (ring->funcs->init_cond_exec)
> +                patch_offset = amdgpu_ring_init_cond_exec(ring);
> +
> +        if (ring->funcs->emit_hdp_flush
>  #ifdef CONFIG_X86_64
>              && !(adev->flags & AMD_IS_APU)
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 9ff445c..74be4fa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -483,42 +483,59 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
>                  id->oa_size != job->oa_size);
>          int r;
>
> -        if (ring->funcs->emit_pipeline_sync && (
> -            job->vm_needs_flush || gds_switch_needed ||
> -            amdgpu_vm_ring_has_compute_vm_bug(ring)))
> -                amdgpu_ring_emit_pipeline_sync(ring);
> +        if (job->vm_needs_flush || gds_switch_needed ||
> +            amdgpu_vm_is_gpu_reset(adev, id) ||
> +            amdgpu_vm_ring_has_compute_vm_bug(ring)) {
> +                unsigned patch_offset = 0;
>
> -        if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
> -            amdgpu_vm_is_gpu_reset(adev, id))) {
> -                struct fence *fence;
> -                u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
> +                if (ring->funcs->init_cond_exec)
> +                        patch_offset = amdgpu_ring_init_cond_exec(ring);
>
> -                trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
> -                amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
> +                if (ring->funcs->emit_pipeline_sync &&
> +                    (job->vm_needs_flush || gds_switch_needed ||
> +                     amdgpu_vm_ring_has_compute_vm_bug(ring)))
> +                        amdgpu_ring_emit_pipeline_sync(ring);
>
> -                r = amdgpu_fence_emit(ring, &fence);
> -                if (r)
> -                        return r;
> +                if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
> +                    amdgpu_vm_is_gpu_reset(adev, id))) {
> +                        struct fence *fence;
> +                        u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
>
> -                mutex_lock(&adev->vm_manager.lock);
> -                fence_put(id->last_flush);
> -                id->last_flush = fence;
> -                mutex_unlock(&adev->vm_manager.lock);
> -        }
> +                        trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
> +                        amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
>
> -        if (gds_switch_needed) {
> -                id->gds_base = job->gds_base;
> -                id->gds_size = job->gds_size;
> -                id->gws_base = job->gws_base;
> -                id->gws_size = job->gws_size;
> -                id->oa_base = job->oa_base;
> -                id->oa_size = job->oa_size;
> -                amdgpu_ring_emit_gds_switch(ring, job->vm_id,
> -                                            job->gds_base, job->gds_size,
> -                                            job->gws_base, job->gws_size,
> -                                            job->oa_base, job->oa_size);
> -        }
> +                        r = amdgpu_fence_emit(ring, &fence);
> +                        if (r)
> +                                return r;
>
> +                        mutex_lock(&adev->vm_manager.lock);
> +                        fence_put(id->last_flush);
> +                        id->last_flush = fence;
> +                        mutex_unlock(&adev->vm_manager.lock);
> +                }
> +
> +                if (gds_switch_needed) {
> +                        id->gds_base = job->gds_base;
> +                        id->gds_size = job->gds_size;
> +                        id->gws_base = job->gws_base;
> +                        id->gws_size = job->gws_size;
> +                        id->oa_base = job->oa_base;
> +                        id->oa_size = job->oa_size;
> +                        amdgpu_ring_emit_gds_switch(ring, job->vm_id,
> +                                                    job->gds_base, job->gds_size,
> +                                                    job->gws_base, job->gws_size,
> +                                                    job->oa_base, job->oa_size);
> +                }
> +
> +                if (ring->funcs->patch_cond_exec)
> +                        amdgpu_ring_patch_cond_exec(ring, patch_offset);
> +
> +                /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
> +                if (ring->funcs->emit_switch_buffer) {
> +                        amdgpu_ring_emit_switch_buffer(ring);
> +                        amdgpu_ring_emit_switch_buffer(ring);
> +                }
> +        }
>          return 0;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index ce6fa03..eb8551b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3134,8 +3134,6 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>                  /* sync PFP to ME, otherwise we might get invalid PFP reads */
>                  amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
>                  amdgpu_ring_write(ring, 0x0);
> -                /* Emits 128 dw nop to prevent CE access VM before vm_flush finish */
> -                amdgpu_ring_insert_nop(ring, 128);
>          }
>  }
>
> @@ -3629,15 +3627,24 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
>          .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
>          .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
>          .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
> -        .emit_frame_size =
> -                20 + /* gfx_v9_0_ring_emit_gds_switch */
> -                7 + /* gfx_v9_0_ring_emit_hdp_flush */
> -                5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
> -                8 + 8 + 8 +/* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
> -                7 + /* gfx_v9_0_ring_emit_pipeline_sync */
> -                128 + 66 + /* gfx_v9_0_ring_emit_vm_flush */
> -                2 + /* gfx_v9_ring_emit_sb */
> -                3, /* gfx_v9_ring_emit_cntxcntl */
> +        .emit_frame_size = /* totally 242 maximum if 16 IBs */
> +                5 +  /* COND_EXEC */
> +                7 +  /* PIPELINE_SYNC */
> +                46 + /* VM_FLUSH */
> +                8 +  /* FENCE for VM_FLUSH */
> +                20 + /* GDS switch */
> +                4 + /* double SWITCH_BUFFER,
> +                       the first COND_EXEC jump to the place just
> +                       prior to this double SWITCH_BUFFER */
> +                5 +  /* COND_EXEC */
> +                7 +  /* HDP_flush */
> +                4 +  /* VGT_flush */
> +                14 + /* CE_META */
> +                31 + /* DE_META */
> +                3 +  /* CNTX_CTRL */
> +                5 +  /* HDP_INVL */
> +                8 + 8 + /* FENCE x2 */
> +                2, /* SWITCH_BUFFER */
>          .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
>          .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
>          .emit_fence = gfx_v9_0_ring_emit_fence,
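
One quick cross-check of the new worst-case budget (my own arithmetic, not
something stated in the patch beyond the comment): the dword counts listed in
.emit_frame_size add up as

    5 + 7 + 46 + 8 + 20 + 4 + 5 + 7 + 4 + 14 + 31 + 3 + 5 + 8 + 8 + 2 = 177 dw
    177 dw + 16 IBs * 4 dw (.emit_ib_size)                            = 241 dw

which lines up, to within a dword, with the "totally 242 maximum if 16 IBs"
note in the comment.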
> --
> 2.7.4
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx