On 27.03.2018 at 07:58, Emily Deng wrote:
> issue:
> the VM flush on the KCQ can be preempted (unlike the GFX ring,
> which doesn't allow preemption inside the ring buffer), and this
> leads to a VM flush failure when a world switch occurs during
> the VM flush procedure (between writing the invalidate request
> and querying the invalidate ack)
>
> fix:
> separate the VM flush paths for the gfx and compute rings, and use
> the new command format for the compute ring's VM flush, which
> uses only one packet so that no preemption can happen inside it

NAK. As already discussed multiple times, that only circumvents the
problem but doesn't really fix it.

Just executing
"amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);"
multiple times has the same effect, and we need to figure out why.

Regards,
Christian.

>
> Signed-off-by: Monk Liu <Monk.Liu at amd.com>
> Signed-off-by: Emily Deng <Emily.Deng at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 ++
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 10 +++++++++-
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    | 18 +++++++++++++-----
>  4 files changed, 25 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index a7e2229..986659f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1790,6 +1790,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
>  #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
>  #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
>  #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
> +#define amdgpu_ring_emit_reg_wait1(r, d0, d1, v, m) (r)->funcs->emit_reg_wait1((r), (d0), (d1), (v), (m))
>  #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
>  #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
>  #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 1d0d250..d85df5d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -152,6 +152,8 @@ struct amdgpu_ring_funcs {
>  	void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
>  	void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
>  			      uint32_t val, uint32_t mask);
> +	void (*emit_reg_wait1)(struct amdgpu_ring *ring, uint32_t reg0,
> +			       uint32_t reg1, uint32_t val, uint32_t mask);
>  	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
>  	/* priority functions */
>  	void (*set_priority) (struct amdgpu_ring *ring,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 1ae3de1..509c9d2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -4078,6 +4078,13 @@ static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>  	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
>  }
>
> +static void gfx_v9_0_ring_emit_reg_wait_compute(struct amdgpu_ring *ring,
> +						uint32_t reg0, uint32_t reg1,
> +						uint32_t val, uint32_t mask)
> +{
> +	gfx_v9_0_wait_reg_mem(ring, 0, 0, 1, reg0, reg1, val, mask, 0x20);
> +}
> +
>  static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
>  						 enum amdgpu_interrupt_state state)
>  {
> @@ -4415,7 +4422,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
>  		7 + /* gfx_v9_0_ring_emit_hdp_flush */
>  		5 + /* hdp invalidate */
>  		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
> -		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> +		(SOC15_FLUSH_GPU_TLB_NUM_WREG - 1) * 5 +
>  		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
>  		2 + /* gfx_v9_0_ring_emit_vm_flush */
>  		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
> @@ -4433,6 +4440,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
>  	.set_priority = gfx_v9_0_ring_set_priority_compute,
>  	.emit_wreg = gfx_v9_0_ring_emit_wreg,
>  	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
> +	.emit_reg_wait1 = gfx_v9_0_ring_emit_reg_wait_compute,
>  };
>
>  static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index e687363..968447d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -385,11 +385,19 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>  	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
>  			      upper_32_bits(pd_addr));
>
> -	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
> -
> -	/* wait for the invalidate to complete */
> -	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> -				  1 << vmid, 1 << vmid);
> +	/* A world switch must not occur while the invalidation
> +	 * controller is waiting for the ack.  To work around this
> +	 * hardware restriction, replace the original two commands
> +	 * with a single command on the compute ring. */
> +	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE && amdgpu_sriov_vf(adev)) {
> +		amdgpu_ring_emit_reg_wait1(ring, hub->vm_inv_eng0_req + eng,
> +					   hub->vm_inv_eng0_ack + eng, req, 1 << vmid);
> +	} else {
> +		amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
> +		/* wait for the invalidate to complete */
> +		amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> +					  1 << vmid, 1 << vmid);
> +	}
>
>  	return pd_addr;
>  }
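
For context, the "one packet" the commit message relies on maps onto a single
PM4 WAIT_REG_MEM packet. Below is a minimal sketch of what the
gfx_v9_0_ring_emit_reg_wait_compute() path above ends up emitting, assuming
the WAIT_REG_MEM_* field encodings from soc15d.h and that OPERATION = 1
selects "write reg0 with the reference value, then poll reg1". The function
name in the sketch is invented for illustration:

/* Hedged sketch (not part of the patch): one WAIT_REG_MEM packet that
 * both writes the invalidate request register and polls the ack
 * register.  Field meanings are assumptions based on the soc15d.h
 * packet definitions, not verified against all firmware versions. */
static void sketch_emit_reg_write_then_wait(struct amdgpu_ring *ring,
					    uint32_t reg0, uint32_t reg1,
					    uint32_t ref, uint32_t mask)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, WAIT_REG_MEM_MEM_SPACE(0) |	/* register space */
				WAIT_REG_MEM_OPERATION(1) |	/* write, then wait */
				WAIT_REG_MEM_FUNCTION(3) |	/* equal compare */
				WAIT_REG_MEM_ENGINE(0));	/* ME */
	amdgpu_ring_write(ring, reg0);	/* e.g. vm_inv_eng0_req + eng, written with ref */
	amdgpu_ring_write(ring, reg1);	/* e.g. vm_inv_eng0_ack + eng, polled */
	amdgpu_ring_write(ring, ref);	/* req value */
	amdgpu_ring_write(ring, mask);	/* 1 << vmid */
	amdgpu_ring_write(ring, 0x20);	/* poll interval */
}

Because the write and the poll live in the same packet, the CP cannot take a
world switch between them, which is the property the commit message is after.
The equal compare works out because the request value built by
gmc_v9_0_get_invalidate_req() has the per-VMID bit set, so (ref & mask)
matches the ack register once the per-VMID ack bit is raised.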
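The counter-experiment from the review can also be made concrete. A hedged
sketch, reusing only helpers visible in the patch above; the function name and
the repeat count are invented for illustration:

/* Sketch of the suggested experiment: keep the original two-command
 * sequence, but emit the invalidate request write several times before
 * polling.  Per the review, this reportedly has the same effect as the
 * fused packet, which would mean the patch only papers over the real
 * bug rather than fixing it. */
static void sketch_vm_flush_repeated_wreg(struct amdgpu_ring *ring,
					  struct amdgpu_vmhub *hub,
					  unsigned int eng, unsigned int vmid,
					  uint32_t req)
{
	int i;

	for (i = 0; i < 3; i++)	/* repeat count is arbitrary */
		amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);

	/* wait for the invalidate to complete, exactly as before */
	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
				  1 << vmid, 1 << vmid);
}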