> -----Original Message----- > From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf > Of Monk Liu > Sent: Friday, March 24, 2017 6:39 AM > To: amd-gfx at lists.freedesktop.org > Cc: Liu, Monk > Subject: [PATCH 12/13] drm/amdgpu:changes in gfx DMAframe scheme > > 1) Adapt to vulkan: > Now use double SWITCH BUFFER to replace the 128 nops w/a, > because when vulkan introduced, umd can insert 7 ~ 16 IBs > per submit which makes 256 DW size cannot hold the whole > DMAframe (if we still insert those 128 nops), CP team suggests > use double SWITCH_BUFFERs, instead of tricky 128 NOPs w/a. > > 2) To fix the CE VM fault issue when MCBP introduced: > Need one more COND_EXEC wrapping IB part (original one us > for VM switch part). > > this change can fix vm fault issue caused by below scenario > without this change: > > >CE passed original COND_EXEC (no MCBP issued this moment), > proceed as normal. > > >DE catch up to this COND_EXEC, but this time MCBP issued, > thus DE treats all following packages as NOP. The following > VM switch packages now looks just as NOP to DE, so DE > dosen't do VM flush at all. > > >Now CE proceeds to the first IBc, and triggers VM fault, > because DE didn't do VM flush for this DMAframe. > > 3) change estimated alloc size for gfx9. > with new DMAframe scheme, we need modify emit_frame_size > for gfx9 > > with above changes, no more 128 NOPs w/a after VM flush > > Change-Id: Ib3f92d9d5a81bfff0369a00f23e1e5891797089a > Signed-off-by: Monk Liu <Monk.Liu at amd.com> I haven't really kept up on this whole saga with the DE and CE so assuming there are no regressions on bare metal, patches 12, 13 are: Acked-by: Alex Deucher <alexander.deucher at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 8 ++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 77 > +++++++++++++++++++++------------- > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 29 ++++++++----- > 3 files changed, 69 insertions(+), 45 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > index d103270..b300929 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > @@ -167,9 +167,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, > unsigned num_ibs, > return r; > } > > - if (ring->funcs->init_cond_exec) > - patch_offset = amdgpu_ring_init_cond_exec(ring); > - > if (vm) { > amdgpu_ring_insert_nop(ring, extra_nop); /* prevent CE go > too fast than DE */ > > @@ -180,7 +177,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, > unsigned num_ibs, > } > } > > - if (ring->funcs->emit_hdp_flush > + if (ring->funcs->init_cond_exec) > + patch_offset = amdgpu_ring_init_cond_exec(ring); > + > + if (ring->funcs->emit_hdp_flush > #ifdef CONFIG_X86_64 > && !(adev->flags & AMD_IS_APU) > #endif > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index 9ff445c..74be4fa 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -483,42 +483,59 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, > struct amdgpu_job *job) > id->oa_size != job->oa_size); > int r; > > - if (ring->funcs->emit_pipeline_sync && ( > - job->vm_needs_flush || gds_switch_needed || > - amdgpu_vm_ring_has_compute_vm_bug(ring))) > - amdgpu_ring_emit_pipeline_sync(ring); > + if (job->vm_needs_flush || gds_switch_needed || > + amdgpu_vm_is_gpu_reset(adev, id) || > + amdgpu_vm_ring_has_compute_vm_bug(ring)) { > + unsigned patch_offset = 0; > > - if 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  8 ++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 77 +++++++++++++++++++++-------------
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 29 ++++++++-----
>  3 files changed, 69 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index d103270..b300929 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -167,9 +167,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>                  return r;
>          }
>
> -        if (ring->funcs->init_cond_exec)
> -                patch_offset = amdgpu_ring_init_cond_exec(ring);
> -
>          if (vm) {
>                  amdgpu_ring_insert_nop(ring, extra_nop); /* prevent CE go too fast than DE */
>
> @@ -180,7 +177,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>                  }
>          }
>
> -        if (ring->funcs->emit_hdp_flush
> +        if (ring->funcs->init_cond_exec)
> +                patch_offset = amdgpu_ring_init_cond_exec(ring);
> +
> +        if (ring->funcs->emit_hdp_flush
>  #ifdef CONFIG_X86_64
>              && !(adev->flags & AMD_IS_APU)
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 9ff445c..74be4fa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -483,42 +483,59 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
>                  id->oa_size != job->oa_size);
>          int r;
>
> -        if (ring->funcs->emit_pipeline_sync && (
> -            job->vm_needs_flush || gds_switch_needed ||
> -            amdgpu_vm_ring_has_compute_vm_bug(ring)))
> -                amdgpu_ring_emit_pipeline_sync(ring);
> +        if (job->vm_needs_flush || gds_switch_needed ||
> +            amdgpu_vm_is_gpu_reset(adev, id) ||
> +            amdgpu_vm_ring_has_compute_vm_bug(ring)) {
> +                unsigned patch_offset = 0;
>
> -        if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
> -            amdgpu_vm_is_gpu_reset(adev, id))) {
> -                struct fence *fence;
> -                u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
> +                if (ring->funcs->init_cond_exec)
> +                        patch_offset = amdgpu_ring_init_cond_exec(ring);
>
> -                trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
> -                amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
> +                if (ring->funcs->emit_pipeline_sync &&
> +                    (job->vm_needs_flush || gds_switch_needed ||
> +                     amdgpu_vm_ring_has_compute_vm_bug(ring)))
> +                        amdgpu_ring_emit_pipeline_sync(ring);
>
> -                r = amdgpu_fence_emit(ring, &fence);
> -                if (r)
> -                        return r;
> +                if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
> +                    amdgpu_vm_is_gpu_reset(adev, id))) {
> +                        struct fence *fence;
> +                        u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
>
> -                mutex_lock(&adev->vm_manager.lock);
> -                fence_put(id->last_flush);
> -                id->last_flush = fence;
> -                mutex_unlock(&adev->vm_manager.lock);
> -        }
> +                        trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
> +                        amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
>
> -        if (gds_switch_needed) {
> -                id->gds_base = job->gds_base;
> -                id->gds_size = job->gds_size;
> -                id->gws_base = job->gws_base;
> -                id->gws_size = job->gws_size;
> -                id->oa_base = job->oa_base;
> -                id->oa_size = job->oa_size;
> -                amdgpu_ring_emit_gds_switch(ring, job->vm_id,
> -                                            job->gds_base, job->gds_size,
> -                                            job->gws_base, job->gws_size,
> -                                            job->oa_base, job->oa_size);
> -        }
> +                        r = amdgpu_fence_emit(ring, &fence);
> +                        if (r)
> +                                return r;
>
> +                        mutex_lock(&adev->vm_manager.lock);
> +                        fence_put(id->last_flush);
> +                        id->last_flush = fence;
> +                        mutex_unlock(&adev->vm_manager.lock);
> +                }
> +
> +                if (gds_switch_needed) {
> +                        id->gds_base = job->gds_base;
> +                        id->gds_size = job->gds_size;
> +                        id->gws_base = job->gws_base;
> +                        id->gws_size = job->gws_size;
> +                        id->oa_base = job->oa_base;
> +                        id->oa_size = job->oa_size;
> +                        amdgpu_ring_emit_gds_switch(ring, job->vm_id,
> +                                                    job->gds_base, job->gds_size,
> +                                                    job->gws_base, job->gws_size,
> +                                                    job->oa_base, job->oa_size);
> +                }
> +
> +                if (ring->funcs->patch_cond_exec)
> +                        amdgpu_ring_patch_cond_exec(ring, patch_offset);
> +
> +                /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
> +                if (ring->funcs->emit_switch_buffer) {
> +                        amdgpu_ring_emit_switch_buffer(ring);
> +                        amdgpu_ring_emit_switch_buffer(ring);
> +                }
> +        }
>          return 0;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index ce6fa03..eb8551b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3134,8 +3134,6 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>                  /* sync PFP to ME, otherwise we might get invalid PFP reads */
>                  amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
>                  amdgpu_ring_write(ring, 0x0);
> -                /* Emits 128 dw nop to prevent CE access VM before vm_flush finish */
> -                amdgpu_ring_insert_nop(ring, 128);
>          }
>  }
>
> @@ -3629,15 +3627,24 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
>          .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
>          .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
>          .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
> -        .emit_frame_size =
> -                20 + /* gfx_v9_0_ring_emit_gds_switch */
> -                7 + /* gfx_v9_0_ring_emit_hdp_flush */
> -                5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
> -                8 + 8 + 8 +/* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
> -                7 + /* gfx_v9_0_ring_emit_pipeline_sync */
> -                128 + 66 + /* gfx_v9_0_ring_emit_vm_flush */
> -                2 + /* gfx_v9_ring_emit_sb */
> -                3, /* gfx_v9_ring_emit_cntxcntl */
> +        .emit_frame_size = /* totally 242 maximum if 16 IBs */
> +                5 +  /* COND_EXEC */
> +                7 +  /* PIPELINE_SYNC */
> +                46 + /* VM_FLUSH */
> +                8 +  /* FENCE for VM_FLUSH */
> +                20 + /* GDS switch */
> +                4 + /* double SWITCH_BUFFER,
> +                       the first COND_EXEC jump to the place just
> +                       prior to this double SWITCH_BUFFER */
> +                5 +  /* COND_EXEC */
> +                7 +  /* HDP_flush */
> +                4 +  /* VGT_flush */
> +                14 + /* CE_META */
> +                31 + /* DE_META */
> +                3 +  /* CNTX_CTRL */
> +                5 +  /* HDP_INVL */
> +                8 + 8 + /* FENCE x2 */
> +                2, /* SWITCH_BUFFER */
>          .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
>          .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
>          .emit_fence = gfx_v9_0_ring_emit_fence,
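
One quick cross-check of the new worst-case budget (my own arithmetic, not
something stated in the patch beyond the comment): the dword counts listed in
.emit_frame_size add up as

    5 + 7 + 46 + 8 + 20 + 4 + 5 + 7 + 4 + 14 + 31 + 3 + 5 + 8 + 8 + 2 = 177 dw
    177 dw + 16 IBs * 4 dw (.emit_ib_size)                            = 241 dw

which lines up, to within a dword, with the "totally 242 maximum if 16 IBs"
note in the comment.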
> --
> 2.7.4
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx