From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxx>

Convert the GFX v10.0 ring helpers to use the variadic
amdgpu_ring_write(). Also apply some small cleanups in
gfx_v10_0_cp_gfx_start(), gfx_v10_0_ring_emit_ce_meta() and
gfx_v10_0_ring_emit_de_meta().

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxx>
Cc: Christian König <christian.koenig@xxxxxxx>
Cc: Sunil Khatri <sunil.khatri@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 399 ++++++++++++-----------
 2 files changed, 204 insertions(+), 197 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 4f467864ed09..1b428dda706a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -611,7 +611,7 @@ amdgpu_ring_write11(struct amdgpu_ring *ring,
	 NULL)(__VA_ARGS__)
 
 static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
-					      void *src, int count_dw)
+					      const void *src, int count_dw)
 {
 	unsigned occupied, chunk1, chunk2;
 	u32 buf_mask = ring->buf_mask;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 003522c2d902..63fc94c5d989 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3683,15 +3683,16 @@ static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue
 	/* Cleaner shader MC address */
 	shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
 
-	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
-	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
-			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
-	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
-	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
-	amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
-	amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
-	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
-	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3(PACKET3_SET_RESOURCES, 6),
+			  PACKET3_SET_RESOURCES_VMID_MASK(0) |
+			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0), /* vmid_mask:0 queue_type:0 (KIQ) */
+			  lower_32_bits(queue_mask), /* queue mask lo */
+			  upper_32_bits(queue_mask), /* queue mask hi */
+			  lower_32_bits(shader_mc_addr), /* cleaner shader addr lo */
+			  upper_32_bits(shader_mc_addr), /* cleaner shader addr hi */
+			  0, /* oac mask */
+			  0); /* gds heap base:0, gds heap size:0 */
 }
 
 static void gfx10_kiq_map_queues(struct amdgpu_ring *kiq_ring,
@@ -3715,10 +3716,9 @@ static void gfx10_kiq_map_queues(struct amdgpu_ring *kiq_ring,
 		WARN_ON(1);
 	}
 
-	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
-	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
-	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
-			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3(PACKET3_MAP_QUEUES, 5),
+			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
 			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
 			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
 			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
@@ -3726,12 +3726,12 @@ static void gfx10_kiq_map_queues(struct amdgpu_ring *kiq_ring,
 			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
 			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
 			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
-			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
-	amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
-	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
-	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
-	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
-	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+			  PACKET3_MAP_QUEUES_NUM_QUEUES(1), /* num_queues: must be 1 */
+			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index),
+			  lower_32_bits(mqd_addr),
+			  upper_32_bits(mqd_addr),
+			  lower_32_bits(wptr_addr),
+			  upper_32_bits(wptr_addr));
 }
 
 static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
@@ -3741,23 +3741,21 @@
 {
 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
 
-	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
-	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3(PACKET3_UNMAP_QUEUES, 4),
 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
-			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
-	amdgpu_ring_write(kiq_ring,
-			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1) /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */,
+			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
 
 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
-		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
-		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
-		amdgpu_ring_write(kiq_ring, seq);
+		amdgpu_ring_write(kiq_ring,
+				  lower_32_bits(gpu_addr),
+				  upper_32_bits(gpu_addr),
+				  seq);
 	} else {
-		amdgpu_ring_write(kiq_ring, 0);
-		amdgpu_ring_write(kiq_ring, 0);
-		amdgpu_ring_write(kiq_ring, 0);
+		amdgpu_ring_fill(kiq_ring, 0, 3);
 	}
 }
 
@@ -3768,18 +3766,17 @@
 {
 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
 
-	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
 	amdgpu_ring_write(kiq_ring,
+			  PACKET3(PACKET3_QUERY_STATUS, 5),
 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
-			  PACKET3_QUERY_STATUS_COMMAND(2));
-	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+			  PACKET3_QUERY_STATUS_COMMAND(2), /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
 			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
-			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
-	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
-	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
-	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
-	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel),
+			  lower_32_bits(addr),
+			  upper_32_bits(addr),
+			  lower_32_bits(seq),
+			  upper_32_bits(seq));
 }
 
 static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
@@ -3918,12 +3915,13 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
 static void gfx_v10_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
 					bool wc, uint32_t reg, uint32_t val)
 {
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
-			  WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
-	amdgpu_ring_write(ring, reg);
-	amdgpu_ring_write(ring, 0);
-	amdgpu_ring_write(ring, val);
+	amdgpu_ring_write(ring,
+			  PACKET3(PACKET3_WRITE_DATA, 3),
+			  WRITE_DATA_ENGINE_SEL(eng_sel) |
+			  WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0),
+			  reg,
+			  0,
+			  val);
 }
 
 static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
@@ -3931,21 +3929,21 @@
 				   uint32_t addr1, uint32_t ref, uint32_t mask,
 				   uint32_t inv)
 {
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+	if (mem_space)
+		BUG_ON(addr0 & 0x3); /* Dword align */
+
 	amdgpu_ring_write(ring,
+			  PACKET3(PACKET3_WAIT_REG_MEM, 5),
 			  /* memory (1) or register (0) */
 			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
 			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
 			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
-			   WAIT_REG_MEM_ENGINE(eng_sel)));
-
-	if (mem_space)
-		BUG_ON(addr0 & 0x3); /* Dword align */
-	amdgpu_ring_write(ring, addr0);
-	amdgpu_ring_write(ring, addr1);
-	amdgpu_ring_write(ring, ref);
-	amdgpu_ring_write(ring, mask);
-	amdgpu_ring_write(ring, inv); /* poll interval */
+			   WAIT_REG_MEM_ENGINE(eng_sel)),
+			  addr0,
+			  addr1,
+			  ref,
+			  mask,
+			  inv); /* poll interval */
 }
 
 static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
@@ -3964,10 +3962,11 @@
 		return r;
 	}
 
-	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
-	amdgpu_ring_write(ring, scratch -
-			  PACKET3_SET_UCONFIG_REG_START);
-	amdgpu_ring_write(ring, 0xDEADBEEF);
+	amdgpu_ring_write(ring,
+			  PACKET3(PACKET3_SET_UCONFIG_REG, 1),
+			  scratch - PACKET3_SET_UCONFIG_REG_START,
+			  0xDEADBEEF);
+
 	amdgpu_ring_commit(ring);
 
 	for (i = 0; i < adev->usec_timeout; i++) {
@@ -6239,8 +6238,8 @@ static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev)
 	struct amdgpu_ring *ring;
 	const struct cs_section_def *sect = NULL;
 	const struct cs_extent_def *ext = NULL;
-	int r, i;
 	int ctx_reg_offset;
+	int r;
 
 	/* init the CP */
 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT,
@@ -6256,43 +6255,46 @@
 		return r;
 	}
 
-	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
-	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+	amdgpu_ring_write(ring,
+			  PACKET3(PACKET3_PREAMBLE_CNTL, 0),
+			  PACKET3_PREAMBLE_BEGIN_CLEAR_STATE,
 
-	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
-	amdgpu_ring_write(ring, 0x80000000);
-	amdgpu_ring_write(ring, 0x80000000);
+			  PACKET3(PACKET3_CONTEXT_CONTROL, 1),
+			  0x80000000,
+			  0x80000000);
 
 	for (sect = gfx10_cs_data; sect->section != NULL; ++sect) {
 		for (ext = sect->section; ext->extent != NULL; ++ext) {
 			if (sect->id == SECT_CONTEXT) {
 				amdgpu_ring_write(ring,
 						  PACKET3(PACKET3_SET_CONTEXT_REG,
-						  ext->reg_count));
-				amdgpu_ring_write(ring, ext->reg_index -
+						  ext->reg_count),
+						  ext->reg_index -
 						  PACKET3_SET_CONTEXT_REG_START);
-				for (i = 0; i < ext->reg_count; i++)
-					amdgpu_ring_write(ring, ext->extent[i]);
+				amdgpu_ring_write_multiple(ring, ext->extent,
+							   ext->reg_count);
 			}
 		}
 	}
 
 	ctx_reg_offset =
		SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
-	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	amdgpu_ring_write(ring, ctx_reg_offset);
-	amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
 
-	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
-	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
+	amdgpu_ring_write(ring,
+			  PACKET3(PACKET3_SET_CONTEXT_REG, 1),
+			  ctx_reg_offset,
+			  adev->gfx.config.pa_sc_tile_steering_override,
 
-	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
-	amdgpu_ring_write(ring, 0);
+			  PACKET3(PACKET3_PREAMBLE_CNTL, 0),
+			  PACKET3_PREAMBLE_END_CLEAR_STATE,
 
-	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
-	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
-	amdgpu_ring_write(ring, 0x8000);
-	amdgpu_ring_write(ring, 0x8000);
+			  PACKET3(PACKET3_CLEAR_STATE, 0),
+			  0,
+
+			  PACKET3(PACKET3_SET_BASE, 2),
+			  PACKET3_BASE_INDEX(CE_PARTITION_BASE),
+			  0x8000,
+			  0x8000);
 
 	amdgpu_ring_commit(ring);
 
@@ -6306,9 +6308,7 @@
 			return r;
 		}
 
-		amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
-		amdgpu_ring_write(ring, 0);
-
+		amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0), 0);
 		amdgpu_ring_commit(ring);
 	}
 	return 0;
@@ -8564,6 +8564,8 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 	unsigned int vmid = AMDGPU_JOB_GET_VMID(job);
 	u32 header, control = 0;
 
+	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+
 	if (ib->flags & AMDGPU_IB_FLAG_CE)
 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CNST, 2);
 	else
@@ -8582,15 +8584,14 @@
 					 (!amdgpu_sriov_vf(ring->adev) &&
 					  flags & AMDGPU_IB_PREEMPTED) ? true : false);
 	}
 
-	amdgpu_ring_write(ring, header);
-	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
 	amdgpu_ring_write(ring,
+			  header,
 #ifdef __BIG_ENDIAN
-			  (2 << 0) |
+			  (2 << 0) |
 #endif
-			  lower_32_bits(ib->gpu_addr));
-	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
-	amdgpu_ring_write(ring, control);
+			  lower_32_bits(ib->gpu_addr),
+			  upper_32_bits(ib->gpu_addr),
+			  control);
 }
 
 static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
@@ -8601,6 +8602,8 @@ static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 	unsigned int vmid = AMDGPU_JOB_GET_VMID(job);
 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
 
+	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+
 	/* Currently, there is a high possibility to get wave ID mismatch
 	 * between ME and GDS, leading to a hw deadlock, because ME generates
 	 * different wave IDs than the GDS expects. This situation happens
@@ -8612,20 +8615,20 @@
 	 * GDS to 0 for this ring (me/pipe).
 	 */
 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
-		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
-		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+		amdgpu_ring_write(ring,
+				  PACKET3(PACKET3_SET_CONFIG_REG, 1),
+				  mmGDS_COMPUTE_MAX_WAVE_ID,
+				  ring->adev->gds.gds_compute_max_wave_id);
 	}
 
-	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
-	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
 	amdgpu_ring_write(ring,
+			  PACKET3(PACKET3_INDIRECT_BUFFER, 2),
#ifdef __BIG_ENDIAN
-			  (2 << 0) |
+			  (2 << 0) |
#endif
-			  lower_32_bits(ib->gpu_addr));
-	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
-	amdgpu_ring_write(ring, control);
+			  lower_32_bits(ib->gpu_addr),
+			  upper_32_bits(ib->gpu_addr),
+			  control);
 }
 
@@ -8634,18 +8637,6 @@ static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
 
-	/* RELEASE_MEM - flush caches, send int */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
-	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
-				 PACKET3_RELEASE_MEM_GCR_GL2_WB |
-				 PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */
-				 PACKET3_RELEASE_MEM_GCR_GLM_WB |
-				 PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
-				 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
-				 PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
-	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
-				 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
-
 	/*
 	 * the address should be Qword aligned if 64bit write, Dword
 	 * aligned if only send 32bit data low (discard data high)
@@ -8654,11 +8645,24 @@ static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
 		BUG_ON(addr & 0x7);
 	else
 		BUG_ON(addr & 0x3);
-	amdgpu_ring_write(ring, lower_32_bits(addr));
-	amdgpu_ring_write(ring, upper_32_bits(addr));
-	amdgpu_ring_write(ring, lower_32_bits(seq));
-	amdgpu_ring_write(ring, upper_32_bits(seq));
-	amdgpu_ring_write(ring, 0);
+
+	amdgpu_ring_write(ring,
+			  /* RELEASE_MEM - flush caches, send int */
+			  PACKET3(PACKET3_RELEASE_MEM, 6),
+			  (PACKET3_RELEASE_MEM_GCR_SEQ |
+			   PACKET3_RELEASE_MEM_GCR_GL2_WB |
+			   PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */
+			   PACKET3_RELEASE_MEM_GCR_GLM_WB |
+			   PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
+			   PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+			   PACKET3_RELEASE_MEM_EVENT_INDEX(5)),
+			  (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
+			   PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)),
+			  lower_32_bits(addr),
+			  upper_32_bits(addr),
+			  lower_32_bits(seq),
+			  upper_32_bits(seq),
+			  0);
 }
 
 static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
@@ -8675,8 +8679,8 @@ static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
 					   uint16_t pasid, uint32_t flush_type,
 					   bool all_hub, uint8_t dst_sel)
 {
-	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
 	amdgpu_ring_write(ring,
+			  PACKET3(PACKET3_INVALIDATE_TLBS, 0),
 			  PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
 			  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
 			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
@@ -8691,8 +8695,7 @@ static void gfx_v10_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	/* compute doesn't have PFP */
 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
-		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
-		amdgpu_ring_write(ring, 0x0);
+		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0), 0);
 	}
 }
 
@@ -8705,40 +8708,42 @@ static void gfx_v10_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
 
 	/* write fence seq to the "addr" */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
-	amdgpu_ring_write(ring, lower_32_bits(addr));
-	amdgpu_ring_write(ring, upper_32_bits(addr));
-	amdgpu_ring_write(ring, lower_32_bits(seq));
+	amdgpu_ring_write(ring,
+			  PACKET3(PACKET3_WRITE_DATA, 3),
+			  (WRITE_DATA_ENGINE_SEL(0) | WRITE_DATA_DST_SEL(5) |
+			   WR_CONFIRM),
+			  lower_32_bits(addr),
+			  upper_32_bits(addr),
+			  lower_32_bits(seq));
 
 	if (flags & AMDGPU_FENCE_FLAG_INT) {
 		/* set register to trigger INT */
-		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
-		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
-		amdgpu_ring_write(ring, 0);
-		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
+		amdgpu_ring_write(ring,
+				  PACKET3(PACKET3_WRITE_DATA, 3),
+				  (WRITE_DATA_ENGINE_SEL(0) |
+				   WRITE_DATA_DST_SEL(0) |
+				   WR_CONFIRM),
+				  SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS),
+				  0,
+				  0x20000000); /* src_id is 178 */
 	}
 }
 
 static void gfx_v10_0_ring_emit_sb(struct amdgpu_ring *ring)
 {
-	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0), 0);
 }
 
 static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
 					 uint32_t flags)
 {
-	uint32_t dw2 = 0;
+	uint32_t dw2;
 
 	if (ring->adev->gfx.mcbp)
 		gfx_v10_0_ring_emit_ce_meta(ring,
 				(!amdgpu_sriov_vf(ring->adev) &&
 				 flags & AMDGPU_IB_PREEMPTED) ? true : false);
 
-	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
+	dw2 = 0x80000000; /* set load_enable otherwise this package is just NOPs */
 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
 		/* set load_global_config & load_global_uconfig */
 		dw2 |= 0x8001;
@@ -8758,9 +8763,7 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
 		dw2 |= 0x10000000;
 	}
 
-	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
-	amdgpu_ring_write(ring, dw2);
-	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1), dw2, 0);
 }
 
 static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
@@ -8768,11 +8771,12 @@
 {
 	unsigned int ret;
 
-	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
-	amdgpu_ring_write(ring, lower_32_bits(addr));
-	amdgpu_ring_write(ring, upper_32_bits(addr));
+	amdgpu_ring_write(ring,
+			  PACKET3(PACKET3_COND_EXEC, 3),
+			  lower_32_bits(addr),
+			  upper_32_bits(addr),
 	/* discard following DWs if *cond_exec_gpu_addr==0 */
-	amdgpu_ring_write(ring, 0);
+			  0);
 	ret = ring->wptr & ring->buf_mask;
 	/* patch dummy value later */
 	amdgpu_ring_write(ring, 0);
@@ -8839,22 +8843,21 @@ static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
 
 	offset = offsetof(struct v10_gfx_meta_data, ce_payload);
 	ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
-	ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
-
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
-	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
-				 WRITE_DATA_DST_SEL(8) |
-				 WR_CONFIRM) |
-				 WRITE_DATA_CACHE_POLICY(0));
-	amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
-	amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
 	if (resume)
-		amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
-					   sizeof(ce_payload) >> 2);
+		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
 	else
-		amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
-					   sizeof(ce_payload) >> 2);
+		ce_payload_cpu_addr = (void *)&ce_payload;
+
+	amdgpu_ring_write(ring,
+			  PACKET3(PACKET3_WRITE_DATA, cnt),
+			  (WRITE_DATA_ENGINE_SEL(2) | WRITE_DATA_DST_SEL(8) |
+			   WR_CONFIRM) | WRITE_DATA_CACHE_POLICY(0),
+			  lower_32_bits(ce_payload_gpu_addr),
+			  upper_32_bits(ce_payload_gpu_addr));
+
+	amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
				   sizeof(ce_payload) >> 2);
 }
 
 static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
@@ -8867,7 +8870,10 @@
 
 	offset = offsetof(struct v10_gfx_meta_data, de_payload);
 	de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
-	de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+	if (resume)
+		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+	else
+		de_payload_cpu_addr = (void *)&de_payload;
 
 	gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
 			 AMDGPU_CSA_SIZE - adev->gds.gds_size,
@@ -8877,20 +8883,15 @@
 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
 
 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
-	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
-				 WRITE_DATA_DST_SEL(8) |
-				 WR_CONFIRM) |
-				 WRITE_DATA_CACHE_POLICY(0));
-	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
-	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
+	amdgpu_ring_write(ring,
+			  PACKET3(PACKET3_WRITE_DATA, cnt),
+			  (WRITE_DATA_ENGINE_SEL(1) | WRITE_DATA_DST_SEL(8) |
+			   WR_CONFIRM) | WRITE_DATA_CACHE_POLICY(0),
+			  lower_32_bits(de_payload_gpu_addr),
+			  upper_32_bits(de_payload_gpu_addr));
 
-	if (resume)
-		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
-					   sizeof(de_payload) >> 2);
-	else
-		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
-					   sizeof(de_payload) >> 2);
+	amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
				   sizeof(de_payload) >> 2);
 }
 
 static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
@@ -8898,31 +8899,32 @@
 {
 	uint32_t v = secure ? FRAME_TMZ : 0;
 
-	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
-	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
+	amdgpu_ring_write(ring,
+			  PACKET3(PACKET3_FRAME_CONTROL, 0),
+			  v | FRAME_CMD(start ? 0 : 1));
 }
 
 static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
				     uint32_t reg_val_offs)
 {
 	struct amdgpu_device *adev = ring->adev;
+	u64 gpu_addr = adev->wb.gpu_addr + reg_val_offs * 4;
 
-	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
-	amdgpu_ring_write(ring, 0 |	/* src: register*/
-			  (5 << 8) |	/* dst: memory */
-			  (1 << 20));	/* write confirm */
-	amdgpu_ring_write(ring, reg);
-	amdgpu_ring_write(ring, 0);
-	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
-			  reg_val_offs * 4));
-	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
-			  reg_val_offs * 4));
+	amdgpu_ring_write(ring,
+			  PACKET3(PACKET3_COPY_DATA, 4),
+			  0 |		/* src: register*/
+			  (5 << 8) |	/* dst: memory */
+			  (1 << 20),	/* write confirm */
+			  reg,
+			  0,
+			  lower_32_bits(gpu_addr),
+			  upper_32_bits(gpu_addr));
 }
 
 static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				     uint32_t val)
 {
-	uint32_t cmd = 0;
+	uint32_t cmd;
 
 	switch (ring->funcs->type) {
 	case AMDGPU_RING_TYPE_GFX:
@@ -8935,11 +8937,13 @@
 		cmd = WR_CONFIRM;
 		break;
 	}
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, cmd);
-	amdgpu_ring_write(ring, reg);
-	amdgpu_ring_write(ring, 0);
-	amdgpu_ring_write(ring, val);
+
+	amdgpu_ring_write(ring,
+			  PACKET3(PACKET3_WRITE_DATA, 3),
+			  cmd,
+			  reg,
+			  0,
+			  val);
 }
 
 static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
@@ -9416,15 +9420,17 @@
 		    PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
 		    PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
 
-	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
-	amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
-	amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
-	amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
-	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
-	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
-	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
-	amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
+
+	amdgpu_ring_write(ring,
+			  /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
+			  PACKET3(PACKET3_ACQUIRE_MEM, 6),
+			  0, /* CP_COHER_CNTL */
+			  0xffffffff, /* CP_COHER_SIZE */
+			  0xffffff, /* CP_COHER_SIZE_HI */
+			  0, /* CP_COHER_BASE */
+			  0, /* CP_COHER_BASE_HI */
+			  0x0000000A, /* POLL_INTERVAL */
+			  gcr_cntl); /* GCR_CNTL */
 }
 
 static void gfx_v10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
@@ -9726,8 +9732,9 @@ static void gfx_v10_ip_dump(struct amdgpu_ip_block *ip_block)
 static void gfx_v10_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
 {
 	/* Emit the cleaner shader */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
-	amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
+	amdgpu_ring_write(ring,
+			  PACKET3(PACKET3_RUN_CLEANER_SHADER, 0),
+			  0); /* RESERVED field, programmed to zero */
 }
 
 static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
-- 
2.47.1
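
P.S. For reviewers unfamiliar with the variadic helper this series
builds on: judging by the amdgpu_ring.h hunk above, amdgpu_ring_write()
appears to dispatch by argument count to fixed-arity inline helpers (up
to amdgpu_ring_write11()). Below is a minimal sketch of the same idea,
expressed instead with a C99 compound literal feeding the existing
amdgpu_ring_write_multiple(); the ring_emit() name is hypothetical and
not part of this series:

	/*
	 * Illustrative sketch only, not the implementation from this
	 * patch: gather the variadic dword arguments into a temporary
	 * array and emit them with one bulk copy.
	 */
	#define ring_emit(ring, ...)					\
		amdgpu_ring_write_multiple((ring),			\
			(const u32[]){ __VA_ARGS__ },			\
			ARRAY_SIZE(((const u32[]){ __VA_ARGS__ })))

	/* usage would mirror the converted call sites, e.g.: */
	/* ring_emit(ring, PACKET3(PACKET3_CLEAR_STATE, 0), 0); */

Fixed-arity helpers, as the header seems to use, avoid materialising
the temporary array on these hot emit paths; the sketch trades that
for brevity.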