From: Marek Olšák <marek.olsak@xxxxxxx> I'm not increasing the DRM version because GDS isn't totally without bugs yet. Signed-off-by: Marek Olšák <marek.olsak@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 17 ++++++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 17 ++++++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 36 +++++++++++++++++++++++++ include/uapi/drm/amdgpu_drm.h | 5 ++++ 5 files changed, 77 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h index ecbcefe49a98..f89f5734d985 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h @@ -30,20 +30,22 @@ struct amdgpu_bo; struct amdgpu_gds_asic_info { uint32_t total_size; uint32_t gfx_partition_size; uint32_t cs_partition_size; }; struct amdgpu_gds { struct amdgpu_gds_asic_info mem; struct amdgpu_gds_asic_info gws; struct amdgpu_gds_asic_info oa; + uint32_t gds_compute_max_wave_id; + /* At present, GDS, GWS and OA resources for gfx (graphics) * is always pre-allocated and available for graphics operation. * Such resource is shared between all gfx clients. * TODO: move this operation to user space * */ struct amdgpu_bo* gds_gfx_bo; struct amdgpu_bo* gws_gfx_bo; struct amdgpu_bo* oa_gfx_bo; }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 7984292f9282..d971ea914755 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2257,20 +2257,36 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, } static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); + /* Currently, there is a high possibility to get wave ID mismatch + * between ME and GDS, leading to a hw deadlock, because ME generates + * different wave IDs than the GDS expects. This situation happens + * randomly when at least 5 compute pipes use GDS ordered append. + * The wave IDs generated by ME are also wrong after suspend/resume. + * Those are probably bugs somewhere else in the kernel driver. + * + * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and + * GDS to 0 for this ring (me/pipe). + */ + if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START); + amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); + } + amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN (2 << 0) | #endif (ib->gpu_addr & 0xFFFFFFFC)); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); amdgpu_ring_write(ring, control); } @@ -5050,20 +5066,21 @@ static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v7_0_priv_inst_irq_funcs; } static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev) { /* init asci gds info */ adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); adev->gds.gws.total_size = 64; adev->gds.oa.total_size = 16; + adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID); if (adev->gds.mem.total_size == 64 * 1024) { adev->gds.mem.gfx_partition_size = 4096; adev->gds.mem.cs_partition_size = 4096; adev->gds.gws.gfx_partition_size = 4; adev->gds.gws.cs_partition_size = 4; adev->gds.oa.gfx_partition_size = 4; adev->gds.oa.cs_partition_size = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index a26747681ed6..dcdae74fc0e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6077,20 +6077,36 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, } static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); + /* Currently, there is a high possibility to get wave ID mismatch + * between ME and GDS, leading to a hw deadlock, because ME generates + * different wave IDs than the GDS expects. This situation happens + * randomly when at least 5 compute pipes use GDS ordered append. + * The wave IDs generated by ME are also wrong after suspend/resume. + * Those are probably bugs somewhere else in the kernel driver. + * + * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and + * GDS to 0 for this ring (me/pipe). + */ + if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START); + amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); + } + amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN (2 << 0) | #endif (ib->gpu_addr & 0xFFFFFFFC)); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); amdgpu_ring_write(ring, control); } @@ -6989,20 +7005,21 @@ static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) { adev->gfx.rlc.funcs = &iceland_rlc_funcs; } static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) { /* init asci gds info */ adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); adev->gds.gws.total_size = 64; adev->gds.oa.total_size = 16; + adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID); if (adev->gds.mem.total_size == 64 * 1024) { adev->gds.mem.gfx_partition_size = 4096; adev->gds.mem.cs_partition_size = 4096; adev->gds.gws.gfx_partition_size = 4; adev->gds.gws.cs_partition_size = 4; adev->gds.oa.gfx_partition_size = 4; adev->gds.oa.cs_partition_size = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 262ee3cf6f1c..63b898fc0467 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4003,20 +4003,36 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, } static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); + /* Currently, there is a high possibility to get wave ID mismatch + * between ME and GDS, leading to a hw deadlock, because ME generates + * different wave IDs than the GDS expects. This situation happens + * randomly when at least 5 compute pipes use GDS ordered append. + * The wave IDs generated by ME are also wrong after suspend/resume. + * Those are probably bugs somewhere else in the kernel driver. + * + * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and + * GDS to 0 for this ring (me/pipe). + */ + if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); + amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); + } + amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN (2 << 0) | #endif lower_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, control); } @@ -4839,20 +4855,40 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) adev->gds.mem.total_size = 0x10000; break; case CHIP_RAVEN: adev->gds.mem.total_size = 0x1000; break; default: adev->gds.mem.total_size = 0x10000; break; } + switch (adev->asic_type) { + case CHIP_VEGA10: + case CHIP_VEGA20: + adev->gds.gds_compute_max_wave_id = 0x7ff; + break; + case CHIP_VEGA12: + adev->gds.gds_compute_max_wave_id = 0x27f; + break; + case CHIP_RAVEN: + if (adev->rev_id >= 0x8) + adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */ + else + adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */ + break; + default: + /* this really depends on the chip */ + adev->gds.gds_compute_max_wave_id = 0x7ff; + break; + } + adev->gds.gws.total_size = 64; adev->gds.oa.total_size = 16; if (adev->gds.mem.total_size == 64 * 1024) { adev->gds.mem.gfx_partition_size = 4096; adev->gds.mem.cs_partition_size = 4096; adev->gds.gws.gfx_partition_size = 4; adev->gds.gws.cs_partition_size = 4; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index faaad04814e4..662d379ea624 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -561,20 +561,25 @@ union drm_amdgpu_cs { /* Preamble flag, which means the IB could be dropped if no context switch */ #define AMDGPU_IB_FLAG_PREAMBLE (1<<1) /* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */ #define AMDGPU_IB_FLAG_PREEMPT (1<<2) /* The IB fence should do the L2 writeback but not invalidate any shader * caches (L2/vL1/sL1/I$). */ #define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3) +/* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER. + * This will reset wave ID counters for the IB. + */ +#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4) + struct drm_amdgpu_cs_chunk_ib { __u32 _pad; /** AMDGPU_IB_FLAG_* */ __u32 flags; /** Virtual address to begin IB execution */ __u64 va_start; /** Size of submission */ __u32 ib_bytes; /** HW IP to submit to */ __u32 ip_type; -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx