From: "Jiadong.Zhu" <Jiadong.Zhu@xxxxxxx> Set ring functions with software ring callbacks on gfx9. The software ring can be tested with the debugfs_test_ib case. v2: Set sw_ring to 2 to enable software ring by default. v3: Remove the parameter for software ring enablement. v4: Use amdgpu_ring_init/fini for software rings. v5: Update for code format. Fix conflict. Cc: Christian Koenig <Christian.Koenig@xxxxxxx> Cc: Luben Tuikov <Luben.Tuikov@xxxxxxx> Cc: Andrey Grodzovsky <Andrey.Grodzovsky@xxxxxxx> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 7 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 3 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 117 +++++++++++++++++++++-- 5 files changed, 120 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 96d058c4cd4b..525df0b4d55f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -207,6 +207,7 @@ extern bool amdgpu_ignore_bad_page_threshold; extern struct amdgpu_watchdog_timer amdgpu_watchdog_timer; extern int amdgpu_async_gfx_ring; extern int amdgpu_mcbp; +extern int amdgpu_sw_ring; extern int amdgpu_discovery; extern int amdgpu_mes; extern int amdgpu_mes_kiq; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 9996dadb39f7..93b25d9a87f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -348,6 +348,8 @@ struct amdgpu_gfx { bool is_poweron; + /* software ring */ + unsigned num_sw_gfx_rings; struct amdgpu_ring_mux muxer; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 13db99d653bd..4eaf3bd332f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -33,6 +33,7 @@ #include <drm/amdgpu_drm.h> 
#include "amdgpu.h" +#include "amdgpu_sw_ring.h" #include "atom.h" /* @@ -121,6 +122,11 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring) { uint32_t count; + if (ring->is_sw_ring) { + amdgpu_sw_ring_commit(ring); + return; + } + /* We pad to match fetch size */ count = ring->funcs->align_mask + 1 - (ring->wptr & ring->funcs->align_mask); @@ -343,7 +349,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, */ void amdgpu_ring_fini(struct amdgpu_ring *ring) { - /* Not to finish a ring which is not initialized */ if (!(ring->adev) || (!ring->is_mes_queue && !(ring->adev->rings[ring->idx]))) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 40b1277b4f0c..275b885363c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -38,7 +38,8 @@ struct amdgpu_vm; /* max number of rings */ #define AMDGPU_MAX_RINGS 28 #define AMDGPU_MAX_HWIP_RINGS 8 -#define AMDGPU_MAX_GFX_RINGS 2 +/*2 software ring and 1 real ring*/ +#define AMDGPU_MAX_GFX_RINGS 3 #define AMDGPU_MAX_COMPUTE_RINGS 8 #define AMDGPU_MAX_VCE_RINGS 3 #define AMDGPU_MAX_UVD_ENC_RINGS 2 diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 5349ca4d19e3..e85565b0e52a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -47,6 +47,7 @@ #include "amdgpu_ras.h" +#include "amdgpu_sw_ring.h" #include "gfx_v9_4.h" #include "gfx_v9_0.h" #include "gfx_v9_4_2.h" @@ -55,7 +56,8 @@ #include "asic_reg/pwr/pwr_10_0_sh_mask.h" #include "asic_reg/gc/gc_9_0_default.h" -#define GFX9_NUM_GFX_RINGS 1 +#define GFX9_NUM_GFX_RINGS 3 +#define GFX9_NUM_SW_GFX_RINGS 2 #define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L @@ -2270,6 +2272,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, static int 
gfx_v9_0_sw_init(void *handle) { int i, j, k, r, ring_id; + unsigned int hw_prio; struct amdgpu_ring *ring; struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -2356,13 +2359,41 @@ static int gfx_v9_0_sw_init(void *handle) sprintf(ring->name, "gfx_%d", i); ring->use_doorbell = true; ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; + ring->is_sw_ring = (adev->gfx.num_sw_gfx_rings > 1) && (i > 0); + + if (adev->gfx.num_sw_gfx_rings > 1 && i == 2) + hw_prio = AMDGPU_RING_PRIO_2; + else + hw_prio = AMDGPU_RING_PRIO_DEFAULT; + if (adev->gfx.num_sw_gfx_rings > 0 && i == 0) + ring->no_scheduler = true; + r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, - AMDGPU_RING_PRIO_DEFAULT, NULL); + hw_prio, NULL); if (r) return r; + + if (ring->is_sw_ring) + ring->wptr = 0; } + /*init the muxer and add sw rings */ + if (adev->gfx.num_sw_gfx_rings > 0) { + r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0], + adev->gfx.num_sw_gfx_rings); + if (r) { + DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r); + return r; + } + for (i = 1; i < adev->gfx.num_gfx_rings; i++) { + r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer, &adev->gfx.gfx_ring[i]); + if (r) { + DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r); + return r; + } + } + } /* set up the compute queues - allocate horizontally across pipes */ ring_id = 0; for (i = 0; i < adev->gfx.mec.num_mec; ++i) { @@ -2413,6 +2444,9 @@ static int gfx_v9_0_sw_fini(void *handle) int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->gfx.num_sw_gfx_rings > 0) + amdgpu_ring_mux_fini(&adev->gfx.muxer); + for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); for (i = 0; i < adev->gfx.num_compute_rings; i++) @@ -4709,8 +4743,9 @@ static int gfx_v9_0_early_init(void *handle) if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) || adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 
2)) adev->gfx.num_gfx_rings = 0; - else - adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; + + adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; + adev->gfx.num_sw_gfx_rings = GFX9_NUM_SW_GFX_RINGS; adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), AMDGPU_MAX_COMPUTE_RINGS); gfx_v9_0_set_kiq_pm4_funcs(adev); @@ -5877,7 +5912,11 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, switch (me_id) { case 0: - amdgpu_fence_process(&adev->gfx.gfx_ring[0]); + if (adev->gfx.num_sw_gfx_rings > 1) { + for (i = 1; i <= adev->gfx.num_sw_gfx_rings; i++) + amdgpu_fence_process(&adev->gfx.gfx_ring[i]); + } else + amdgpu_fence_process(&adev->gfx.gfx_ring[0]); break; case 1: case 2: @@ -6882,6 +6921,62 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_mem_sync = gfx_v9_0_emit_mem_sync, }; + +static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { + .type = AMDGPU_RING_TYPE_GFX, + .align_mask = 0xff, + .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = true, + .secure_submission_supported = true, + .vmhub = AMDGPU_GFXHUB_0, + .get_rptr = amdgpu_sw_ring_get_rptr_gfx, + .get_wptr = amdgpu_sw_ring_get_wptr_gfx, + .set_wptr = amdgpu_sw_ring_set_wptr_gfx, + .emit_frame_size = /* totally 242 maximum if 16 IBs */ + 5 + /* COND_EXEC */ + 7 + /* PIPELINE_SYNC */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + + 2 + /* VM_FLUSH */ + 8 + /* FENCE for VM_FLUSH */ + 20 + /* GDS switch */ + 4 + /* double SWITCH_BUFFER, + * the first COND_EXEC jump to the place just + * prior to this double SWITCH_BUFFER + */ + 5 + /* COND_EXEC */ + 7 + /* HDP_flush */ + 4 + /* VGT_flush */ + 14 + /* CE_META */ + 31 + /* DE_META */ + 3 + /* CNTX_CTRL */ + 5 + /* HDP_INVL */ + 8 + 8 + /* FENCE x2 */ + 2 + /* SWITCH_BUFFER */ + 7, /* gfx_v9_0_emit_mem_sync */ + .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ + .emit_ib = gfx_v9_0_ring_emit_ib_gfx, + .emit_fence = gfx_v9_0_ring_emit_fence, + .emit_pipeline_sync = 
gfx_v9_0_ring_emit_pipeline_sync, + .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, + .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, + .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, + .test_ring = gfx_v9_0_ring_test_ring, + .test_ib = gfx_v9_0_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_switch_buffer = gfx_v9_ring_emit_sb, + .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, + .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, + .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, + .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, + .emit_wreg = gfx_v9_0_ring_emit_wreg, + .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v9_0_ring_soft_recovery, + .emit_mem_sync = gfx_v9_0_emit_mem_sync, +}; + static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .type = AMDGPU_RING_TYPE_COMPUTE, .align_mask = 0xff, @@ -6956,9 +7051,15 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq; - for (i = 0; i < adev->gfx.num_gfx_rings; i++) - adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; - + if (adev->gfx.num_sw_gfx_rings > 0) { + //first one is the real ring + adev->gfx.gfx_ring[0].funcs = &gfx_v9_0_ring_funcs_gfx; + for (i = 1; i <= adev->gfx.num_sw_gfx_rings; i++) + adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx; + } else { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) + adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; + } for (i = 0; i < adev->gfx.num_compute_rings; i++) adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute; } -- 2.25.1