Inlined: On 2022-10-10 02:07, jiadong.zhu@xxxxxxx wrote: > From: "Jiadong.Zhu" <Jiadong.Zhu@xxxxxxx> > > Set ring functions with software ring callbacks on gfx9. > > The software ring could be tested by debugfs_test_ib case. > > v2: Set sw_ring 2 to enable software ring by default. > v3: Remove the parameter for software ring enablement. > v4: Use amdgpu_ring_init/fini for software rings. > v5: Update for code format. Fix conflict. > v6: Remove unnecessary checks and enable software ring on gfx9 by default. > v7: Use static array for software ring names and priorities. > v8: Stop creating software rings if no gfx ring existed. > > Acked-by: Luben Tuikov <luben.tuikov@xxxxxxx> Tags appear in chronological order, and thus Acked-by and Reviewed-by tags appear after your Signed-off-by tag. Please move the Acked-by tag after your Signed-off-by and thenafter add any subsequent/new tags below it. I don't see any glaring problems with this patch, but it should be heavily tested. Regards, Luben > Cc: Christian Koenig <Christian.Koenig@xxxxxxx> > Cc: Luben Tuikov <Luben.Tuikov@xxxxxxx> > Cc: Andrey Grodzovsky <Andrey.Grodzovsky@xxxxxxx> > Cc: Michel Dänzer <michel@xxxxxxxxxxx> > Cc: Likun Gao <Likun.Gao@xxxxxxx> > Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 + > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + > drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 20 ++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h | 2 + > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 113 ++++++++++++++++++- > 5 files changed, 136 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > index 9996dadb39f7..4fdfc3ec134a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > @@ -348,6 +348,7 @@ struct amdgpu_gfx { > > bool is_poweron; > > + struct amdgpu_ring sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS]; > struct amdgpu_ring_mux muxer; > }; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > index 40b1277b4f0c..f08ee1ac281c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > @@ -39,6 +39,7 @@ struct amdgpu_vm; > #define AMDGPU_MAX_RINGS 28 > #define AMDGPU_MAX_HWIP_RINGS 8 > #define AMDGPU_MAX_GFX_RINGS 2 > +#define AMDGPU_MAX_SW_GFX_RINGS 2 > #define AMDGPU_MAX_COMPUTE_RINGS 8 > #define AMDGPU_MAX_VCE_RINGS 3 > #define AMDGPU_MAX_UVD_ENC_RINGS 2 > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c > index 43cab8a37754..2e64ffccc030 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c > @@ -29,6 +29,14 @@ > > #define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2) > > +static const struct ring_info { > + unsigned int hw_pio; > + const char *ring_name; > +} sw_ring_info[] = { > + { AMDGPU_RING_PRIO_DEFAULT, "gfx_low"}, > + { AMDGPU_RING_PRIO_2, "gfx_high"}, > +}; > + > int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, > unsigned int entry_size) > { > @@ -215,3 +223,15 @@ void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) > { > WARN_ON(!ring->is_sw_ring); > } > + > +const char *amdgpu_sw_ring_name(int idx) > +{ > + return idx < ARRAY_SIZE(sw_ring_info) ? > + sw_ring_info[idx].ring_name : NULL; > +} > + > +unsigned int amdgpu_sw_ring_priority(int idx) > +{ > + return idx < ARRAY_SIZE(sw_ring_info) ? > + sw_ring_info[idx].hw_pio : AMDGPU_RING_PRIO_DEFAULT; > +} > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h > index d91629589577..28399f4b0e5c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h > @@ -73,4 +73,6 @@ void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); > void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring); > void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring); > > +const char *amdgpu_sw_ring_name(int idx); > +unsigned int amdgpu_sw_ring_priority(int idx); > #endif > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index 6b609f33261f..8d4fbc9e3fc0 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -47,6 +47,7 @@ > > #include "amdgpu_ras.h" > > +#include "amdgpu_ring_mux.h" > #include "gfx_v9_4.h" > #include "gfx_v9_0.h" > #include "gfx_v9_4_2.h" > @@ -56,6 +57,7 @@ > #include "asic_reg/gc/gc_9_0_default.h" > > #define GFX9_NUM_GFX_RINGS 1 > +#define GFX9_NUM_SW_GFX_RINGS 2 > #define GFX9_MEC_HPD_SIZE 4096 > #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L > #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L > @@ -2273,6 +2275,7 @@ static int gfx_v9_0_sw_init(void *handle) > struct amdgpu_ring *ring; > struct amdgpu_kiq *kiq; > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > + unsigned int hw_prio; > > switch (adev->ip_versions[GC_HWIP][0]) { > case IP_VERSION(9, 0, 1): > @@ -2356,6 +2359,9 @@ static int gfx_v9_0_sw_init(void *handle) > sprintf(ring->name, "gfx_%d", i); > ring->use_doorbell = true; > ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; > + > + /* disable scheduler on the real ring */ > + ring->no_scheduler = true; > r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, > AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, > AMDGPU_RING_PRIO_DEFAULT, NULL); > @@ -2363,6 +2369,41 @@ static int gfx_v9_0_sw_init(void *handle) > return r; > } > > + /* set up the software rings */ > + if (adev->gfx.num_gfx_rings) { > + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { > + ring = &adev->gfx.sw_gfx_ring[i]; > + ring->ring_obj = NULL; > + sprintf(ring->name, amdgpu_sw_ring_name(i)); > + ring->use_doorbell = true; > + ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; > + ring->is_sw_ring = true; > + hw_prio = amdgpu_sw_ring_priority(i); > + r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, > + AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio, > + NULL); > + if (r) > + return r; > + ring->wptr = 0; > + } > + > + /* init the muxer and add software rings */ > + r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0], > + GFX9_NUM_SW_GFX_RINGS); > + if (r) { > + DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r); > + return r; > + } > + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { > + r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer, > + &adev->gfx.sw_gfx_ring[i]); > + if (r) { > + DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r); > + return r; > + } > + } > + } > + > /* set up the compute queues - allocate horizontally across pipes */ > ring_id = 0; > for (i = 0; i < adev->gfx.mec.num_mec; ++i) { > @@ -2413,6 +2454,12 @@ static int gfx_v9_0_sw_fini(void *handle) > int i; > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > > + if (adev->gfx.num_gfx_rings) { > + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) > + amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]); > + amdgpu_ring_mux_fini(&adev->gfx.muxer); > + } > + > for (i = 0; i < adev->gfx.num_gfx_rings; i++) > amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); > for (i = 0; i < adev->gfx.num_compute_rings; i++) > @@ -5877,7 +5924,11 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, > > switch (me_id) { > case 0: > - amdgpu_fence_process(&adev->gfx.gfx_ring[0]); > + /* Fence signals are handled on the software rings*/ > + if (adev->gfx.num_gfx_rings) { > + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) > + amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]); > + } > break; > case 1: > case 2: > @@ -6882,6 +6933,61 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { > .emit_mem_sync = gfx_v9_0_emit_mem_sync, > }; > > +static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { > + .type = AMDGPU_RING_TYPE_GFX, > + .align_mask = 0xff, > + .nop = PACKET3(PACKET3_NOP, 0x3FFF), > + .support_64bit_ptrs = true, > + .secure_submission_supported = true, > + .vmhub = AMDGPU_GFXHUB_0, > + .get_rptr = amdgpu_sw_ring_get_rptr_gfx, > + .get_wptr = amdgpu_sw_ring_get_wptr_gfx, > + .set_wptr = amdgpu_sw_ring_set_wptr_gfx, > + .emit_frame_size = /* totally 242 maximum if 16 IBs */ > + 5 + /* COND_EXEC */ > + 7 + /* PIPELINE_SYNC */ > + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + > + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + > + 2 + /* VM_FLUSH */ > + 8 + /* FENCE for VM_FLUSH */ > + 20 + /* GDS switch */ > + 4 + /* double SWITCH_BUFFER, > + * the first COND_EXEC jump to the place just > + * prior to this double SWITCH_BUFFER > + */ > + 5 + /* COND_EXEC */ > + 7 + /* HDP_flush */ > + 4 + /* VGT_flush */ > + 14 + /* CE_META */ > + 31 + /* DE_META */ > + 3 + /* CNTX_CTRL */ > + 5 + /* HDP_INVL */ > + 8 + 8 + /* FENCE x2 */ > + 2 + /* SWITCH_BUFFER */ > + 7, /* gfx_v9_0_emit_mem_sync */ > + .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ > + .emit_ib = gfx_v9_0_ring_emit_ib_gfx, > + .emit_fence = gfx_v9_0_ring_emit_fence, > + .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, > + .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, > + .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, > + .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, > + .test_ring = gfx_v9_0_ring_test_ring, > + .test_ib = gfx_v9_0_ring_test_ib, > + .insert_nop = amdgpu_sw_ring_insert_nop, > + .pad_ib = amdgpu_ring_generic_pad_ib, > + .emit_switch_buffer = gfx_v9_ring_emit_sb, > + .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, > + .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, > + .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, > + .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, > + .emit_wreg = gfx_v9_0_ring_emit_wreg, > + .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, > + .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, > + .soft_recovery = gfx_v9_0_ring_soft_recovery, > + .emit_mem_sync = gfx_v9_0_emit_mem_sync, > +}; > + > static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { > .type = AMDGPU_RING_TYPE_COMPUTE, > .align_mask = 0xff, > @@ -6959,6 +7065,11 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) > for (i = 0; i < adev->gfx.num_gfx_rings; i++) > adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; > > + if (adev->gfx.num_gfx_rings) { > + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) > + adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx; > + } > + > for (i = 0; i < adev->gfx.num_compute_rings; i++) > adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute; > }