From: "Jiadong.Zhu" <Jiadong.Zhu@xxxxxxx>
Set ring functions with software ring callbacks
on gfx9.
The software ring could be tested by debugfs_test_ib
case.
v2: set sw_ring 2 to enable software ring by default.
v3: remove the parameter for software ring enablement.
Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 +
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 16 +++-
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 3 +-
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 116 +++++++++++++++++++++--
5 files changed, 128 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 96d058c4cd4b..525df0b4d55f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -207,6 +207,7 @@ extern bool amdgpu_ignore_bad_page_threshold;
extern struct amdgpu_watchdog_timer amdgpu_watchdog_timer;
extern int amdgpu_async_gfx_ring;
extern int amdgpu_mcbp;
+extern int amdgpu_sw_ring;
extern int amdgpu_discovery;
extern int amdgpu_mes;
extern int amdgpu_mes_kiq;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 0de8e3cd0f1c..5eec82014f0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -348,6 +348,8 @@ struct amdgpu_gfx {
bool is_poweron;
+ /*software ring*/
+ unsigned num_sw_gfx_rings;
struct amdgpu_ring_mux muxer;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 13db99d653bd..5b70a2c36d81 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -33,6 +33,7 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
+#include "amdgpu_sw_ring.h"
#include "atom.h"
/*
@@ -121,6 +122,11 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
{
uint32_t count;
+ if (ring->is_sw_ring) {
+ amdgpu_sw_ring_commit(ring);
+ return;
+ }
+
/* We pad to match fetch size */
count = ring->funcs->align_mask + 1 -
(ring->wptr & ring->funcs->align_mask);
@@ -183,6 +189,11 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
u32 *num_sched;
u32 hw_ip;
+ if (adev->gfx.num_sw_gfx_rings > 0 && ring->is_sw_ring) {
+ return amdgpu_sw_ring_init(adev, ring, max_dw, irq_src, irq_type,
+ hw_prio, sched_score);
+ }
+
/* Set the hw submission limit higher for KIQ because
* it's used for a number of gfx/compute tasks by both
* KFD and KGD which may have outstanding fences and
@@ -343,7 +354,10 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
*/
void amdgpu_ring_fini(struct amdgpu_ring *ring)
{
-
+ if (ring->is_sw_ring) {
+ amdgpu_sw_ring_fini(ring);
+ return;
+ }
/* Not to finish a ring which is not initialized */
if (!(ring->adev) ||
(!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index fe33a683bfba..ba6d8c753f7e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -38,7 +38,8 @@ struct amdgpu_vm;
/* max number of rings */
#define AMDGPU_MAX_RINGS 28
#define AMDGPU_MAX_HWIP_RINGS 8
-#define AMDGPU_MAX_GFX_RINGS 2
+/*2 software ring and 1 real ring*/
+#define AMDGPU_MAX_GFX_RINGS 3
#define AMDGPU_MAX_COMPUTE_RINGS 8
#define AMDGPU_MAX_VCE_RINGS 3
#define AMDGPU_MAX_UVD_ENC_RINGS 2
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 5349ca4d19e3..774e44e1074a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -47,6 +47,7 @@
#include "amdgpu_ras.h"
+#include "amdgpu_sw_ring.h"
#include "gfx_v9_4.h"
#include "gfx_v9_0.h"
#include "gfx_v9_4_2.h"
@@ -55,7 +56,8 @@
#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
#include "asic_reg/gc/gc_9_0_default.h"
-#define GFX9_NUM_GFX_RINGS 1
+#define GFX9_NUM_GFX_RINGS 3
+#define GFX9_NUM_SW_GFX_RINGS 2
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
@@ -2270,6 +2272,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
static int gfx_v9_0_sw_init(void *handle)
{
int i, j, k, r, ring_id;
+ unsigned int hw_prio;
struct amdgpu_ring *ring;
struct amdgpu_kiq *kiq;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -2356,13 +2359,40 @@ static int gfx_v9_0_sw_init(void *handle)
sprintf(ring->name, "gfx_%d", i);
ring->use_doorbell = true;
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+ ring->is_sw_ring = (adev->gfx.num_sw_gfx_rings > 1) && (i > 0);
+
+ if (adev->gfx.num_sw_gfx_rings > 1 && i == 2)
+ hw_prio = AMDGPU_RING_PRIO_2;
+ else
+ hw_prio = AMDGPU_RING_PRIO_DEFAULT;
+ if (adev->gfx.num_sw_gfx_rings > 0 && i == 0)
+ ring->no_scheduler = true;
+
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
- AMDGPU_RING_PRIO_DEFAULT, NULL);
+ hw_prio, NULL);
if (r)
return r;
+
+ if (ring->is_sw_ring)
+ ring->wptr = 0;
}
+ /*init the muxer and add sw rings */
+ if (adev->gfx.num_sw_gfx_rings > 0) {
+ r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0]);
+ if (r) {
+ DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
+ return r;
+ }
+ for (i = 1; i < adev->gfx.num_gfx_rings; i++) {
+ r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer, &adev->gfx.gfx_ring[i]);
+ if (r) {
+ DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
+ return r;
+ }
+ }
+ }
/* set up the compute queues - allocate horizontally across pipes */
ring_id = 0;
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
@@ -2413,6 +2443,9 @@ static int gfx_v9_0_sw_fini(void *handle)
int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (adev->gfx.num_sw_gfx_rings > 0)
+ amdgpu_ring_mux_fini(&adev->gfx.muxer);
+
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
for (i = 0; i < adev->gfx.num_compute_rings; i++)
@@ -4709,8 +4742,9 @@ static int gfx_v9_0_early_init(void *handle)
if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
adev->gfx.num_gfx_rings = 0;
- else
- adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
+
+ adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
+ adev->gfx.num_sw_gfx_rings = GFX9_NUM_SW_GFX_RINGS;
adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
AMDGPU_MAX_COMPUTE_RINGS);
gfx_v9_0_set_kiq_pm4_funcs(adev);
@@ -5877,7 +5911,11 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
switch (me_id) {
case 0:
- amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+ if (adev->gfx.num_sw_gfx_rings > 1) {
+ for (i = 1; i <= adev->gfx.num_sw_gfx_rings; i++)
+ amdgpu_fence_process(&adev->gfx.gfx_ring[i]);
+ } else
+ amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
break;
case 1:
case 2:
@@ -6882,6 +6920,62 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.emit_mem_sync = gfx_v9_0_emit_mem_sync,
};
+
+static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
+ .type = AMDGPU_RING_TYPE_GFX,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .vmhub = AMDGPU_GFXHUB_0,
+ .get_rptr = amdgpu_sw_ring_get_rptr_gfx,
+ .get_wptr = amdgpu_sw_ring_get_wptr_gfx,
+ .set_wptr = amdgpu_sw_ring_set_wptr_gfx,
+ .emit_frame_size = /* totally 242 maximum if 16 IBs */
+ 5 + /* COND_EXEC */
+ 7 + /* PIPELINE_SYNC */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* VM_FLUSH */
+ 8 + /* FENCE for VM_FLUSH */
+ 20 + /* GDS switch */
+ 4 + /* double SWITCH_BUFFER,
+ * the first COND_EXEC jump to the place just
+ * prior to this double SWITCH_BUFFER
+ */
+ 5 + /* COND_EXEC */
+ 7 + /* HDP_flush */
+ 4 + /* VGT_flush */
+ 14 + /* CE_META */
+ 31 + /* DE_META */
+ 3 + /* CNTX_CTRL */
+ 5 + /* HDP_INVL */
+ 8 + 8 + /* FENCE x2 */
+ 2 + /* SWITCH_BUFFER */
+ 7, /* gfx_v9_0_emit_mem_sync */
+ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
+ .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
+ .emit_fence = gfx_v9_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v9_0_ring_test_ring,
+ .test_ib = gfx_v9_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_switch_buffer = gfx_v9_ring_emit_sb,
+ .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
+ .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
+ .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
+ .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
+ .emit_wreg = gfx_v9_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
+ .soft_recovery = gfx_v9_0_ring_soft_recovery,
+ .emit_mem_sync = gfx_v9_0_emit_mem_sync,
+};
+
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
.type = AMDGPU_RING_TYPE_COMPUTE,
.align_mask = 0xff,
@@ -6956,9 +7050,15 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
- for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
-
+ if (adev->gfx.num_sw_gfx_rings > 0) {
+ //first one is the real ring
+ adev->gfx.gfx_ring[0].funcs = &gfx_v9_0_ring_funcs_gfx;
+ for (i = 1; i <= adev->gfx.num_sw_gfx_rings; i++)
+ adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
+ } else {
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
+ }
for (i = 0; i < adev->gfx.num_compute_rings; i++)
adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}