On 2020-07-27 5:26 a.m., Christian König wrote:
> On 27.07.20 10:21, Monk Liu wrote:
>> what:
>> KCQs cost many clock cycles during a world switch, which significantly
>> hurts multi-VF performance
>>
>> how:
>> introduce a parameter to control the number of KCQs, to avoid the
>> performance drop when no KCQs are needed
>>
>> notes:
>> this parameter only affects gfx 8/9/10
>
> Sounds like a good idea to me, but that needs a different name.
> Outside AMD most people don't know what a KCQ is.
>
> Just use "compute queue" or something similar as the name for this.

Just "compute queue" would be confusing for ROCm users. Maybe "legacy
compute queues"?

Regards,
  Felix

>
> Another comment below.
>
>>
>> Signed-off-by: Monk Liu <Monk.Liu@xxxxxxx>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 +++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  4 ++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 27 +++++++++++++-------------
>>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c     | 30 +++++++++++++++--------------
>>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      | 29 ++++++++++++++--------------
>>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c      | 31 +++++++++++++++---------------
>>   7 files changed, 69 insertions(+), 56 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index e97c088..71a3d6a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -201,6 +201,7 @@ extern int amdgpu_si_support;
>>   #ifdef CONFIG_DRM_AMDGPU_CIK
>>   extern int amdgpu_cik_support;
>>   #endif
>> +extern int amdgpu_num_kcq_user_set;
>>
>>   #define AMDGPU_VM_MAX_NUM_CTX        4096
>>   #define AMDGPU_SG_THRESHOLD    (256*1024*1024)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 62ecac9..61c7583 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -1199,6 +1199,9 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
>>       amdgpu_gmc_tmz_set(adev);
>>
>> +    if (amdgpu_num_kcq_user_set > 8 || amdgpu_num_kcq_user_set < 0)
>> +        amdgpu_num_kcq_user_set = 8;
>
> This needs a warning or error message if we overwrite invalid
> user-provided parameters.
>
> Christian.
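
(For illustration, a minimal sketch of the check with the warning Christian
asks for; the exact message wording is an assumption, not part of the patch:

    if (amdgpu_num_kcq_user_set > 8 || amdgpu_num_kcq_user_set < 0) {
        /* warn before clamping an out-of-range user value;
         * the message text here is illustrative only
         */
        dev_warn(adev->dev,
                 "num_kcq out of range [0, 8], falling back to 8\n");
        amdgpu_num_kcq_user_set = 8;
    }

dev_warn() makes the override visible in the kernel log without failing
device initialization.)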
>
>> +
>>       return 0;
>>   }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index 6291f5f..03a94e9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -150,6 +150,7 @@ int amdgpu_noretry;
>>   int amdgpu_force_asic_type = -1;
>>   int amdgpu_tmz = 0;
>>   int amdgpu_reset_method = -1; /* auto */
>> +int amdgpu_num_kcq_user_set = 8;
>>
>>   struct amdgpu_mgpu_info mgpu_info = {
>>       .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
>> @@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
>>   MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
>>   module_param_named(reset_method, amdgpu_reset_method, int, 0444);
>>
>> +MODULE_PARM_DESC(num_kcq, "number of KCQ user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)");
>> +module_param_named(num_kcq, amdgpu_num_kcq_user_set, int, 0444);
>> +
>>   static const struct pci_device_id pciidlist[] = {
>>   #ifdef CONFIG_DRM_AMDGPU_SI
>>       {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> index 8eff017..0b59049 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> @@ -202,7 +202,7 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
>>
>>   void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
>>   {
>> -    int i, queue, pipe, mec;
>> +    int i, queue, pipe, mec, j = 0;
>>       bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
>>
>>       /* policy for amdgpu compute queue ownership */
>> @@ -219,23 +219,24 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
>>
>>           if (multipipe_policy) {
>>               /* policy: amdgpu owns the first two queues of the first MEC */
>> -            if (mec == 0 && queue < 2)
>> -                set_bit(i, adev->gfx.mec.queue_bitmap);
>> +            if (mec == 0 && queue < 2) {
>> +                if (j++ < adev->gfx.num_compute_rings)
>> +                    set_bit(i, adev->gfx.mec.queue_bitmap);
>> +                else
>> +                    break;
>> +            }
>>           } else {
>>               /* policy: amdgpu owns all queues in the first pipe */
>> -            if (mec == 0 && pipe == 0)
>> -                set_bit(i, adev->gfx.mec.queue_bitmap);
>> +            if (mec == 0 && pipe == 0) {
>> +                if (j++ < adev->gfx.num_compute_rings)
>> +                    set_bit(i, adev->gfx.mec.queue_bitmap);
>> +                else
>> +                    break;
>> +            }
>>           }
>>       }
>>
>> -    /* update the number of active compute rings */
>> -    adev->gfx.num_compute_rings =
>> -        bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
>> -
>> -    /* If you hit this case and edited the policy, you probably just
>> -     * need to increase AMDGPU_MAX_COMPUTE_RINGS */
>> -    if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
>> -        adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
>> +    dev_info(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
>>   }
>>
>>   void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> index db9f1e8..2ad8393 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> @@ -4022,21 +4022,23 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
>>       amdgpu_gfx_compute_queue_acquire(adev);
>>       mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
>> -    r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>> -                                  AMDGPU_GEM_DOMAIN_GTT,
>> -                                  &adev->gfx.mec.hpd_eop_obj,
>> -                                  &adev->gfx.mec.hpd_eop_gpu_addr,
>> -                                  (void **)&hpd);
>> -    if (r) {
>> -        dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> -        gfx_v10_0_mec_fini(adev);
>> -        return r;
>> -    }
>> +    if (mec_hpd_size) {
>> +        r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>> +                                      AMDGPU_GEM_DOMAIN_GTT,
>> +                                      &adev->gfx.mec.hpd_eop_obj,
>> +                                      &adev->gfx.mec.hpd_eop_gpu_addr,
>> +                                      (void **)&hpd);
>> +        if (r) {
>> +            dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> +            gfx_v10_0_mec_fini(adev);
>> +            return r;
>> +        }
>>
>> -    memset(hpd, 0, mec_hpd_size);
>> +        memset(hpd, 0, mec_hpd_size);
>>
>> -    amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> -    amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>> +        amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> +        amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>> +    }
>>
>>       if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
>>           mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
>> @@ -7159,7 +7161,7 @@ static int gfx_v10_0_early_init(void *handle)
>>           break;
>>       }
>>
>> -    adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
>> +    adev->gfx.num_compute_rings = amdgpu_num_kcq_user_set;
>>
>>       gfx_v10_0_set_kiq_pm4_funcs(adev);
>>       gfx_v10_0_set_ring_funcs(adev);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> index 8d72089..6d95b4b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> @@ -1343,21 +1343,22 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
>>       amdgpu_gfx_compute_queue_acquire(adev);
>>       mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
>> +    if (mec_hpd_size) {
>> +        r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>> +                                      AMDGPU_GEM_DOMAIN_VRAM,
>> +                                      &adev->gfx.mec.hpd_eop_obj,
>> +                                      &adev->gfx.mec.hpd_eop_gpu_addr,
>> +                                      (void **)&hpd);
>> +        if (r) {
>> +            dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> +            return r;
>> +        }
>> -    r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>> -                                  AMDGPU_GEM_DOMAIN_VRAM,
>> -                                  &adev->gfx.mec.hpd_eop_obj,
>> -                                  &adev->gfx.mec.hpd_eop_gpu_addr,
>> -                                  (void **)&hpd);
>> -    if (r) {
>> -        dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> -        return r;
>> -    }
>> -
>> -    memset(hpd, 0, mec_hpd_size);
>> +        memset(hpd, 0, mec_hpd_size);
>>
>> -    amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> -    amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>> +        amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> +        amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>> +    }
>>
>>       return 0;
>>   }
>> @@ -5294,7 +5295,7 @@ static int gfx_v8_0_early_init(void *handle)
>>       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>>
>>       adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
>> -    adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
>> +    adev->gfx.num_compute_rings = amdgpu_num_kcq_user_set;
>>       adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
>>       gfx_v8_0_set_ring_funcs(adev);
>>       gfx_v8_0_set_irq_funcs(adev);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> index e4e751f..43bcfe3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> @@ -1938,22 +1938,23 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
>>       /* take ownership of the relevant compute queues */
>>       amdgpu_gfx_compute_queue_acquire(adev);
>>       mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
>> +    if (mec_hpd_size) {
>> +        r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>> +                                      AMDGPU_GEM_DOMAIN_VRAM,
>> +                                      &adev->gfx.mec.hpd_eop_obj,
>> +                                      &adev->gfx.mec.hpd_eop_gpu_addr,
>> +                                      (void **)&hpd);
>> +        if (r) {
>> +            dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> +            gfx_v9_0_mec_fini(adev);
>> +            return r;
>> +        }
>> -    r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>> -                                  AMDGPU_GEM_DOMAIN_VRAM,
>> -                                  &adev->gfx.mec.hpd_eop_obj,
>> -                                  &adev->gfx.mec.hpd_eop_gpu_addr,
>> -                                  (void **)&hpd);
>> -    if (r) {
>> -        dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> -        gfx_v9_0_mec_fini(adev);
>> -        return r;
>> -    }
>> -
>> -    memset(hpd, 0, mec_hpd_size);
>> +        memset(hpd, 0, mec_hpd_size);
>>
>> -    amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> -    amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>> +        amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> +        amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>> +    }
>>
>>       mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
>>
>> @@ -4625,7 +4626,7 @@ static int gfx_v9_0_early_init(void *handle)
>>           adev->gfx.num_gfx_rings = 0;
>>       else
>>           adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
>> -    adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
>> +    adev->gfx.num_compute_rings = amdgpu_num_kcq_user_set;
>>       gfx_v9_0_set_kiq_pm4_funcs(adev);
>>       gfx_v9_0_set_ring_funcs(adev);
>>       gfx_v9_0_set_irq_funcs(adev);
>

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
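
(For reference, a hypothetical usage example for the num_kcq parameter added
in amdgpu_drv.c above, assuming amdgpu is built as a loadable module; the
value 2 is arbitrary:

    # request 2 kernel compute queues instead of the default 8,
    # e.g. to reduce world-switch cost in a multi-VF SR-IOV setup
    modprobe amdgpu num_kcq=2

With the check in amdgpu_device_check_arguments(), values greater than 8 or
less than 0 fall back to the default of 8.)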