I already replied to v3 of your patch on Tuesday. Did you see my comments about further simplifying the two queue allocation policies?

Regards,
  Felix

On 2020-07-29 11:03 p.m., Liu, Monk wrote:
> [AMD Official Use Only - Internal Distribution Only]
>
> Ping
>
> _____________________________________
> Monk Liu|GPU Virtualization Team |AMD
>
>
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Liu, Monk
> Sent: Tuesday, July 28, 2020 5:32 PM
> To: Koenig, Christian <Christian.Koenig@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxx
> Cc: Kuehling, Felix <Felix.Kuehling@xxxxxxx>
> Subject: RE: FW: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v3)
>
> [AMD Official Use Only - Internal Distribution Only]
>
> I re-sent the patch through git-send-email.
>
> _____________________________________
> Monk Liu|GPU Virtualization Team |AMD
>
>
> -----Original Message-----
> From: Koenig, Christian <Christian.Koenig@xxxxxxx>
> Sent: Tuesday, July 28, 2020 5:04 PM
> To: Liu, Monk <Monk.Liu@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxx
> Cc: Kuehling, Felix <Felix.Kuehling@xxxxxxx>
> Subject: Re: FW: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v3)
>
> The patch looks totally mangled to me, e.g. some spaces and new lines are missing.
>
> Probably because it was forwarded.
>
> Christian.
>
> On 2020-07-28 10:59, Liu, Monk wrote:
>> [AMD Official Use Only - Internal Distribution Only]
>>
>> -----Original Message-----
>> From: Monk Liu <Monk.Liu@xxxxxxx>
>> Sent: Tuesday, July 28, 2020 2:59 PM
>> To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
>> Cc: Liu, Monk <Monk.Liu@xxxxxxx>
>> Subject: [PATCH] drm/amdgpu: introduce a new parameter to configure how many KCQ we want(v3)
>>
>> what:
>> saving and restoring the MQDs of the KCQs (kernel compute queues) costs a lot of clock cycles during world switch, which significantly hurts multi-VF performance
>>
>> how:
>> introduce a parameter to control the number of KCQs, so the performance drop can be avoided when no kernel compute queue is needed
>>
>> notes:
>> this parameter only affects gfx 8/9/10
>>
>> v2:
>> refine naming
>>
>> v3:
>> choose the queue for each ring so that the queues are spread across the pipes as evenly as possible
>>
>> TODO:
>> in the future the hypervisor driver will set this parameter automatically, so the user will not need to configure it through modprobe inside the virtual machine
>>
>> Signed-off-by: Monk Liu <Monk.Liu@xxxxxxx>
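For reference, the v3 spreading policy works out as follows; this is a quick user-space sketch (not part of the patch), assuming 4 pipes per MEC and 8 queues per pipe, which are just example numbers, with num_kcq taken as 8:

/* illustration only: the v3 multipipe spreading math, stand-alone */
#include <stdio.h>

int main(void)
{
	const int num_pipe_per_mec = 4;   /* assumed example value */
	const int num_queue_per_pipe = 8; /* assumed example value */
	const int num_compute_rings = 8;  /* i.e. num_kcq */

	for (int i = 0; i < num_compute_rings; i++) {
		/* same formula as the multipipe branch of amdgpu_gfx_compute_queue_acquire() */
		int pipe = i % num_pipe_per_mec;
		int queue = (i / num_pipe_per_mec) % num_queue_per_pipe;
		int bit = pipe * num_queue_per_pipe + queue;

		printf("ring %d -> pipe %d, queue %d (bitmap bit %d)\n",
		       i, pipe, queue, bit);
	}
	return 0;
}

With these numbers, rings 0-3 land on queue 0 of pipes 0-3 and rings 4-7 on queue 1 of pipes 0-3, i.e. every pipe is used once before any pipe is reused. The queue count itself is then chosen at load time, e.g. modprobe amdgpu num_kcq=2, or amdgpu.num_kcq=2 on the kernel command line.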
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 +++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  4 +++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 58 +++++++++++++++---------------
>>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c     | 30 ++++++++--------
>>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      | 29 +++++++--------
>>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c      | 31 ++++++++--------
>>   7 files changed, 87 insertions(+), 71 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index e97c088..de11136 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -201,6 +201,7 @@ extern int amdgpu_si_support;
>>   #ifdef CONFIG_DRM_AMDGPU_CIK
>>   extern int amdgpu_cik_support;
>>   #endif
>> +extern int amdgpu_num_kcq;
>>
>>   #define AMDGPU_VM_MAX_NUM_CTX		4096
>>   #define AMDGPU_SG_THRESHOLD		(256*1024*1024)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 62ecac9..cf445bab 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
>>
>>   	amdgpu_gmc_tmz_set(adev);
>>
>> +	if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
>> +		amdgpu_num_kcq = 8;
>> +		dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
>> +	}
>> +
>>   	return 0;
>>   }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index 6291f5f..b545c40 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -150,6 +150,7 @@ int amdgpu_noretry;
>>   int amdgpu_force_asic_type = -1;
>>   int amdgpu_tmz = 0;
>>   int amdgpu_reset_method = -1; /* auto */
>> +int amdgpu_num_kcq = -1;
>>
>>   struct amdgpu_mgpu_info mgpu_info = {
>>   	.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
>> @@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
>>   MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
>>   module_param_named(reset_method, amdgpu_reset_method, int, 0444);
>>
>> +MODULE_PARM_DESC(num_kcq, "number of kernel compute queues the user wants to set up (8 if set to greater than 8 or less than 0, only affects gfx 8+)");
>> +module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
>> +
>>   static const struct pci_device_id pciidlist[] = {
>>   #ifdef  CONFIG_DRM_AMDGPU_SI
>>   	{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> index 8eff017..f83a9a7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> @@ -202,40 +202,42 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
>>
>>   void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
>>   {
>> -	int i, queue, pipe, mec;
>> +	int i, queue, pipe;
>>   	bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
>> +	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
>> +				     adev->gfx.mec.num_queue_per_pipe,
>> +				     adev->gfx.num_compute_rings);
>> +
>> +	if (multipipe_policy) {
>> +		/* policy: make queues evenly cross all pipes on MEC1 only */
>> +		for (i = 0; i < max_queues_per_mec; i++) {
>> +			pipe = i % adev->gfx.mec.num_pipe_per_mec;
>> +			queue = (i / adev->gfx.mec.num_pipe_per_mec) %
>> +				adev->gfx.mec.num_queue_per_pipe;
>> +
>> +			set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
>> +				adev->gfx.mec.queue_bitmap);
>> +		}
>> +	} else {
>> +		int mec;
>>
>> -	/* policy for amdgpu compute queue ownership */
>> -	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
>> -		queue = i % adev->gfx.mec.num_queue_per_pipe;
>> -		pipe = (i / adev->gfx.mec.num_queue_per_pipe)
>> -			% adev->gfx.mec.num_pipe_per_mec;
>> -		mec = (i / adev->gfx.mec.num_queue_per_pipe)
>> -			/ adev->gfx.mec.num_pipe_per_mec;
>> -
>> -		/* we've run out of HW */
>> -		if (mec >= adev->gfx.mec.num_mec)
>> -			break;
>> +		/* policy: amdgpu owns all queues in the given pipe */
>> +		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
>> +			queue = i % adev->gfx.mec.num_queue_per_pipe;
>> +			pipe = (i / adev->gfx.mec.num_queue_per_pipe) %
>> +				adev->gfx.mec.num_pipe_per_mec;
>> +			mec = (i / adev->gfx.mec.num_queue_per_pipe) /
>> +				adev->gfx.mec.num_pipe_per_mec;
>>
>> -		if (multipipe_policy) {
>> -			/* policy: amdgpu owns the first two queues of the first MEC */
>> -			if (mec == 0 && queue < 2)
>> -				set_bit(i, adev->gfx.mec.queue_bitmap);
>> -		} else {
>> -			/* policy: amdgpu owns all queues in the first pipe */
>> -			if (mec == 0 && pipe == 0)
>> -				set_bit(i, adev->gfx.mec.queue_bitmap);
>> +			/* we've run out of HW */
>> +			if (mec >= adev->gfx.mec.num_mec)
>> +				break;
>> +
>> +			set_bit(i, adev->gfx.mec.queue_bitmap);
>>   		}
>>   	}
>>
>> -	/* update the number of active compute rings */
>> -	adev->gfx.num_compute_rings =
>> -		bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
>> -
>> -	/* If you hit this case and edited the policy, you probably just
>> -	 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
>> -	if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
>> -		adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
>> +	dev_info(adev->dev, "mec queue bitmap weight=%d\n",
>> +		 bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
>>   }
>>
>>   void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> index db9f1e8..3a93b3c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> @@ -4022,21 +4022,23 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
>>   	amdgpu_gfx_compute_queue_acquire(adev);
>>   	mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
>>
>> -	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>> -				      AMDGPU_GEM_DOMAIN_GTT,
>> -				      &adev->gfx.mec.hpd_eop_obj,
>> -				      &adev->gfx.mec.hpd_eop_gpu_addr,
>> -				      (void **)&hpd);
>> -	if (r) {
>> -		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> -		gfx_v10_0_mec_fini(adev);
>> -		return r;
>> -	}
>> +	if (mec_hpd_size) {
>> +		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>> +					      AMDGPU_GEM_DOMAIN_GTT,
>> +					      &adev->gfx.mec.hpd_eop_obj,
>> +					      &adev->gfx.mec.hpd_eop_gpu_addr,
>> +					      (void **)&hpd);
>> +		if (r) {
>> +			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> +			gfx_v10_0_mec_fini(adev);
>> +			return r;
>> +		}
>>
>> -	memset(hpd, 0, mec_hpd_size);
>> +		memset(hpd, 0, mec_hpd_size);
>>
>> -	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> -	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>> +		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> +		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>> +	}
>>
>>   	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
>>   		mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
>> @@ -7159,7 +7161,7 @@ static int gfx_v10_0_early_init(void *handle)
>>   		break;
>>   	}
>>
>> -	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
>> +	adev->gfx.num_compute_rings = amdgpu_num_kcq;
>>
>>   	gfx_v10_0_set_kiq_pm4_funcs(adev);
>>   	gfx_v10_0_set_ring_funcs(adev);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> index 8d72089..eb4b812 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> @@ -1343,21 +1343,22 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
>>   	amdgpu_gfx_compute_queue_acquire(adev);
>>
>>   	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
>> +	if (mec_hpd_size) {
>> +		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>> +					      AMDGPU_GEM_DOMAIN_VRAM,
>> +					      &adev->gfx.mec.hpd_eop_obj,
>> +					      &adev->gfx.mec.hpd_eop_gpu_addr,
>> +					      (void **)&hpd);
>> +		if (r) {
>> +			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> +			return r;
>> +		}
>>
>> -	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>> -				      AMDGPU_GEM_DOMAIN_VRAM,
>> -				      &adev->gfx.mec.hpd_eop_obj,
>> -				      &adev->gfx.mec.hpd_eop_gpu_addr,
>> -				      (void **)&hpd);
>> -	if (r) {
>> -		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> -		return r;
>> -	}
>> -
>> -	memset(hpd, 0, mec_hpd_size);
>> +		memset(hpd, 0, mec_hpd_size);
>>
>> -	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> -	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>> +		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> +		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>> +	}
>>
>>   	return 0;
>>   }
>> @@ -5294,7 +5295,7 @@ static int gfx_v8_0_early_init(void *handle)
>>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>>
>>   	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
>> -	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
>> +	adev->gfx.num_compute_rings = amdgpu_num_kcq;
>>   	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
>>   	gfx_v8_0_set_ring_funcs(adev);
>>   	gfx_v8_0_set_irq_funcs(adev);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> index e4e751f..43ad044 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> @@ -1938,22 +1938,23 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
>>   	/* take ownership of the relevant compute queues */
>>   	amdgpu_gfx_compute_queue_acquire(adev);
>>   	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
>> +	if (mec_hpd_size) {
>> +		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>> +					      AMDGPU_GEM_DOMAIN_VRAM,
>> +					      &adev->gfx.mec.hpd_eop_obj,
>> +					      &adev->gfx.mec.hpd_eop_gpu_addr,
>> +					      (void **)&hpd);
>> +		if (r) {
>> +			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> +			gfx_v9_0_mec_fini(adev);
>> +			return r;
>> +		}
>>
>> -	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
>> -				      AMDGPU_GEM_DOMAIN_VRAM,
>> -				      &adev->gfx.mec.hpd_eop_obj,
>> -				      &adev->gfx.mec.hpd_eop_gpu_addr,
>> -				      (void **)&hpd);
>> -	if (r) {
>> -		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> -		gfx_v9_0_mec_fini(adev);
>> -		return r;
>> -	}
>> -
>> -	memset(hpd, 0, mec_hpd_size);
>> +		memset(hpd, 0, mec_hpd_size);
>>
>> -	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> -	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>> +		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> +		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>> +	}
>>
>>   	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
>>
>> @@ -4625,7 +4626,7 @@ static int gfx_v9_0_early_init(void *handle)
>>   		adev->gfx.num_gfx_rings = 0;
>>   	else
>>   		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
>> -	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
>> +	adev->gfx.num_compute_rings = amdgpu_num_kcq;
>>   	gfx_v9_0_set_kiq_pm4_funcs(adev);
>>   	gfx_v9_0_set_ring_funcs(adev);
>>   	gfx_v9_0_set_irq_funcs(adev);
>> --
>> 2.7.4
>>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx