On 2019-10-30 20:17, Zhao, Yong wrote: > The kernel queue functions for v9 and v10 are the same except for > pm_map_process_v*, which differ slightly, so they should be reused. > This eliminates the need to reapply several patches which were > applied on v9 but not on v10, such as bigger GWS and support for more than 2 > SDMA engines, which were introduced on Arcturus. This looks reasonable in principle. See one suggestion inline to simplify it further. > > Change-Id: I2d385961e3c884db14e30b5afc98d0d9e4cb1802 > Signed-off-by: Yong Zhao <Yong.Zhao@xxxxxxx> > --- > drivers/gpu/drm/amd/amdkfd/Makefile | 1 - > drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 4 +- > drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 1 - > .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 317 ------------------ > .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c | 49 ++- > drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 - > 6 files changed, 44 insertions(+), 331 deletions(-) > delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c > > diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile > index 48155060a57c..017a8b7156da 100644 > --- a/drivers/gpu/drm/amd/amdkfd/Makefile > +++ b/drivers/gpu/drm/amd/amdkfd/Makefile > @@ -41,7 +41,6 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ > $(AMDKFD_PATH)/kfd_kernel_queue_cik.o \ > $(AMDKFD_PATH)/kfd_kernel_queue_vi.o \ > $(AMDKFD_PATH)/kfd_kernel_queue_v9.o \ > - $(AMDKFD_PATH)/kfd_kernel_queue_v10.o \ > $(AMDKFD_PATH)/kfd_packet_manager.o \ > $(AMDKFD_PATH)/kfd_process_queue_manager.o \ > $(AMDKFD_PATH)/kfd_device_queue_manager.o \ > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c > index 11d244891393..0d966408ea87 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c > @@ -332,12 +332,10 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, > case CHIP_RAVEN: > case CHIP_RENOIR: > case 
CHIP_ARCTURUS: > - kernel_queue_init_v9(&kq->ops_asic_specific); > - break; > case CHIP_NAVI10: > case CHIP_NAVI12: > case CHIP_NAVI14: > - kernel_queue_init_v10(&kq->ops_asic_specific); > + kernel_queue_init_v9(&kq->ops_asic_specific); > break; > default: > WARN(1, "Unexpected ASIC family %u", > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h > index 365fc674fea4..a7116a939029 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h > @@ -102,6 +102,5 @@ struct kernel_queue { > void kernel_queue_init_cik(struct kernel_queue_ops *ops); > void kernel_queue_init_vi(struct kernel_queue_ops *ops); > void kernel_queue_init_v9(struct kernel_queue_ops *ops); > -void kernel_queue_init_v10(struct kernel_queue_ops *ops); > > #endif /* KFD_KERNEL_QUEUE_H_ */ > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c > deleted file mode 100644 > index bfd6221acae9..000000000000 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c > +++ /dev/null [snip] > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c > index f0e4910a8865..d8f7343bfe71 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c > @@ -62,8 +62,9 @@ void kernel_queue_init_v9(struct kernel_queue_ops *ops) > ops->submit_packet = submit_packet_v9; > } > > -static int pm_map_process_v9(struct packet_manager *pm, > - uint32_t *buffer, struct qcm_process_device *qpd) > +static int pm_map_process_v9_base(struct packet_manager *pm, > + uint32_t *buffer, struct qcm_process_device *qpd, > + unsigned int sq_shader_tba_hi_trap_en_shift) > { > struct pm4_mes_map_process *packet; > uint64_t vm_page_table_base_addr = qpd->page_table_base; > @@ -85,10 +86,16 @@ static int pm_map_process_v9(struct packet_manager *pm, > > 
packet->sh_mem_config = qpd->sh_mem_config; > packet->sh_mem_bases = qpd->sh_mem_bases; > - packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8); > - packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8); > - packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8); > - packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8); > + if (qpd->tba_addr) { > + packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8); > + packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8); > + if (sq_shader_tba_hi_trap_en_shift) { > + packet->sq_shader_tba_hi |= > + 1 << sq_shader_tba_hi_trap_en_shift; If you pass in a mask instead of a shift, you don't need the conditional. I.e. packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8) | sq_shader_tba_hi_trap_en_mask; > + } > + packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8); > + packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8); > + } > > packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area); > packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area); > @@ -101,6 +108,11 @@ static int pm_map_process_v9(struct packet_manager *pm, > return 0; > } > > +static int pm_map_process_v9(struct packet_manager *pm, > + uint32_t *buffer, struct qcm_process_device *qpd) { > + return pm_map_process_v9_base(pm, buffer, qpd, 0); > +} > + > static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer, > uint64_t ib, size_t ib_size_in_dwords, bool chain) > { > @@ -352,3 +364,28 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = { > .query_status_size = sizeof(struct pm4_mes_query_status), > .release_mem_size = 0, > }; > + > +#include "gc/gc_10_1_0_sh_mask.h" > + > +static int pm_map_process_v10(struct packet_manager *pm, > + uint32_t *buffer, struct qcm_process_device *qpd) { > + return pm_map_process_v9_base(pm, buffer, qpd, > + SQ_SHADER_TBA_HI__TRAP_EN__SHIFT); With my suggestion above, pass in (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT) here. 
Regards, Felix > +} > + > +const struct packet_manager_funcs kfd_v10_pm_funcs = { > + .map_process = pm_map_process_v10, > + .runlist = pm_runlist_v9, > + .set_resources = pm_set_resources_v9, > + .map_queues = pm_map_queues_v9, > + .unmap_queues = pm_unmap_queues_v9, > + .query_status = pm_query_status_v9, > + .release_mem = NULL, > + .map_process_size = sizeof(struct pm4_mes_map_process), > + .runlist_size = sizeof(struct pm4_mes_runlist), > + .set_resources_size = sizeof(struct pm4_mes_set_resources), > + .map_queues_size = sizeof(struct pm4_mes_map_queues), > + .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), > + .query_status_size = sizeof(struct pm4_mes_query_status), > + .release_mem_size = 0, > +}; > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > index 62db4d20ed32..5127ddee24c8 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > @@ -996,9 +996,6 @@ void pm_release_ib(struct packet_manager *pm); > > /* Following PM funcs can be shared among VI and AI */ > unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size); > -int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer, > - struct scheduling_resources *res); > - > > uint64_t kfd_get_number_elems(struct kfd_dev *kfd); > _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx