On 21/07/14 05:33, Jerome Glisse wrote: > On Thu, Jul 17, 2014 at 04:29:21PM +0300, Oded Gabbay wrote: >> From: Ben Goz <ben.goz@xxxxxxx> >> >> The mqd_manager module handles MQD data structures. MQD stands for Memory Queue Descriptor, which is used by the H/W to keep the usermode queue state in memory. >> >> Signed-off-by: Ben Goz <ben.goz@xxxxxxx> >> Signed-off-by: Oded Gabbay <oded.gabbay@xxxxxxx> >> --- >> drivers/gpu/drm/radeon/amdkfd/Makefile | 2 +- >> drivers/gpu/drm/radeon/amdkfd/cik_mqds.h | 185 +++++++++++++++ >> drivers/gpu/drm/radeon/amdkfd/cik_regs.h | 220 ++++++++++++++++++ >> drivers/gpu/drm/radeon/amdkfd/kfd_mqd_manager.c | 291 ++++++++++++++++++++++++ >> drivers/gpu/drm/radeon/amdkfd/kfd_mqd_manager.h | 54 +++++ >> drivers/gpu/drm/radeon/amdkfd/kfd_priv.h | 8 + >> 6 files changed, 759 insertions(+), 1 deletion(-) >> create mode 100644 drivers/gpu/drm/radeon/amdkfd/cik_mqds.h >> create mode 100644 drivers/gpu/drm/radeon/amdkfd/cik_regs.h >> create mode 100644 drivers/gpu/drm/radeon/amdkfd/kfd_mqd_manager.c >> create mode 100644 drivers/gpu/drm/radeon/amdkfd/kfd_mqd_manager.h >> >> diff --git a/drivers/gpu/drm/radeon/amdkfd/Makefile b/drivers/gpu/drm/radeon/amdkfd/Makefile >> index dbff147..b5201f4 100644 >> --- a/drivers/gpu/drm/radeon/amdkfd/Makefile >> +++ b/drivers/gpu/drm/radeon/amdkfd/Makefile >> @@ -6,6 +6,6 @@ ccflags-y := -Iinclude/drm >> >> amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ >> kfd_pasid.o kfd_doorbell.o kfd_vidmem.o kfd_aperture.o \ >> - kfd_process.o kfd_queue.o >> + kfd_process.o kfd_queue.o kfd_mqd_manager.o >> >> obj-$(CONFIG_HSA_RADEON) += amdkfd.o >> diff --git a/drivers/gpu/drm/radeon/amdkfd/cik_mqds.h b/drivers/gpu/drm/radeon/amdkfd/cik_mqds.h >> new file mode 100644 >> index 0000000..ce75604 >> --- /dev/null >> +++ b/drivers/gpu/drm/radeon/amdkfd/cik_mqds.h >> @@ -0,0 +1,185 @@ >> +/* >> + * Copyright 2014 Advanced Micro Devices, Inc. 
>> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a >> + * copy of this software and associated documentation files (the "Software"), >> + * to deal in the Software without restriction, including without limitation >> + * the rights to use, copy, modify, merge, publish, distribute, sublicense, >> + * and/or sell copies of the Software, and to permit persons to whom the >> + * Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice shall be included in >> + * all copies or substantial portions of the Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >> + * OTHER DEALINGS IN THE SOFTWARE. >> + * >> + */ >> + >> +#ifndef CIK_MQDS_H_ >> +#define CIK_MQDS_H_ >> + >> +#pragma pack(push, 4) > > No pragma pack. > Fixed in v3. 
>> + >> +struct cik_hpd_registers { >> + u32 cp_hpd_roq_offsets; >> + u32 cp_hpd_eop_base_addr; >> + u32 cp_hpd_eop_base_addr_hi; >> + u32 cp_hpd_eop_vmid; >> + u32 cp_hpd_eop_control; >> +}; >> + >> +/* This structure represents mqd used for cp scheduling queue >> + * taken from Gfx72_cp_program_spec.pdf >> + */ >> +struct cik_compute_mqd { >> + u32 header; >> + u32 compute_dispatch_initiator; >> + u32 compute_dim_x; >> + u32 compute_dim_y; >> + u32 compute_dim_z; >> + u32 compute_start_x; >> + u32 compute_start_y; >> + u32 compute_start_z; >> + u32 compute_num_thread_x; >> + u32 compute_num_thread_y; >> + u32 compute_num_thread_z; >> + u32 compute_pipelinestat_enable; >> + u32 compute_perfcount_enable; >> + u32 compute_pgm_lo; >> + u32 compute_pgm_hi; >> + u32 compute_tba_lo; >> + u32 compute_tba_hi; >> + u32 compute_tma_lo; >> + u32 compute_tma_hi; >> + u32 compute_pgm_rsrc1; >> + u32 compute_pgm_rsrc2; >> + u32 compute_vmid; >> + u32 compute_resource_limits; >> + u32 compute_static_thread_mgmt_se0; >> + u32 compute_static_thread_mgmt_se1; >> + u32 compute_tmpring_size; >> + u32 compute_static_thread_mgmt_se2; >> + u32 compute_static_thread_mgmt_se3; >> + u32 compute_restart_x; >> + u32 compute_restart_y; >> + u32 compute_restart_z; >> + u32 compute_thread_trace_enable; >> + u32 compute_misc_reserved; >> + u32 compute_user_data[16]; >> + u32 vgt_csinvoc_count_lo; >> + u32 vgt_csinvoc_count_hi; >> + u32 cp_mqd_base_addr51; >> + u32 cp_mqd_base_addr_hi; >> + u32 cp_hqd_active; >> + u32 cp_hqd_vmid; >> + u32 cp_hqd_persistent_state; >> + u32 cp_hqd_pipe_priority; >> + u32 cp_hqd_queue_priority; >> + u32 cp_hqd_quantum; >> + u32 cp_hqd_pq_base; >> + u32 cp_hqd_pq_base_hi; >> + u32 cp_hqd_pq_rptr; >> + u32 cp_hqd_pq_rptr_report_addr; >> + u32 cp_hqd_pq_rptr_report_addr_hi; >> + u32 cp_hqd_pq_wptr_poll_addr; >> + u32 cp_hqd_pq_wptr_poll_addr_hi; >> + u32 cp_hqd_pq_doorbell_control; >> + u32 cp_hqd_pq_wptr; >> + u32 cp_hqd_pq_control; >> + u32 cp_hqd_ib_base_addr; >> + 
u32 cp_hqd_ib_base_addr_hi; >> + u32 cp_hqd_ib_rptr; >> + u32 cp_hqd_ib_control; >> + u32 cp_hqd_iq_timer; >> + u32 cp_hqd_iq_rptr; >> + u32 cp_hqd_dequeue_request; >> + u32 cp_hqd_dma_offload; >> + u32 cp_hqd_sema_cmd; >> + u32 cp_hqd_msg_type; >> + u32 cp_hqd_atomic0_preop_lo; >> + u32 cp_hqd_atomic0_preop_hi; >> + u32 cp_hqd_atomic1_preop_lo; >> + u32 cp_hqd_atomic1_preop_hi; >> + u32 cp_hqd_hq_scheduler0; >> + u32 cp_hqd_hq_scheduler1; >> + u32 cp_mqd_control; >> + u32 reserved1[10]; >> + u32 cp_mqd_query_time_lo; >> + u32 cp_mqd_query_time_hi; >> + u32 reserved2[4]; >> + u32 cp_mqd_connect_start_time_lo; >> + u32 cp_mqd_connect_start_time_hi; >> + u32 cp_mqd_connect_end_time_lo; >> + u32 cp_mqd_connect_end_time_hi; >> + u32 cp_mqd_connect_end_wf_count; >> + u32 cp_mqd_connect_end_pq_rptr; >> + u32 cp_mqd_connect_end_pq_wptr; >> + u32 cp_mqd_connect_end_ib_rptr; >> + u32 reserved3[18]; >> +}; >> + >> +/* This structure represents all *IQs >> + * Taken from Gfx73_CPC_Eng_Init_Prog.pdf >> + */ >> +struct cik_interface_mqd { >> + u32 reserved1[128]; >> + u32 cp_mqd_base_addr; >> + u32 cp_mqd_base_addr_hi; >> + u32 cp_hqd_active; >> + u32 cp_hqd_vmid; >> + u32 cp_hqd_persistent_state; >> + u32 cp_hqd_pipe_priority; >> + u32 cp_hqd_queue_priority; >> + u32 cp_hqd_quantum; >> + u32 cp_hqd_pq_base; >> + u32 cp_hqd_pq_base_hi; >> + u32 cp_hqd_pq_rptr; >> + u32 cp_hqd_pq_rptr_report_addr; >> + u32 cp_hqd_pq_rptr_report_addr_hi; >> + u32 cp_hqd_pq_wptr_poll_addr; >> + u32 cp_hqd_pq_wptr_poll_addr_hi; >> + u32 cp_hqd_pq_doorbell_control; >> + u32 cp_hqd_pq_wptr; >> + u32 cp_hqd_pq_control; >> + u32 cp_hqd_ib_base_addr; >> + u32 cp_hqd_ib_base_addr_hi; >> + u32 cp_hqd_ib_rptr; >> + u32 cp_hqd_ib_control; >> + u32 cp_hqd_iq_timer; >> + u32 cp_hqd_iq_rptr; >> + u32 cp_hqd_dequeue_request; >> + u32 cp_hqd_dma_offload; >> + u32 cp_hqd_sema_cmd; >> + u32 cp_hqd_msg_type; >> + u32 cp_hqd_atomic0_preop_lo; >> + u32 cp_hqd_atomic0_preop_hi; >> + u32 cp_hqd_atomic1_preop_lo; >> + 
u32 cp_hqd_atomic1_preop_hi; >> + u32 cp_hqd_hq_status0; >> + u32 cp_hqd_hq_control0; >> + u32 cp_mqd_control; >> + u32 reserved2[3]; >> + u32 cp_hqd_hq_status1; >> + u32 cp_hqd_hq_control1; >> + u32 reserved3[16]; >> + u32 cp_hqd_hq_status2; >> + u32 cp_hqd_hq_control2; >> + u32 cp_hqd_hq_status3; >> + u32 cp_hqd_hq_control3; >> + u32 reserved4[2]; >> + u32 cp_mqd_query_time_lo; >> + u32 cp_mqd_query_time_hi; >> + u32 reserved5[48]; >> + u32 cp_mqd_skip_process[16]; >> +}; > > I have not fully checked, but very few of the above fields are used. So please > do strip this structure down to only the used fields; we need to keep stack use as low > as possible. Moreover, the whole reserved* business kind of tells me that this > is done to match the register layout, which I would rather avoid being used as a > struct. > The struct cik_mqd, which also includes struct cik_hqd_registers, describes the mqd itself. The mqd is not registers per se, but rather a structure that is a common interface between the CPU and GPU. Although we don't initialize all its members (as some of them are for the GPU's use), I believe this is the proper way to use it. Do you have another suggestion? >> + >> +#pragma pack(pop) >> + >> + >> +#endif /* CIK_MQDS_H_ */ >> diff --git a/drivers/gpu/drm/radeon/amdkfd/cik_regs.h b/drivers/gpu/drm/radeon/amdkfd/cik_regs.h >> new file mode 100644 >> index 0000000..a6404e3 >> --- /dev/null >> +++ b/drivers/gpu/drm/radeon/amdkfd/cik_regs.h >> @@ -0,0 +1,220 @@ >> +/* >> + * Copyright 2014 Advanced Micro Devices, Inc. 
>> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a >> + * copy of this software and associated documentation files (the "Software"), >> + * to deal in the Software without restriction, including without limitation >> + * the rights to use, copy, modify, merge, publish, distribute, sublicense, >> + * and/or sell copies of the Software, and to permit persons to whom the >> + * Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice shall be included in >> + * all copies or substantial portions of the Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >> + * OTHER DEALINGS IN THE SOFTWARE. 
>> + */ >> + >> +#ifndef CIK_REGS_H >> +#define CIK_REGS_H >> + >> +#define IH_VMID_0_LUT 0x3D40u >> + >> +#define BIF_DOORBELL_CNTL 0x530Cu >> + >> +#define SRBM_GFX_CNTL 0xE44 >> +#define PIPEID(x) ((x) << 0) >> +#define MEID(x) ((x) << 2) >> +#define VMID(x) ((x) << 4) >> +#define QUEUEID(x) ((x) << 8) >> + >> +#define SQ_CONFIG 0x8C00 >> + >> +#define SH_MEM_BASES 0x8C28 >> +/* if PTR32, these are the bases for scratch and lds */ >> +#define PRIVATE_BASE(x) ((x) << 0) /* scratch */ >> +#define SHARED_BASE(x) ((x) << 16) /* LDS */ >> +#define SH_MEM_APE1_BASE 0x8C2C >> +/* if PTR32, this is the base location of GPUVM */ >> +#define SH_MEM_APE1_LIMIT 0x8C30 >> +/* if PTR32, this is the upper limit of GPUVM */ >> +#define SH_MEM_CONFIG 0x8C34 >> +#define PTR32 (1 << 0) >> +#define PRIVATE_ATC (1 << 1) >> +#define ALIGNMENT_MODE(x) ((x) << 2) >> +#define SH_MEM_ALIGNMENT_MODE_DWORD 0 >> +#define SH_MEM_ALIGNMENT_MODE_DWORD_STRICT 1 >> +#define SH_MEM_ALIGNMENT_MODE_STRICT 2 >> +#define SH_MEM_ALIGNMENT_MODE_UNALIGNED 3 >> +#define DEFAULT_MTYPE(x) ((x) << 4) >> +#define APE1_MTYPE(x) ((x) << 7) >> + >> +/* valid for both DEFAULT_MTYPE and APE1_MTYPE */ >> +#define MTYPE_CACHED 0 >> +#define MTYPE_NONCACHED 3 >> + >> + >> +#define SH_STATIC_MEM_CONFIG 0x9604u >> + >> +#define TC_CFG_L1_LOAD_POLICY0 0xAC68 >> +#define TC_CFG_L1_LOAD_POLICY1 0xAC6C >> +#define TC_CFG_L1_STORE_POLICY 0xAC70 >> +#define TC_CFG_L2_LOAD_POLICY0 0xAC74 >> +#define TC_CFG_L2_LOAD_POLICY1 0xAC78 >> +#define TC_CFG_L2_STORE_POLICY0 0xAC7C >> +#define TC_CFG_L2_STORE_POLICY1 0xAC80 >> +#define TC_CFG_L2_ATOMIC_POLICY 0xAC84 >> +#define TC_CFG_L1_VOLATILE 0xAC88 >> +#define TC_CFG_L2_VOLATILE 0xAC8C >> + >> +#define CP_PQ_WPTR_POLL_CNTL 0xC20C >> +#define WPTR_POLL_EN (1 << 31) >> + >> +#define CPC_INT_CNTL 0xC2D0 >> +#define CP_ME1_PIPE0_INT_CNTL 0xC214 >> +#define CP_ME1_PIPE1_INT_CNTL 0xC218 >> +#define CP_ME1_PIPE2_INT_CNTL 0xC21C >> +#define CP_ME1_PIPE3_INT_CNTL 0xC220 >> +#define 
CP_ME2_PIPE0_INT_CNTL 0xC224 >> +#define CP_ME2_PIPE1_INT_CNTL 0xC228 >> +#define CP_ME2_PIPE2_INT_CNTL 0xC22C >> +#define CP_ME2_PIPE3_INT_CNTL 0xC230 >> +#define DEQUEUE_REQUEST_INT_ENABLE (1 << 13) >> +#define WRM_POLL_TIMEOUT_INT_ENABLE (1 << 17) >> +#define PRIV_REG_INT_ENABLE (1 << 23) >> +#define TIME_STAMP_INT_ENABLE (1 << 26) >> +#define GENERIC2_INT_ENABLE (1 << 29) >> +#define GENERIC1_INT_ENABLE (1 << 30) >> +#define GENERIC0_INT_ENABLE (1 << 31) >> +#define CP_ME1_PIPE0_INT_STATUS 0xC214 >> +#define CP_ME1_PIPE1_INT_STATUS 0xC218 >> +#define CP_ME1_PIPE2_INT_STATUS 0xC21C >> +#define CP_ME1_PIPE3_INT_STATUS 0xC220 >> +#define CP_ME2_PIPE0_INT_STATUS 0xC224 >> +#define CP_ME2_PIPE1_INT_STATUS 0xC228 >> +#define CP_ME2_PIPE2_INT_STATUS 0xC22C >> +#define CP_ME2_PIPE3_INT_STATUS 0xC230 >> +#define DEQUEUE_REQUEST_INT_STATUS (1 << 13) >> +#define WRM_POLL_TIMEOUT_INT_STATUS (1 << 17) >> +#define PRIV_REG_INT_STATUS (1 << 23) >> +#define TIME_STAMP_INT_STATUS (1 << 26) >> +#define GENERIC2_INT_STATUS (1 << 29) >> +#define GENERIC1_INT_STATUS (1 << 30) >> +#define GENERIC0_INT_STATUS (1 << 31) >> + >> +#define CP_HPD_EOP_BASE_ADDR 0xC904 >> +#define CP_HPD_EOP_BASE_ADDR_HI 0xC908 >> +#define CP_HPD_EOP_VMID 0xC90C >> +#define CP_HPD_EOP_CONTROL 0xC910 >> +#define EOP_SIZE(x) ((x) << 0) >> +#define EOP_SIZE_MASK (0x3f << 0) >> +#define CP_MQD_BASE_ADDR 0xC914 >> +#define CP_MQD_BASE_ADDR_HI 0xC918 >> +#define CP_HQD_ACTIVE 0xC91C >> +#define CP_HQD_VMID 0xC920 >> + >> +#define CP_HQD_PERSISTENT_STATE 0xC924u >> +#define DEFAULT_CP_HQD_PERSISTENT_STATE (0x33U << 8) >> + >> +#define CP_HQD_PIPE_PRIORITY 0xC928u >> +#define CP_HQD_QUEUE_PRIORITY 0xC92Cu >> +#define CP_HQD_QUANTUM 0xC930u >> +#define QUANTUM_EN 1U >> +#define QUANTUM_SCALE_1MS (1U << 4) >> +#define QUANTUM_DURATION(x) ((x) << 8) >> + >> +#define CP_HQD_PQ_BASE 0xC934 >> +#define CP_HQD_PQ_BASE_HI 0xC938 >> +#define CP_HQD_PQ_RPTR 0xC93C >> +#define CP_HQD_PQ_RPTR_REPORT_ADDR 0xC940 >> +#define 
CP_HQD_PQ_RPTR_REPORT_ADDR_HI 0xC944 >> +#define CP_HQD_PQ_WPTR_POLL_ADDR 0xC948 >> +#define CP_HQD_PQ_WPTR_POLL_ADDR_HI 0xC94C >> +#define CP_HQD_PQ_DOORBELL_CONTROL 0xC950 >> +#define DOORBELL_OFFSET(x) ((x) << 2) >> +#define DOORBELL_OFFSET_MASK (0x1fffff << 2) >> +#define DOORBELL_SOURCE (1 << 28) >> +#define DOORBELL_SCHD_HIT (1 << 29) >> +#define DOORBELL_EN (1 << 30) >> +#define DOORBELL_HIT (1 << 31) >> +#define CP_HQD_PQ_WPTR 0xC954 >> +#define CP_HQD_PQ_CONTROL 0xC958 >> +#define QUEUE_SIZE(x) ((x) << 0) >> +#define QUEUE_SIZE_MASK (0x3f << 0) >> +#define RPTR_BLOCK_SIZE(x) ((x) << 8) >> +#define RPTR_BLOCK_SIZE_MASK (0x3f << 8) >> +#define MIN_AVAIL_SIZE(x) ((x) << 20) >> +#define PQ_ATC_EN (1 << 23) >> +#define PQ_VOLATILE (1 << 26) >> +#define NO_UPDATE_RPTR (1 << 27) >> +#define UNORD_DISPATCH (1 << 28) >> +#define ROQ_PQ_IB_FLIP (1 << 29) >> +#define PRIV_STATE (1 << 30) >> +#define KMD_QUEUE (1 << 31) >> + >> +#define DEFAULT_RPTR_BLOCK_SIZE RPTR_BLOCK_SIZE(5) >> +#define DEFAULT_MIN_AVAIL_SIZE MIN_AVAIL_SIZE(3) >> + >> +#define CP_HQD_IB_BASE_ADDR 0xC95Cu >> +#define CP_HQD_IB_BASE_ADDR_HI 0xC960u >> +#define CP_HQD_IB_RPTR 0xC964u >> +#define CP_HQD_IB_CONTROL 0xC968u >> +#define IB_ATC_EN (1U << 23) >> +#define DEFAULT_MIN_IB_AVAIL_SIZE (3U << 20) >> + >> +#define CP_HQD_DEQUEUE_REQUEST 0xC974 >> +#define DEQUEUE_REQUEST_DRAIN 1 >> +#define DEQUEUE_REQUEST_RESET 2 >> +#define DEQUEUE_INT (1U << 8) >> + >> +#define CP_HQD_SEMA_CMD 0xC97Cu >> +#define CP_HQD_MSG_TYPE 0xC980u >> +#define CP_HQD_ATOMIC0_PREOP_LO 0xC984u >> +#define CP_HQD_ATOMIC0_PREOP_HI 0xC988u >> +#define CP_HQD_ATOMIC1_PREOP_LO 0xC98Cu >> +#define CP_HQD_ATOMIC1_PREOP_HI 0xC990u >> +#define CP_HQD_HQ_SCHEDULER0 0xC994u >> +#define CP_HQD_HQ_SCHEDULER1 0xC998u >> + >> + >> +#define CP_MQD_CONTROL 0xC99C >> +#define MQD_VMID(x) ((x) << 0) >> +#define MQD_VMID_MASK (0xf << 0) >> +#define MQD_CONTROL_PRIV_STATE_EN (1U << 8) >> + >> +#define GRBM_GFX_INDEX 0x30800 >> +#define 
INSTANCE_INDEX(x) ((x) << 0) >> +#define SH_INDEX(x) ((x) << 8) >> +#define SE_INDEX(x) ((x) << 16) >> +#define SH_BROADCAST_WRITES (1 << 29) >> +#define INSTANCE_BROADCAST_WRITES (1 << 30) >> +#define SE_BROADCAST_WRITES (1 << 31) >> + >> +#define SQC_CACHES 0x30d20 >> +#define SQC_POLICY 0x8C38u >> +#define SQC_VOLATILE 0x8C3Cu >> + >> +#define CP_PERFMON_CNTL 0x36020 >> + >> +#define ATC_VMID0_PASID_MAPPING 0x339Cu >> +#define ATC_VMID_PASID_MAPPING_UPDATE_STATUS 0x3398u >> +#define ATC_VMID_PASID_MAPPING_VALID (1U << 31) >> + >> +#define ATC_VM_APERTURE0_CNTL 0x3310u >> +#define ATS_ACCESS_MODE_NEVER 0 >> +#define ATS_ACCESS_MODE_ALWAYS 1 >> + >> +#define ATC_VM_APERTURE0_CNTL2 0x3318u >> +#define ATC_VM_APERTURE0_HIGH_ADDR 0x3308u >> +#define ATC_VM_APERTURE0_LOW_ADDR 0x3300u >> +#define ATC_VM_APERTURE1_CNTL 0x3314u >> +#define ATC_VM_APERTURE1_CNTL2 0x331Cu >> +#define ATC_VM_APERTURE1_HIGH_ADDR 0x330Cu >> +#define ATC_VM_APERTURE1_LOW_ADDR 0x3304u >> + >> +#endif >> diff --git a/drivers/gpu/drm/radeon/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/radeon/amdkfd/kfd_mqd_manager.c >> new file mode 100644 >> index 0000000..5f9f9b9 >> --- /dev/null >> +++ b/drivers/gpu/drm/radeon/amdkfd/kfd_mqd_manager.c >> @@ -0,0 +1,291 @@ >> +/* >> + * Copyright 2014 Advanced Micro Devices, Inc. >> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a >> + * copy of this software and associated documentation files (the "Software"), >> + * to deal in the Software without restriction, including without limitation >> + * the rights to use, copy, modify, merge, publish, distribute, sublicense, >> + * and/or sell copies of the Software, and to permit persons to whom the >> + * Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice shall be included in >> + * all copies or substantial portions of the Software. 
>> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >> + * OTHER DEALINGS IN THE SOFTWARE. >> + * >> + */ >> + >> +#include <linux/printk.h> >> +#include <linux/slab.h> >> +#include "kfd_priv.h" >> +#include "kfd_mqd_manager.h" >> +#include "cik_mqds.h" >> +#include "cik_regs.h" >> +#include "../cik_reg.h" >> + >> +inline uint32_t lower_32(uint64_t x) >> +{ >> + return (uint32_t)x; >> +} >> + >> +inline uint32_t upper_32(uint64_t x) >> +{ >> + return (uint32_t)(x >> 32); >> +} > > Do use the kernel macros upper_32_bits and lower_32_bits. Each time you do something > like that, go check for an existing macro. 
> Done in v3 >> + >> +inline void busy_wait(unsigned long ms) >> +{ >> + while (time_before(jiffies, ms)) >> + cpu_relax(); >> +} >> + >> +static inline struct cik_mqd *get_mqd(void *mqd) >> +{ >> + return (struct cik_mqd *)mqd; >> +} >> + >> +static int init_mqd(struct mqd_manager *mm, void **mqd, kfd_mem_obj *mqd_mem_obj, >> + uint64_t *gart_addr, struct queue_properties *q) >> +{ >> + uint64_t addr; >> + struct cik_mqd *m; >> + int retval; >> + >> + BUG_ON(!mm || !q || !mqd); >> + >> + pr_debug("kfd: In func %s\n", __func__); >> + >> + retval = kfd_vidmem_alloc_map( >> + mm->dev, >> + mqd_mem_obj, >> + (void **)&m, >> + &addr, >> + ALIGN(sizeof(struct cik_mqd), 256)); >> + >> + if (retval != 0) >> + return -ENOMEM; >> + >> + memset(m, 0, sizeof(struct cik_mqd)); >> + >> + m->header = 0xC0310800; >> + m->pipeline_stat_enable = 1; >> + m->static_thread_mgmt01[0] = 0xFFFFFFFF; >> + m->static_thread_mgmt01[1] = 0xFFFFFFFF; >> + m->static_thread_mgmt23[0] = 0xFFFFFFFF; >> + m->static_thread_mgmt23[1] = 0xFFFFFFFF; >> + >> + m->queue_state.cp_hqd_persistent_state = DEFAULT_CP_HQD_PERSISTENT_STATE; >> + >> + m->queue_state.cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN; >> + m->queue_state.cp_mqd_base_addr = lower_32(addr); >> + m->queue_state.cp_mqd_base_addr_hi = upper_32(addr); >> + >> + m->queue_state.cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE | IB_ATC_EN; >> + /* Although WinKFD writes this, I suspect it should not be necessary. 
*/ >> + m->queue_state.cp_hqd_ib_control = IB_ATC_EN | DEFAULT_MIN_IB_AVAIL_SIZE; >> + >> + m->queue_state.cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS | QUANTUM_DURATION(10); >> + >> + m->queue_state.cp_hqd_pipe_priority = 1; >> + m->queue_state.cp_hqd_queue_priority = 15; >> + >> + *mqd = m; >> + if (gart_addr != NULL) >> + *gart_addr = addr; >> + retval = mm->update_mqd(mm, m, q); >> + >> + return retval; >> +} >> + >> +static void uninit_mqd(struct mqd_manager *mm, void *mqd, kfd_mem_obj mqd_mem_obj) >> +{ >> + BUG_ON(!mm || !mqd); >> + kfd_vidmem_free_unmap(mm->dev, mqd_mem_obj); >> +} >> + >> +static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr) >> +{ >> + return kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, wptr); >> + >> +} >> + >> +static int update_mqd(struct mqd_manager *mm, void *mqd, struct queue_properties *q) >> +{ >> + struct cik_mqd *m; >> + >> + BUG_ON(!mm || !q || !mqd); >> + >> + pr_debug("kfd: In func %s\n", __func__); >> + >> + m = get_mqd(mqd); >> + m->queue_state.cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN; >> + /* calculating queue size which is log base 2 of actual queue size -1 dwords and another -1 for ffs */ >> + m->queue_state.cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; >> + m->queue_state.cp_hqd_pq_base = lower_32((uint64_t)q->queue_address >> 8); >> + m->queue_state.cp_hqd_pq_base_hi = upper_32((uint64_t)q->queue_address >> 8); >> + m->queue_state.cp_hqd_pq_rptr_report_addr = lower_32((uint64_t)q->read_ptr); >> + m->queue_state.cp_hqd_pq_rptr_report_addr_hi = upper_32((uint64_t)q->read_ptr); >> + m->queue_state.cp_hqd_pq_doorbell_control = DOORBELL_EN | DOORBELL_OFFSET(q->doorbell_off); >> + >> + m->queue_state.cp_hqd_vmid = q->vmid; >> + >> + m->queue_state.cp_hqd_active = 0; >> + q->is_active = false; >> + if (q->queue_size > 0 && >> + q->queue_address != 0 && >> + q->queue_percent > 0) { >> + 
m->queue_state.cp_hqd_active = 1; >> + q->is_active = true; >> + } >> + >> + return 0; >> +} >> + >> +static int destroy_mqd(struct mqd_manager *mm, bool is_reset, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id) >> +{ >> + return kfd2kgd->hqd_destroy(mm->dev->kgd, is_reset, timeout, pipe_id, queue_id); >> +} >> + >> +bool is_occupied(struct mqd_manager *mm, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id) >> +{ >> + >> + return kfd2kgd->hqd_is_occupies(mm->dev->kgd, queue_address, pipe_id, queue_id); >> + >> +} >> + >> +/* >> + * HIQ MQD Implementation >> + */ > > A more useful comment than that. Done in v3 > >> + >> +static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, kfd_mem_obj *mqd_mem_obj, >> + uint64_t *gart_addr, struct queue_properties *q) >> +{ >> + uint64_t addr; >> + struct cik_mqd *m; >> + int retval; >> + >> + BUG_ON(!mm || !q || !mqd || !mqd_mem_obj); >> + >> + pr_debug("kfd: In func %s\n", __func__); >> + >> + retval = kfd_vidmem_alloc_map( >> + mm->dev, >> + mqd_mem_obj, >> + (void **)&m, >> + &addr, >> + ALIGN(sizeof(struct cik_mqd), PAGE_SIZE)); >> + >> + if (retval != 0) >> + return -ENOMEM; >> + >> + memset(m, 0, sizeof(struct cik_mqd)); >> + >> + m->header = 0xC0310800; >> + m->pipeline_stat_enable = 1; >> + m->static_thread_mgmt01[0] = 0xFFFFFFFF; >> + m->static_thread_mgmt01[1] = 0xFFFFFFFF; >> + m->static_thread_mgmt23[0] = 0xFFFFFFFF; >> + m->static_thread_mgmt23[1] = 0xFFFFFFFF; >> + >> + m->queue_state.cp_hqd_persistent_state = DEFAULT_CP_HQD_PERSISTENT_STATE; >> + >> + m->queue_state.cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN; >> + m->queue_state.cp_mqd_base_addr = lower_32(addr); >> + m->queue_state.cp_mqd_base_addr_hi = upper_32(addr); >> + >> + m->queue_state.cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE; >> + >> + m->queue_state.cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS | QUANTUM_DURATION(10); >> + >> + m->queue_state.cp_hqd_pipe_priority = 1; >> + m->queue_state.cp_hqd_queue_priority = 15; >> + 
>> + *mqd = m; >> + if (gart_addr) >> + *gart_addr = addr; >> + retval = mm->update_mqd(mm, m, q); >> + >> + return retval; >> +} >> + >> +static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, struct queue_properties *q) >> +{ >> + struct cik_mqd *m; >> + >> + BUG_ON(!mm || !q || !mqd); >> + >> + pr_debug("kfd: In func %s\n", __func__); >> + >> + m = get_mqd(mqd); >> + m->queue_state.cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | DEFAULT_MIN_AVAIL_SIZE | PRIV_STATE | KMD_QUEUE; >> + /* calculating queue size which is log base 2 of actual queue size -1 dwords */ >> + m->queue_state.cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; >> + m->queue_state.cp_hqd_pq_base = lower_32((uint64_t)q->queue_address >> 8); >> + m->queue_state.cp_hqd_pq_base_hi = upper_32((uint64_t)q->queue_address >> 8); >> + m->queue_state.cp_hqd_pq_rptr_report_addr = lower_32((uint64_t)q->read_ptr); >> + m->queue_state.cp_hqd_pq_rptr_report_addr_hi = upper_32((uint64_t)q->read_ptr); >> + m->queue_state.cp_hqd_pq_doorbell_control = DOORBELL_EN | DOORBELL_OFFSET(q->doorbell_off); >> + >> + m->queue_state.cp_hqd_vmid = q->vmid; >> + >> + m->queue_state.cp_hqd_active = 0; >> + q->is_active = false; >> + if (q->queue_size > 0 && >> + q->queue_address != 0 && >> + q->queue_percent > 0) { >> + m->queue_state.cp_hqd_active = 1; >> + q->is_active = true; >> + } >> + >> + return 0; >> +} >> + >> +struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct kfd_dev *dev) >> +{ >> + struct mqd_manager *mqd; >> + >> + BUG_ON(!dev); >> + BUG_ON(type >= KFD_MQD_TYPE_MAX); >> + >> + pr_debug("kfd: In func %s\n", __func__); >> + >> + mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL); >> + if (!mqd) >> + return NULL; >> + >> + mqd->dev = dev; >> + >> + switch (type) { >> + case KFD_MQD_TYPE_CIK_CP: >> + case KFD_MQD_TYPE_CIK_COMPUTE: >> + mqd->init_mqd = init_mqd; >> + mqd->uninit_mqd = uninit_mqd; >> + mqd->load_mqd = load_mqd; >> + mqd->update_mqd = update_mqd; >> + 
mqd->destroy_mqd = destroy_mqd; >> + mqd->is_occupied = is_occupied; >> + break; >> + case KFD_MQD_TYPE_CIK_HIQ: >> + mqd->init_mqd = init_mqd_hiq; >> + mqd->uninit_mqd = uninit_mqd; >> + mqd->load_mqd = load_mqd; >> + mqd->update_mqd = update_mqd_hiq; >> + mqd->destroy_mqd = destroy_mqd; >> + mqd->is_occupied = is_occupied; >> + break; >> + default: >> + kfree(mqd); >> + return NULL; >> + break; >> + } >> + >> + return mqd; >> +} >> + >> +/* SDMA queues should be implemented here when the cp will supports them */ >> diff --git a/drivers/gpu/drm/radeon/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/radeon/amdkfd/kfd_mqd_manager.h >> new file mode 100644 >> index 0000000..a6b0007 >> --- /dev/null >> +++ b/drivers/gpu/drm/radeon/amdkfd/kfd_mqd_manager.h >> @@ -0,0 +1,54 @@ >> +/* >> + * Copyright 2014 Advanced Micro Devices, Inc. >> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a >> + * copy of this software and associated documentation files (the "Software"), >> + * to deal in the Software without restriction, including without limitation >> + * the rights to use, copy, modify, merge, publish, distribute, sublicense, >> + * and/or sell copies of the Software, and to permit persons to whom the >> + * Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice shall be included in >> + * all copies or substantial portions of the Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >> + * OTHER DEALINGS IN THE SOFTWARE. 
>> + * >> + */ >> + >> +#ifndef KFD_MQD_MANAGER_H_ >> +#define KFD_MQD_MANAGER_H_ >> + >> +#include "kfd_priv.h" >> + >> +struct mqd_manager { >> + int (*init_mqd)(struct mqd_manager *mm, void **mqd, >> + kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, >> + struct queue_properties *q); >> + >> + int (*load_mqd)(struct mqd_manager *mm, void *mqd, >> + uint32_t pipe_id, uint32_t queue_id, >> + uint32_t __user *wptr); >> + >> + int (*update_mqd)(struct mqd_manager *mm, void *mqd, >> + struct queue_properties *q); >> + >> + int (*destroy_mqd)(struct mqd_manager *mm, bool is_reset, >> + unsigned int timeout, uint32_t pipe_id, >> + uint32_t queue_id); >> + >> + void (*uninit_mqd)(struct mqd_manager *mm, void *mqd, >> + kfd_mem_obj mqd_mem_obj); >> + bool (*is_occupied)(struct mqd_manager *mm, uint64_t queue_address, >> + uint32_t pipe_id, uint32_t queue_id); >> + >> + struct mutex mqd_mutex; >> + struct kfd_dev *dev; >> +}; > > It would be nice to have this interface documented. For reference, see how ttm > documents things (include/drm/ttm/*.h) > Done in v3 Oded >> + >> +#endif /* KFD_MQD_MANAGER_H_ */ >> diff --git a/drivers/gpu/drm/radeon/amdkfd/kfd_priv.h b/drivers/gpu/drm/radeon/amdkfd/kfd_priv.h >> index 94ff1c3..76494757 100644 >> --- a/drivers/gpu/drm/radeon/amdkfd/kfd_priv.h >> +++ b/drivers/gpu/drm/radeon/amdkfd/kfd_priv.h >> @@ -179,6 +179,14 @@ struct queue { >> struct kfd_dev *device; >> }; >> >> +enum KFD_MQD_TYPE { >> + KFD_MQD_TYPE_CIK_COMPUTE = 0, /* for no cp scheduling */ >> + KFD_MQD_TYPE_CIK_HIQ, /* for hiq */ >> + KFD_MQD_TYPE_CIK_CP, /* for cp queues and diq */ >> + KFD_MQD_TYPE_CIK_SDMA, /* for sdma queues */ >> + KFD_MQD_TYPE_MAX >> +}; >> + >> /* Data that is per-process-per device. */ >> struct kfd_process_device { >> /* >> -- >> 1.9.1 >> _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel