Hey Felix, I wanted to confirm my understanding of your email. > Reading the updated shadow_wptr You are referring to the copy_from_user() call, correct? Just wanted to make sure I didn't misunderstand and you meant the write to mmCP_HQD_PQ_WPTR. > after the doorbell has been enabled in the HQD Referring to mmCP_HQD_PQ_DOORBELL_CONTROL, correct? The sequence between the copy_from_user and the write to mmCP_HQD_PQ_DOORBELL_CONTROL should be unaltered. Pre-patch sequence: * copy_from_user > wptr_shadow * wreg32 mmCP_HQD_PQ_DOORBELL_CONTROL * wreg32 mmCP_HQD_PQ_WPTR < wptr_shadow * wreg32 mmCP_HQD_ACTIVE Post-patch sequence: * copy_from_user > wptr_shadow * wreg32 mmCP_HQD_PQ_DOORBELL_CONTROL * wreg32 mmCP_HQD_PQ_WPTR < wptr_shadow * wreg32 mmCP_HQD_PQ_DOORBELL_CONTROL (repeated, this seems like a bug) * The double write is in 4.12-wip, and should be fixed in the patch: "drm/amdgpu: condense mqd programming sequence" * wreg32 mmCP_HQD_ACTIVE Besides a second call to CP_HQD_PQ_DOORBELL_CONTROL with the same value, everything seems to be in the same order. Therefore I think a separate patch to address this bug would probably be better. If I understand correctly we need the following sequence: * wreg32 mmCP_HQD_PQ_DOORBELL_CONTROL * copy_from_user > wptr_shadow * wreg32 mmCP_HQD_PQ_WPTR < wptr_shadow * wreg32 mmCP_HQD_ACTIVE Is that correct? Does the order of the the last two writes matter? Regards, Andres On 2017-04-21 04:35 PM, Felix Kuehling wrote: > I spotted a potential race condition in this change: Reading the updated > shadow_wptr must happen after the doorbell has been enabled in the HQD. > Once the doorbell is enabled, the CP will capture any further > submissions to the queue. If you activate the doorbell later (in > mqd_commit), the CP would not notice a submission to a user mode queue > that happened between reading the wptr from user mode and enabling the > doorbell. For the application that would result in a timeout because the > CP would never finish the job it was given. > > Regards, > Felix > > > On 17-04-21 04:02 PM, Andres Rodriguez wrote: >> Use the same gfx_*_mqd_commit function for kfd and amdgpu codepaths. >> >> This removes the last duplicates of this programming sequence. >> >> v2: fix cp_hqd_pq_wptr value >> >> Reviewed-by: Edward O'Callaghan <funfunctor at folklore1984.net> >> Acked-by: Christian König <christian.koenig at amd.com> >> Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com> >> Signed-off-by: Andres Rodriguez <andresx7 at gmail.com> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 51 ++--------------------- >> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 49 ++-------------------- >> drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 38 ++++++++++++++++- >> drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h | 5 +++ >> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 49 +++++++++++++++++++--- >> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h | 5 +++ >> 6 files changed, 97 insertions(+), 100 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c >> index 1a0a5f7..038b7ea 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c >> @@ -12,40 +12,41 @@ >> * all copies or substantial portions of the Software. >> * >> * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >> * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >> * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >> * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >> * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >> * OTHER DEALINGS IN THE SOFTWARE. >> */ >> >> #include <linux/fdtable.h> >> #include <linux/uaccess.h> >> #include <linux/firmware.h> >> #include <drm/drmP.h> >> #include "amdgpu.h" >> #include "amdgpu_amdkfd.h" >> #include "cikd.h" >> #include "cik_sdma.h" >> #include "amdgpu_ucode.h" >> +#include "gfx_v7_0.h" >> #include "gca/gfx_7_2_d.h" >> #include "gca/gfx_7_2_enum.h" >> #include "gca/gfx_7_2_sh_mask.h" >> #include "oss/oss_2_0_d.h" >> #include "oss/oss_2_0_sh_mask.h" >> #include "gmc/gmc_7_1_d.h" >> #include "gmc/gmc_7_1_sh_mask.h" >> #include "cik_structs.h" >> >> #define CIK_PIPE_PER_MEC (4) >> >> enum { >> MAX_TRAPID = 8, /* 3 bits in the bitfield. */ >> MAX_WATCH_ADDRESSES = 4 >> }; >> >> enum { >> ADDRESS_WATCH_REG_ADDR_HI = 0, >> ADDRESS_WATCH_REG_ADDR_LO, >> ADDRESS_WATCH_REG_CNTL, >> @@ -292,89 +293,45 @@ static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m) >> static inline struct cik_mqd *get_mqd(void *mqd) >> { >> return (struct cik_mqd *)mqd; >> } >> >> static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) >> { >> return (struct cik_sdma_rlc_registers *)mqd; >> } >> >> static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, >> uint32_t queue_id, uint32_t __user *wptr) >> { >> struct amdgpu_device *adev = get_amdgpu_device(kgd); >> uint32_t wptr_shadow, is_wptr_shadow_valid; >> struct cik_mqd *m; >> >> m = get_mqd(mqd); >> >> is_wptr_shadow_valid = !get_user(wptr_shadow, wptr); >> - >> - acquire_queue(kgd, pipe_id, queue_id); >> - WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo); >> - WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi); >> - WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control); >> - >> - WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo); >> - WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi); >> - WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control); >> - >> - WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control); >> - WREG32(mmCP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo); >> - WREG32(mmCP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi); >> - >> - WREG32(mmCP_HQD_IB_RPTR, m->cp_hqd_ib_rptr); >> - >> - WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state); >> - WREG32(mmCP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd); >> - WREG32(mmCP_HQD_MSG_TYPE, m->cp_hqd_msg_type); >> - >> - WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, m->cp_hqd_atomic0_preop_lo); >> - WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, m->cp_hqd_atomic0_preop_hi); >> - WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, m->cp_hqd_atomic1_preop_lo); >> - WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, m->cp_hqd_atomic1_preop_hi); >> - >> - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo); >> - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, >> - m->cp_hqd_pq_rptr_report_addr_hi); >> - >> - WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr); >> - >> - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, m->cp_hqd_pq_wptr_poll_addr_lo); >> - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, m->cp_hqd_pq_wptr_poll_addr_hi); >> - >> - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control); >> - >> - WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid); >> - >> - WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum); >> - >> - WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority); >> - WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority); >> - >> - WREG32(mmCP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr); >> - >> if (is_wptr_shadow_valid) >> - WREG32(mmCP_HQD_PQ_WPTR, wptr_shadow); >> + m->cp_hqd_pq_wptr = wptr_shadow; >> >> - WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active); >> + acquire_queue(kgd, pipe_id, queue_id); >> + gfx_v7_0_mqd_commit(adev, m); >> release_queue(kgd); >> >> return 0; >> } >> >> static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) >> { >> struct amdgpu_device *adev = get_amdgpu_device(kgd); >> struct cik_sdma_rlc_registers *m; >> uint32_t sdma_base_addr; >> >> m = get_sdma_mqd(mqd); >> sdma_base_addr = get_sdma_base_addr(m); >> >> WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, >> m->sdma_rlc_virtual_addr); >> >> WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, >> m->sdma_rlc_rb_base); >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c >> index 6697612..f9ad534 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c >> @@ -11,40 +11,41 @@ >> * The above copyright notice and this permission notice shall be included in >> * all copies or substantial portions of the Software. >> * >> * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >> * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >> * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >> * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >> * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >> * OTHER DEALINGS IN THE SOFTWARE. >> */ >> >> #include <linux/module.h> >> #include <linux/fdtable.h> >> #include <linux/uaccess.h> >> #include <linux/firmware.h> >> #include <drm/drmP.h> >> #include "amdgpu.h" >> #include "amdgpu_amdkfd.h" >> #include "amdgpu_ucode.h" >> +#include "gfx_v8_0.h" >> #include "gca/gfx_8_0_sh_mask.h" >> #include "gca/gfx_8_0_d.h" >> #include "gca/gfx_8_0_enum.h" >> #include "oss/oss_3_0_sh_mask.h" >> #include "oss/oss_3_0_d.h" >> #include "gmc/gmc_8_1_sh_mask.h" >> #include "gmc/gmc_8_1_d.h" >> #include "vi_structs.h" >> #include "vid.h" >> >> #define VI_PIPE_PER_MEC (4) >> >> struct cik_sdma_rlc_registers; >> >> /* >> * Register access functions >> */ >> >> static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, >> uint32_t sh_mem_config, >> @@ -234,87 +235,45 @@ static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m) >> static inline struct vi_mqd *get_mqd(void *mqd) >> { >> return (struct vi_mqd *)mqd; >> } >> >> static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) >> { >> return (struct cik_sdma_rlc_registers *)mqd; >> } >> >> static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, >> uint32_t queue_id, uint32_t __user *wptr) >> { >> struct vi_mqd *m; >> uint32_t shadow_wptr, valid_wptr; >> struct amdgpu_device *adev = get_amdgpu_device(kgd); >> >> m = get_mqd(mqd); >> >> valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr)); >> - acquire_queue(kgd, pipe_id, queue_id); >> - >> - WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control); >> - WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo); >> - WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi); >> - >> - WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid); >> - WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state); >> - WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority); >> - WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority); >> - WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum); >> - WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo); >> - WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi); >> - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo); >> - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, >> - m->cp_hqd_pq_rptr_report_addr_hi); >> - >> if (valid_wptr > 0) >> - WREG32(mmCP_HQD_PQ_WPTR, shadow_wptr); >> - >> - WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control); >> - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control); >> - >> - WREG32(mmCP_HQD_EOP_BASE_ADDR, m->cp_hqd_eop_base_addr_lo); >> - WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, m->cp_hqd_eop_base_addr_hi); >> - WREG32(mmCP_HQD_EOP_CONTROL, m->cp_hqd_eop_control); >> - WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr); >> - WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr); >> - WREG32(mmCP_HQD_EOP_EVENTS, m->cp_hqd_eop_done_events); >> - >> - WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, m->cp_hqd_ctx_save_base_addr_lo); >> - WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, m->cp_hqd_ctx_save_base_addr_hi); >> - WREG32(mmCP_HQD_CTX_SAVE_CONTROL, m->cp_hqd_ctx_save_control); >> - WREG32(mmCP_HQD_CNTL_STACK_OFFSET, m->cp_hqd_cntl_stack_offset); >> - WREG32(mmCP_HQD_CNTL_STACK_SIZE, m->cp_hqd_cntl_stack_size); >> - WREG32(mmCP_HQD_WG_STATE_OFFSET, m->cp_hqd_wg_state_offset); >> - WREG32(mmCP_HQD_CTX_SAVE_SIZE, m->cp_hqd_ctx_save_size); >> - >> - WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control); >> - >> - WREG32(mmCP_HQD_DEQUEUE_REQUEST, m->cp_hqd_dequeue_request); >> - WREG32(mmCP_HQD_ERROR, m->cp_hqd_error); >> - WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem); >> - WREG32(mmCP_HQD_EOP_DONES, m->cp_hqd_eop_dones); >> - >> - WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active); >> + m->cp_hqd_pq_wptr = shadow_wptr; >> >> + acquire_queue(kgd, pipe_id, queue_id); >> + gfx_v8_0_mqd_commit(adev, mqd); >> release_queue(kgd); >> >> return 0; >> } >> >> static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) >> { >> return 0; >> } >> >> static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, >> uint32_t pipe_id, uint32_t queue_id) >> { >> struct amdgpu_device *adev = get_amdgpu_device(kgd); >> uint32_t act; >> bool retval = false; >> uint32_t low, high; >> >> acquire_queue(kgd, pipe_id, queue_id); >> act = RREG32(mmCP_HQD_ACTIVE); >> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c >> index c408af5..35e5178 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c >> @@ -3050,69 +3050,103 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device *adev, >> (ring->doorbell_index << >> CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT); >> mqd->cp_hqd_pq_doorbell_control |= >> CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; >> mqd->cp_hqd_pq_doorbell_control &= >> ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK | >> CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK); >> >> } else { >> mqd->cp_hqd_pq_doorbell_control = 0; >> } >> >> /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ >> ring->wptr = 0; >> mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr); >> mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); >> >> /* set the vmid for the queue */ >> mqd->cp_hqd_vmid = 0; >> >> + /* defaults */ >> + mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL); >> + mqd->cp_hqd_ib_base_addr_lo = RREG32(mmCP_HQD_IB_BASE_ADDR); >> + mqd->cp_hqd_ib_base_addr_hi = RREG32(mmCP_HQD_IB_BASE_ADDR_HI); >> + mqd->cp_hqd_ib_rptr = RREG32(mmCP_HQD_IB_RPTR); >> + mqd->cp_hqd_persistent_state = RREG32(mmCP_HQD_PERSISTENT_STATE); >> + mqd->cp_hqd_sema_cmd = RREG32(mmCP_HQD_SEMA_CMD); >> + mqd->cp_hqd_msg_type = RREG32(mmCP_HQD_MSG_TYPE); >> + mqd->cp_hqd_atomic0_preop_lo = RREG32(mmCP_HQD_ATOMIC0_PREOP_LO); >> + mqd->cp_hqd_atomic0_preop_hi = RREG32(mmCP_HQD_ATOMIC0_PREOP_HI); >> + mqd->cp_hqd_atomic1_preop_lo = RREG32(mmCP_HQD_ATOMIC1_PREOP_LO); >> + mqd->cp_hqd_atomic1_preop_hi = RREG32(mmCP_HQD_ATOMIC1_PREOP_HI); >> + mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); >> + mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); >> + mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY); >> + mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY); >> + mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR); >> + >> /* activate the queue */ >> mqd->cp_hqd_active = 1; >> } >> >> -static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, >> - struct cik_mqd *mqd) >> +int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd) >> { >> u32 tmp; >> >> /* disable wptr polling */ >> tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); >> tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); >> WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); >> >> /* program MQD field to HW */ >> WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); >> WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); >> WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control); >> WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); >> WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); >> WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); >> WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); >> WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi); >> WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->cp_hqd_pq_rptr_report_addr_lo); >> WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->cp_hqd_pq_rptr_report_addr_hi); >> WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); >> WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); >> WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); >> >> + WREG32(mmCP_HQD_IB_CONTROL, mqd->cp_hqd_ib_control); >> + WREG32(mmCP_HQD_IB_BASE_ADDR, mqd->cp_hqd_ib_base_addr_lo); >> + WREG32(mmCP_HQD_IB_BASE_ADDR_HI, mqd->cp_hqd_ib_base_addr_hi); >> + WREG32(mmCP_HQD_IB_RPTR, mqd->cp_hqd_ib_rptr); >> + WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); >> + WREG32(mmCP_HQD_SEMA_CMD, mqd->cp_hqd_sema_cmd); >> + WREG32(mmCP_HQD_MSG_TYPE, mqd->cp_hqd_msg_type); >> + WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, mqd->cp_hqd_atomic0_preop_lo); >> + WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, mqd->cp_hqd_atomic0_preop_hi); >> + WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, mqd->cp_hqd_atomic1_preop_lo); >> + WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, mqd->cp_hqd_atomic1_preop_hi); >> + WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); >> + WREG32(mmCP_HQD_QUANTUM, mqd->cp_hqd_quantum); >> + WREG32(mmCP_HQD_PIPE_PRIORITY, mqd->cp_hqd_pipe_priority); >> + WREG32(mmCP_HQD_QUEUE_PRIORITY, mqd->cp_hqd_queue_priority); >> + WREG32(mmCP_HQD_IQ_RPTR, mqd->cp_hqd_iq_rptr); >> + >> /* activate the HQD */ >> WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); >> >> return 0; >> } >> >> static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id) >> { >> int r; >> u64 mqd_gpu_addr; >> struct cik_mqd *mqd; >> struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; >> >> if (ring->mqd_obj == NULL) { >> r = amdgpu_bo_create(adev, >> sizeof(struct cik_mqd), >> PAGE_SIZE, true, >> AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, >> &ring->mqd_obj); >> if (r) { >> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h >> index 2f5164c..6fb9c15 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h >> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h >> @@ -12,21 +12,26 @@ >> * all copies or substantial portions of the Software. >> * >> * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >> * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >> * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >> * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >> * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >> * OTHER DEALINGS IN THE SOFTWARE. >> * >> */ >> >> #ifndef __GFX_V7_0_H__ >> #define __GFX_V7_0_H__ >> >> extern const struct amdgpu_ip_block_version gfx_v7_0_ip_block; >> extern const struct amdgpu_ip_block_version gfx_v7_1_ip_block; >> extern const struct amdgpu_ip_block_version gfx_v7_2_ip_block; >> extern const struct amdgpu_ip_block_version gfx_v7_3_ip_block; >> >> +struct amdgpu_device; >> +struct cik_mqd; >> + >> +int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd); >> + >> #endif >> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c >> index c88c765..f38069e 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c >> @@ -4877,51 +4877,66 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) >> mqd->cp_hqd_vmid = 0; >> >> tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); >> tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); >> mqd->cp_hqd_persistent_state = tmp; >> >> /* set MTYPE */ >> tmp = RREG32(mmCP_HQD_IB_CONTROL); >> tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); >> tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3); >> mqd->cp_hqd_ib_control = tmp; >> >> tmp = RREG32(mmCP_HQD_IQ_TIMER); >> tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3); >> mqd->cp_hqd_iq_timer = tmp; >> >> tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL); >> tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3); >> mqd->cp_hqd_ctx_save_control = tmp; >> >> + /* defaults */ >> + mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR); >> + mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR); >> + mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY); >> + mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY); >> + mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); >> + mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO); >> + mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI); >> + mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET); >> + mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE); >> + mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET); >> + mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE); >> + mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS); >> + mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR); >> + mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); >> + mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); >> + >> /* activate the queue */ >> mqd->cp_hqd_active = 1; >> >> return 0; >> } >> >> -static int gfx_v8_0_mqd_commit(struct amdgpu_ring *ring) >> +int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, >> + struct vi_mqd *mqd) >> { >> - struct amdgpu_device *adev = ring->adev; >> - struct vi_mqd *mqd = ring->mqd_ptr; >> - >> /* disable wptr polling */ >> WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0); >> >> WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo); >> WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi); >> >> /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ >> WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control); >> >> /* enable doorbell? */ >> WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); >> >> /* set pq read/write pointers */ >> WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); >> WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); >> WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); >> >> /* set the pointer to the MQD */ >> WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); >> WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); >> @@ -4934,90 +4949,112 @@ static int gfx_v8_0_mqd_commit(struct amdgpu_ring *ring) >> WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); >> >> /* set up the HQD, this is similar to CP_RB0_CNTL */ >> WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); >> >> /* set the wb address whether it's enabled or not */ >> WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, >> mqd->cp_hqd_pq_rptr_report_addr_lo); >> WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, >> mqd->cp_hqd_pq_rptr_report_addr_hi); >> >> /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ >> WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); >> WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi); >> >> /* enable the doorbell if requested */ >> WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); >> >> /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ >> WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); >> + WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr); >> + WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr); >> + >> + /* set the HQD priority */ >> + WREG32(mmCP_HQD_PIPE_PRIORITY, mqd->cp_hqd_pipe_priority); >> + WREG32(mmCP_HQD_QUEUE_PRIORITY, mqd->cp_hqd_queue_priority); >> + WREG32(mmCP_HQD_QUANTUM, mqd->cp_hqd_quantum); >> + >> + /* set cwsr save area */ >> + WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, mqd->cp_hqd_ctx_save_base_addr_lo); >> + WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, mqd->cp_hqd_ctx_save_base_addr_hi); >> + WREG32(mmCP_HQD_CTX_SAVE_CONTROL, mqd->cp_hqd_ctx_save_control); >> + WREG32(mmCP_HQD_CNTL_STACK_OFFSET, mqd->cp_hqd_cntl_stack_offset); >> + WREG32(mmCP_HQD_CNTL_STACK_SIZE, mqd->cp_hqd_cntl_stack_size); >> + WREG32(mmCP_HQD_WG_STATE_OFFSET, mqd->cp_hqd_wg_state_offset); >> + WREG32(mmCP_HQD_CTX_SAVE_SIZE, mqd->cp_hqd_ctx_save_size); >> + >> + WREG32(mmCP_HQD_IB_CONTROL, mqd->cp_hqd_ib_control); >> + WREG32(mmCP_HQD_EOP_EVENTS, mqd->cp_hqd_eop_done_events); >> + WREG32(mmCP_HQD_ERROR, mqd->cp_hqd_error); >> + WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem); >> + WREG32(mmCP_HQD_EOP_DONES, mqd->cp_hqd_eop_dones); >> >> /* set the vmid for the queue */ >> WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); >> >> WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); >> >> /* activate the queue */ >> WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); >> >> return 0; >> } >> >> static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) >> { >> int r = 0; >> struct amdgpu_device *adev = ring->adev; >> struct vi_mqd *mqd = ring->mqd_ptr; >> int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; >> >> gfx_v8_0_kiq_setting(ring); >> >> if (adev->gfx.in_reset) { /* for GPU_RESET case */ >> /* reset MQD to a clean status */ >> if (adev->gfx.mec.mqd_backup[mqd_idx]) >> memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); >> >> /* reset ring buffer */ >> ring->wptr = 0; >> amdgpu_ring_clear_ring(ring); >> >> mutex_lock(&adev->srbm_mutex); >> vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); >> r = gfx_v8_0_deactivate_hqd(adev, 1); >> if (r) { >> dev_err(adev->dev, "failed to deactivate ring %s\n", ring->name); >> goto out_unlock; >> } >> - gfx_v8_0_mqd_commit(ring); >> + gfx_v8_0_mqd_commit(adev, mqd); >> vi_srbm_select(adev, 0, 0, 0, 0); >> mutex_unlock(&adev->srbm_mutex); >> } else { >> mutex_lock(&adev->srbm_mutex); >> vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); >> gfx_v8_0_mqd_init(ring); >> r = gfx_v8_0_deactivate_hqd(adev, 1); >> if (r) { >> dev_err(adev->dev, "failed to deactivate ring %s\n", ring->name); >> goto out_unlock; >> } >> - gfx_v8_0_mqd_commit(ring); >> + gfx_v8_0_mqd_commit(adev, mqd); >> vi_srbm_select(adev, 0, 0, 0, 0); >> mutex_unlock(&adev->srbm_mutex); >> >> if (adev->gfx.mec.mqd_backup[mqd_idx]) >> memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); >> } >> >> return r; >> >> out_unlock: >> vi_srbm_select(adev, 0, 0, 0, 0); >> mutex_unlock(&adev->srbm_mutex); >> return r; >> } >> >> static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) >> { >> struct amdgpu_device *adev = ring->adev; >> struct vi_mqd *mqd = ring->mqd_ptr; >> int mqd_idx = ring - &adev->gfx.compute_ring[0]; >> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h >> index 788cc3a..ec3f11f 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h >> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h >> @@ -10,21 +10,26 @@ >> * >> * The above copyright notice and this permission notice shall be included in >> * all copies or substantial portions of the Software. >> * >> * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >> * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >> * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR >> * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >> * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >> * OTHER DEALINGS IN THE SOFTWARE. >> * >> */ >> >> #ifndef __GFX_V8_0_H__ >> #define __GFX_V8_0_H__ >> >> extern const struct amdgpu_ip_block_version gfx_v8_0_ip_block; >> extern const struct amdgpu_ip_block_version gfx_v8_1_ip_block; >> >> +struct amdgpu_device; >> +struct vi_mqd; >> + >> +int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd); >> + >> #endif >