On Sat, Aug 12, 2017 at 12:56 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote: > Various bug fixes and improvements that accumulated over the last two > years. > > Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 16 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 130 +++++++++++++--- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 165 ++++++++++++++++++--- > .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 7 +- > drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 3 +- > drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h | 3 +- > drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 23 +-- > drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 16 +- > drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 - > drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 11 +- > drivers/gpu/drm/radeon/radeon_kfd.c | 12 +- > 11 files changed, 322 insertions(+), 69 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > index b8802a5..8d689ab 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > @@ -26,6 +26,7 @@ > #define AMDGPU_AMDKFD_H_INCLUDED > > #include <linux/types.h> > +#include <linux/mmu_context.h> > #include <kgd_kfd_interface.h> > > struct amdgpu_device; > @@ -60,4 +61,19 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); > > uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); > > +#define read_user_wptr(mmptr, wptr, dst) \ > + ({ \ > + bool valid = false; \ > + if ((mmptr) && (wptr)) { \ > + if ((mmptr) == current->mm) { \ > + valid = !get_user((dst), (wptr)); \ > + } else if (current->mm == NULL) { \ > + use_mm(mmptr); \ > + valid = !get_user((dst), (wptr)); \ > + unuse_mm(mmptr); \ > + } \ > + } \ > + valid; \ > + }) > + > #endif /* AMDGPU_AMDKFD_H_INCLUDED */ > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c > index dfb8c74..994d262 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c > @@ -39,6 +39,12 @@ > #include "gmc/gmc_7_1_sh_mask.h" > #include "cik_structs.h" > > +enum hqd_dequeue_request_type { > + NO_ACTION = 0, > + DRAIN_PIPE, > + RESET_WAVES > +}; > + > enum { > MAX_TRAPID = 8, /* 3 bits in the bitfield. */ > MAX_WATCH_ADDRESSES = 4 > @@ -96,12 +102,15 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, > uint32_t hpd_size, uint64_t hpd_gpu_addr); > static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); > static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, > - uint32_t queue_id, uint32_t __user *wptr); > + uint32_t queue_id, uint32_t __user *wptr, > + uint32_t wptr_shift, uint32_t wptr_mask, > + struct mm_struct *mm); > static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); > static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, > uint32_t pipe_id, uint32_t queue_id); > > -static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, > +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, > + enum kfd_preempt_type reset_type, > unsigned int utimeout, uint32_t pipe_id, > uint32_t queue_id); > static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); > @@ -290,20 +299,38 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) > } > > static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, > - uint32_t queue_id, uint32_t __user *wptr) > + uint32_t queue_id, uint32_t __user *wptr, > + uint32_t wptr_shift, uint32_t wptr_mask, > + struct mm_struct *mm) > { > struct amdgpu_device *adev = get_amdgpu_device(kgd); > - uint32_t wptr_shadow, is_wptr_shadow_valid; > struct cik_mqd *m; > + uint32_t *mqd_hqd; > + uint32_t reg, wptr_val, data; > > m = get_mqd(mqd); > > - is_wptr_shadow_valid = !get_user(wptr_shadow, wptr); > - if (is_wptr_shadow_valid) > - m->cp_hqd_pq_wptr = wptr_shadow; > - > acquire_queue(kgd, pipe_id, queue_id); > - gfx_v7_0_mqd_commit(adev, m); > + > + /* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */ > + mqd_hqd = &m->cp_mqd_base_addr_lo; > + > + for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++) > + WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); > + > + /* Copy userspace write pointer value to register. > + * Activate doorbell logic to monitor subsequent changes. > + */ > + data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, > + CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); > + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data); > + > + if (read_user_wptr(mm, wptr, wptr_val)) > + WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask); > + > + data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); > + WREG32(mmCP_HQD_ACTIVE, data); > + > release_queue(kgd); > > return 0; > @@ -382,30 +409,99 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) > return false; > } > > -static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, > +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, > + enum kfd_preempt_type reset_type, > unsigned int utimeout, uint32_t pipe_id, > uint32_t queue_id) > { > struct amdgpu_device *adev = get_amdgpu_device(kgd); > uint32_t temp; > - int timeout = utimeout; > + enum hqd_dequeue_request_type type; > + unsigned long flags, end_jiffies; > + int retry; > > acquire_queue(kgd, pipe_id, queue_id); > WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0); > > - WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type); > + switch (reset_type) { > + case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: > + type = DRAIN_PIPE; > + break; > + case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: > + type = RESET_WAVES; > + break; > + default: > + type = DRAIN_PIPE; > + break; > + } > > + /* Workaround: If IQ timer is active and the wait time is close to or > + * equal to 0, dequeueing is not safe. Wait until either the wait time > + * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is > + * cleared before continuing. Also, ensure wait times are set to at > + * least 0x3. > + */ > + local_irq_save(flags); > + preempt_disable(); > + retry = 5000; /* wait for 500 usecs at maximum */ > + while (true) { > + temp = RREG32(mmCP_HQD_IQ_TIMER); > + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) { > + pr_debug("HW is processing IQ\n"); > + goto loop; > + } > + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) { > + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE) > + == 3) /* SEM-rearm is safe */ > + break; > + /* Wait time 3 is safe for CP, but our MMIO read/write > + * time is close to 1 microsecond, so check for 10 to > + * leave more buffer room > + */ > + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME) > + >= 10) > + break; > + pr_debug("IQ timer is active\n"); > + } else > + break; > +loop: > + if (!retry) { > + pr_err("CP HQD IQ timer status time out\n"); > + break; > + } > + ndelay(100); > + --retry; > + } > + retry = 1000; > + while (true) { > + temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); > + if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK)) > + break; > + pr_debug("Dequeue request is pending\n"); > + > + if (!retry) { > + pr_err("CP HQD dequeue request time out\n"); > + break; > + } > + ndelay(100); > + --retry; > + } > + local_irq_restore(flags); > + preempt_enable(); > + > + WREG32(mmCP_HQD_DEQUEUE_REQUEST, type); > + > + end_jiffies = (utimeout * HZ / 1000) + jiffies; > while (true) { > temp = RREG32(mmCP_HQD_ACTIVE); > - if (temp & CP_HQD_ACTIVE__ACTIVE_MASK) > + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) > break; > - if (timeout <= 0) { > - pr_err("kfd: cp queue preemption time out.\n"); > + if (time_after(jiffies, end_jiffies)) { > + pr_err("cp queue preemption time out\n"); > release_queue(kgd); > return -ETIME; > } > - msleep(20); > - timeout -= 20; > + usleep_range(500, 1000); > } > > release_queue(kgd); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c > index 0fccd30..29a6f5d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c > @@ -39,6 +39,12 @@ > #include "vi_structs.h" > #include "vid.h" > > +enum hqd_dequeue_request_type { > + NO_ACTION = 0, > + DRAIN_PIPE, > + RESET_WAVES > +}; > + > struct cik_sdma_rlc_registers; > > /* > @@ -55,12 +61,15 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, > uint32_t hpd_size, uint64_t hpd_gpu_addr); > static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); > static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, > - uint32_t queue_id, uint32_t __user *wptr); > + uint32_t queue_id, uint32_t __user *wptr, > + uint32_t wptr_shift, uint32_t wptr_mask, > + struct mm_struct *mm); > static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); > static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, > uint32_t pipe_id, uint32_t queue_id); > static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); > -static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, > +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, > + enum kfd_preempt_type reset_type, > unsigned int utimeout, uint32_t pipe_id, > uint32_t queue_id); > static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, > @@ -244,20 +253,67 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) > } > > static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, > - uint32_t queue_id, uint32_t __user *wptr) > + uint32_t queue_id, uint32_t __user *wptr, > + uint32_t wptr_shift, uint32_t wptr_mask, > + struct mm_struct *mm) > { > - struct vi_mqd *m; > - uint32_t shadow_wptr, valid_wptr; > struct amdgpu_device *adev = get_amdgpu_device(kgd); > + struct vi_mqd *m; > + uint32_t *mqd_hqd; > + uint32_t reg, wptr_val, data; > > m = get_mqd(mqd); > > - valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr)); > - if (valid_wptr == 0) > - m->cp_hqd_pq_wptr = shadow_wptr; > - > acquire_queue(kgd, pipe_id, queue_id); > - gfx_v8_0_mqd_commit(adev, mqd); > + > + /* HIQ is set during driver init period with vmid set to 0*/ > + if (m->cp_hqd_vmid == 0) { > + uint32_t value, mec, pipe; > + > + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; > + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); > + > + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", > + mec, pipe, queue_id); > + value = RREG32(mmRLC_CP_SCHEDULERS); > + value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, > + ((mec << 5) | (pipe << 3) | queue_id | 0x80)); > + WREG32(mmRLC_CP_SCHEDULERS, value); > + } > + > + /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ > + mqd_hqd = &m->cp_mqd_base_addr_lo; > + > + for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_CONTROL; reg++) > + WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); > + > + /* Tonga errata: EOP RPTR/WPTR should be left unmodified. > + * This is safe since EOP RPTR==WPTR for any inactive HQD > + * on ASICs that do not support context-save. > + * EOP writes/reads can start anywhere in the ring. > + */ > + if (get_amdgpu_device(kgd)->asic_type != CHIP_TONGA) { > + WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr); > + WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr); > + WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem); > + } > + > + for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++) > + WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); > + > + /* Copy userspace write pointer value to register. > + * Activate doorbell logic to monitor subsequent changes. > + */ > + data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, > + CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); > + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data); > + > + if (read_user_wptr(mm, wptr, wptr_val)) > + WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask); > + > + data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); > + WREG32(mmCP_HQD_ACTIVE, data); > + > release_queue(kgd); > > return 0; > @@ -308,29 +364,102 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) > return false; > } > > -static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, > +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, > + enum kfd_preempt_type reset_type, > unsigned int utimeout, uint32_t pipe_id, > uint32_t queue_id) > { > struct amdgpu_device *adev = get_amdgpu_device(kgd); > uint32_t temp; > - int timeout = utimeout; > + enum hqd_dequeue_request_type type; > + unsigned long flags, end_jiffies; > + int retry; > + struct vi_mqd *m = get_mqd(mqd); > > acquire_queue(kgd, pipe_id, queue_id); > > - WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type); > + if (m->cp_hqd_vmid == 0) > + WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0); > + > + switch (reset_type) { > + case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: > + type = DRAIN_PIPE; > + break; > + case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: > + type = RESET_WAVES; > + break; > + default: > + type = DRAIN_PIPE; > + break; > + } > + > + /* Workaround: If IQ timer is active and the wait time is close to or > + * equal to 0, dequeueing is not safe. Wait until either the wait time > + * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is > + * cleared before continuing. Also, ensure wait times are set to at > + * least 0x3. > + */ > + local_irq_save(flags); > + preempt_disable(); > + retry = 5000; /* wait for 500 usecs at maximum */ > + while (true) { > + temp = RREG32(mmCP_HQD_IQ_TIMER); > + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) { > + pr_debug("HW is processing IQ\n"); > + goto loop; > + } > + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) { > + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE) > + == 3) /* SEM-rearm is safe */ > + break; > + /* Wait time 3 is safe for CP, but our MMIO read/write > + * time is close to 1 microsecond, so check for 10 to > + * leave more buffer room > + */ > + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME) > + >= 10) > + break; > + pr_debug("IQ timer is active\n"); > + } else > + break; > +loop: > + if (!retry) { > + pr_err("CP HQD IQ timer status time out\n"); > + break; > + } > + ndelay(100); > + --retry; > + } > + retry = 1000; > + while (true) { > + temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); > + if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK)) > + break; > + pr_debug("Dequeue request is pending\n"); > > + if (!retry) { > + pr_err("CP HQD dequeue request time out\n"); > + break; > + } > + ndelay(100); > + --retry; > + } > + local_irq_restore(flags); > + preempt_enable(); > + > + WREG32(mmCP_HQD_DEQUEUE_REQUEST, type); > + > + end_jiffies = (utimeout * HZ / 1000) + jiffies; > while (true) { > temp = RREG32(mmCP_HQD_ACTIVE); > - if (temp & CP_HQD_ACTIVE__ACTIVE_MASK) > + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) > break; > - if (timeout <= 0) { > - pr_err("kfd: cp queue preemption time out.\n"); > + if (time_after(jiffies, end_jiffies)) { > + pr_err("cp queue preemption time out.\n"); > release_queue(kgd); > return -ETIME; > } > - msleep(20); > - timeout -= 20; > + usleep_range(500, 1000); > } > > release_queue(kgd); > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > index 5dac29d..3891fe5 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > @@ -268,8 +268,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, > pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", > q->pipe, q->queue); > > - retval = mqd->load_mqd(mqd, q->mqd, q->pipe, > - q->queue, (uint32_t __user *) q->properties.write_ptr); > + retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties, > + q->process->mm); > if (retval) > goto out_uninit_mqd; > > @@ -585,8 +585,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, > if (retval) > goto out_deallocate_sdma_queue; > > - retval = mqd->load_mqd(mqd, q->mqd, 0, > - 0, NULL); > + retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL); > if (retval) > goto out_uninit_mqd; > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c > index 0e4d4a9..681b639 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c > @@ -143,7 +143,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, > kq->queue->pipe = KFD_CIK_HIQ_PIPE; > kq->queue->queue = KFD_CIK_HIQ_QUEUE; > kq->mqd->load_mqd(kq->mqd, kq->queue->mqd, kq->queue->pipe, > - kq->queue->queue, NULL); > + kq->queue->queue, &kq->queue->properties, > + NULL); > } else { > /* allocate fence for DIQ */ > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h > index 213a71e..1f3a6ba 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h > @@ -67,7 +67,8 @@ struct mqd_manager { > > int (*load_mqd)(struct mqd_manager *mm, void *mqd, > uint32_t pipe_id, uint32_t queue_id, > - uint32_t __user *wptr); > + struct queue_properties *p, > + struct mm_struct *mms); > > int (*update_mqd)(struct mqd_manager *mm, void *mqd, > struct queue_properties *q); > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c > index 7e0ec6b..44ffd23 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c > @@ -144,15 +144,21 @@ static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, > } > > static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, > - uint32_t queue_id, uint32_t __user *wptr) > + uint32_t queue_id, struct queue_properties *p, > + struct mm_struct *mms) > { > - return mm->dev->kfd2kgd->hqd_load > - (mm->dev->kgd, mqd, pipe_id, queue_id, wptr); > + /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ > + uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); > + uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1); > + > + return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, > + (uint32_t __user *)p->write_ptr, > + wptr_shift, wptr_mask, mms); > } > > static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, > - uint32_t pipe_id, uint32_t queue_id, > - uint32_t __user *wptr) > + uint32_t pipe_id, uint32_t queue_id, > + struct queue_properties *p, struct mm_struct *mms) > { > return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd); > } > @@ -176,20 +182,17 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, > m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); > m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); > m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); > - m->cp_hqd_pq_doorbell_control = DOORBELL_EN | > - DOORBELL_OFFSET(q->doorbell_off); > + m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(q->doorbell_off); > > m->cp_hqd_vmid = q->vmid; > > if (q->format == KFD_QUEUE_FORMAT_AQL) > m->cp_hqd_pq_control |= NO_UPDATE_RPTR; > > - m->cp_hqd_active = 0; > q->is_active = false; > if (q->queue_size > 0 && > q->queue_address != 0 && > q->queue_percent > 0) { > - m->cp_hqd_active = 1; > q->is_active = true; > } > > @@ -239,7 +242,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd, > unsigned int timeout, uint32_t pipe_id, > uint32_t queue_id) > { > - return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, type, timeout, > + return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, mqd, type, timeout, > pipe_id, queue_id); > } > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c > index 98a930e..73cbfe1 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c > @@ -94,10 +94,15 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, > > static int load_mqd(struct mqd_manager *mm, void *mqd, > uint32_t pipe_id, uint32_t queue_id, > - uint32_t __user *wptr) > + struct queue_properties *p, struct mm_struct *mms) > { > - return mm->dev->kfd2kgd->hqd_load > - (mm->dev->kgd, mqd, pipe_id, queue_id, wptr); > + /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ > + uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); > + uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1); > + > + return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, > + (uint32_t __user *)p->write_ptr, > + wptr_shift, wptr_mask, mms); > } > > static int __update_mqd(struct mqd_manager *mm, void *mqd, > @@ -122,7 +127,6 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, > m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); > > m->cp_hqd_pq_doorbell_control = > - 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN__SHIFT | > q->doorbell_off << > CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; > pr_debug("cp_hqd_pq_doorbell_control 0x%x\n", > @@ -159,12 +163,10 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, > 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT; > } > > - m->cp_hqd_active = 0; > q->is_active = false; > if (q->queue_size > 0 && > q->queue_address != 0 && > q->queue_percent > 0) { > - m->cp_hqd_active = 1; > q->is_active = true; > } > > @@ -184,7 +186,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd, > uint32_t queue_id) > { > return mm->dev->kfd2kgd->hqd_destroy > - (mm->dev->kgd, type, timeout, > + (mm->dev->kgd, mqd, type, timeout, > pipe_id, queue_id); > } > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > index f0d55cc0..30ce92c 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > @@ -239,11 +239,6 @@ enum kfd_preempt_type_filter { > KFD_PREEMPT_TYPE_FILTER_BY_PASID > }; > > -enum kfd_preempt_type { > - KFD_PREEMPT_TYPE_WAVEFRONT, > - KFD_PREEMPT_TYPE_WAVEFRONT_RESET > -}; > - > /** > * enum kfd_queue_type > * > diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h > index 36f3766..ffafda0 100644 > --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h > +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h > @@ -41,6 +41,11 @@ struct kgd_dev; > > struct kgd_mem; > > +enum kfd_preempt_type { > + KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN = 0, > + KFD_PREEMPT_TYPE_WAVEFRONT_RESET, > +}; > + > enum kgd_memory_pool { > KGD_POOL_SYSTEM_CACHEABLE = 1, > KGD_POOL_SYSTEM_WRITECOMBINE = 2, > @@ -153,14 +158,16 @@ struct kfd2kgd_calls { > int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id); > > int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, > - uint32_t queue_id, uint32_t __user *wptr); > + uint32_t queue_id, uint32_t __user *wptr, > + uint32_t wptr_shift, uint32_t wptr_mask, > + struct mm_struct *mm); > > int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd); > > bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address, > uint32_t pipe_id, uint32_t queue_id); > > - int (*hqd_destroy)(struct kgd_dev *kgd, uint32_t reset_type, > + int (*hqd_destroy)(struct kgd_dev *kgd, void *mqd, uint32_t reset_type, > unsigned int timeout, uint32_t pipe_id, > uint32_t queue_id); > > diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c > index a2ab6dc..695117a 100644 > --- a/drivers/gpu/drm/radeon/radeon_kfd.c > +++ b/drivers/gpu/drm/radeon/radeon_kfd.c > @@ -75,12 +75,14 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, > uint32_t hpd_size, uint64_t hpd_gpu_addr); > static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); > static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, > - uint32_t queue_id, uint32_t __user *wptr); > + uint32_t queue_id, uint32_t __user *wptr, > + uint32_t wptr_shift, uint32_t wptr_mask, > + struct mm_struct *mm); > static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); > static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, > uint32_t pipe_id, uint32_t queue_id); > > -static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, > +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type, > unsigned int timeout, uint32_t pipe_id, > uint32_t queue_id); > static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); > @@ -482,7 +484,9 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) > } > > static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, > - uint32_t queue_id, uint32_t __user *wptr) > + uint32_t queue_id, uint32_t __user *wptr, > + uint32_t wptr_shift, uint32_t wptr_mask, > + struct mm_struct *mm) > { > uint32_t wptr_shadow, is_wptr_shadow_valid; > struct cik_mqd *m; > @@ -636,7 +640,7 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) > return false; > } > > -static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, > +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type, > unsigned int timeout, uint32_t pipe_id, > uint32_t queue_id) > { > -- > 2.7.4 > This patch is: Acked-by: Oded Gabbay <oded.gabbay at gmail.com>