Applied this patch instead of the original, thanks. On Wed, Apr 25, 2018 at 12:42 AM, Felix Kuehling <felix.kuehling at amd.com> wrote: > A minor update to this patch is attached. The rest of the series is > unchanged and rebased cleanly on 4.17-rc2 on my system. > > Regards, > Felix > > > On 2018-04-10 05:33 PM, Felix Kuehling wrote: >> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com> >> --- >> drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 10 +++++++++ >> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 25 +++++++++++++++++------ >> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 7 ++++++- >> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c | 9 ++++++++ >> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c | 9 ++++++++ >> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 9 ++++++++ >> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + >> 7 files changed, 63 insertions(+), 7 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c >> index 36c9269e..5d7cccc 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c >> @@ -214,6 +214,16 @@ void write_kernel_doorbell(void __iomem *db, u32 value) >> } >> } >> >> +void write_kernel_doorbell64(void __iomem *db, u64 value) >> +{ >> + if (db) { >> + WARN(((unsigned long)db & 7) != 0, >> + "Unaligned 64-bit doorbell"); >> + writeq(value, (u64 __iomem *)db); >> + pr_debug("writing %llu to doorbell address 0x%p\n", value, db); >> + } >> +} >> + >> unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd, >> struct kfd_process *process, >> unsigned int doorbell_id) >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c >> index 9f38161..476951d 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c >> @@ -99,7 +99,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, >> kq->rptr_kernel = 
kq->rptr_mem->cpu_ptr; >> kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr; >> >> - retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel), >> + retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size, >> &kq->wptr_mem); >> >> if (retval != 0) >> @@ -208,6 +208,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq, >> size_t available_size; >> size_t queue_size_dwords; >> uint32_t wptr, rptr; >> + uint64_t wptr64; >> unsigned int *queue_address; >> >> /* When rptr == wptr, the buffer is empty. >> @@ -216,7 +217,8 @@ static int acquire_packet_buffer(struct kernel_queue *kq, >> * the opposite. So we can only use up to queue_size_dwords - 1 dwords. >> */ >> rptr = *kq->rptr_kernel; >> - wptr = *kq->wptr_kernel; >> + wptr = kq->pending_wptr; >> + wptr64 = kq->pending_wptr64; >> queue_address = (unsigned int *)kq->pq_kernel_addr; >> queue_size_dwords = kq->queue->properties.queue_size / 4; >> >> @@ -246,11 +248,13 @@ static int acquire_packet_buffer(struct kernel_queue *kq, >> while (wptr > 0) { >> queue_address[wptr] = kq->nop_packet; >> wptr = (wptr + 1) % queue_size_dwords; >> + wptr64++; >> } >> } >> >> *buffer_ptr = &queue_address[wptr]; >> kq->pending_wptr = wptr + packet_size_in_dwords; >> + kq->pending_wptr64 = wptr64 + packet_size_in_dwords; >> >> return 0; >> >> @@ -272,14 +276,18 @@ static void submit_packet(struct kernel_queue *kq) >> pr_debug("\n"); >> #endif >> >> - *kq->wptr_kernel = kq->pending_wptr; >> - write_kernel_doorbell(kq->queue->properties.doorbell_ptr, >> - kq->pending_wptr); >> + kq->ops_asic_specific.submit_packet(kq); >> } >> >> static void rollback_packet(struct kernel_queue *kq) >> { >> - kq->pending_wptr = *kq->wptr_kernel; >> + if (kq->dev->device_info->doorbell_size == 8) { >> + kq->pending_wptr64 = *kq->wptr64_kernel; >> + kq->pending_wptr = *kq->wptr_kernel % >> + (kq->queue->properties.queue_size / 4); >> + } else { >> + kq->pending_wptr = *kq->wptr_kernel; >> + } >> } >> >> struct kernel_queue 
*kernel_queue_init(struct kfd_dev *dev, >> @@ -310,6 +318,11 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, >> case CHIP_HAWAII: >> kernel_queue_init_cik(&kq->ops_asic_specific); >> break; >> + >> + case CHIP_VEGA10: >> + case CHIP_RAVEN: >> + kernel_queue_init_v9(&kq->ops_asic_specific); >> + break; >> default: >> WARN(1, "Unexpected ASIC family %u", >> dev->device_info->asic_family); >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h >> index 5940531..97aff20 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h >> @@ -72,6 +72,7 @@ struct kernel_queue { >> struct kfd_dev *dev; >> struct mqd_manager *mqd; >> struct queue *queue; >> + uint64_t pending_wptr64; >> uint32_t pending_wptr; >> unsigned int nop_packet; >> >> @@ -79,7 +80,10 @@ struct kernel_queue { >> uint32_t *rptr_kernel; >> uint64_t rptr_gpu_addr; >> struct kfd_mem_obj *wptr_mem; >> - uint32_t *wptr_kernel; >> + union { >> + uint64_t *wptr64_kernel; >> + uint32_t *wptr_kernel; >> + }; >> uint64_t wptr_gpu_addr; >> struct kfd_mem_obj *pq; >> uint64_t pq_gpu_addr; >> @@ -97,5 +101,6 @@ struct kernel_queue { >> >> void kernel_queue_init_cik(struct kernel_queue_ops *ops); >> void kernel_queue_init_vi(struct kernel_queue_ops *ops); >> +void kernel_queue_init_v9(struct kernel_queue_ops *ops); >> >> #endif /* KFD_KERNEL_QUEUE_H_ */ >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c >> index a90eb44..19e54ac 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c >> @@ -26,11 +26,13 @@ >> static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, >> enum kfd_queue_type type, unsigned int queue_size); >> static void uninitialize_cik(struct kernel_queue *kq); >> +static void submit_packet_cik(struct kernel_queue *kq); >> >> void 
kernel_queue_init_cik(struct kernel_queue_ops *ops) >> { >> ops->initialize = initialize_cik; >> ops->uninitialize = uninitialize_cik; >> + ops->submit_packet = submit_packet_cik; >> } >> >> static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, >> @@ -42,3 +44,10 @@ static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, >> static void uninitialize_cik(struct kernel_queue *kq) >> { >> } >> + >> +static void submit_packet_cik(struct kernel_queue *kq) >> +{ >> + *kq->wptr_kernel = kq->pending_wptr; >> + write_kernel_doorbell(kq->queue->properties.doorbell_ptr, >> + kq->pending_wptr); >> +} >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c >> index ece7d59..684a3bf 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c >> @@ -29,11 +29,13 @@ >> static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev, >> enum kfd_queue_type type, unsigned int queue_size); >> static void uninitialize_v9(struct kernel_queue *kq); >> +static void submit_packet_v9(struct kernel_queue *kq); >> >> void kernel_queue_init_v9(struct kernel_queue_ops *ops) >> { >> ops->initialize = initialize_v9; >> ops->uninitialize = uninitialize_v9; >> + ops->submit_packet = submit_packet_v9; >> } >> >> static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev, >> @@ -58,6 +60,13 @@ static void uninitialize_v9(struct kernel_queue *kq) >> kfd_gtt_sa_free(kq->dev, kq->eop_mem); >> } >> >> +static void submit_packet_v9(struct kernel_queue *kq) >> +{ >> + *kq->wptr64_kernel = kq->pending_wptr64; >> + write_kernel_doorbell64(kq->queue->properties.doorbell_ptr, >> + kq->pending_wptr64); >> +} >> + >> static int pm_map_process_v9(struct packet_manager *pm, >> uint32_t *buffer, struct qcm_process_device *qpd) >> { >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c >> index f9019ef..bf20c6d 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c >> @@ -29,11 +29,13 @@ >> static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev, >> enum kfd_queue_type type, unsigned int queue_size); >> static void uninitialize_vi(struct kernel_queue *kq); >> +static void submit_packet_vi(struct kernel_queue *kq); >> >> void kernel_queue_init_vi(struct kernel_queue_ops *ops) >> { >> ops->initialize = initialize_vi; >> ops->uninitialize = uninitialize_vi; >> + ops->submit_packet = submit_packet_vi; >> } >> >> static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev, >> @@ -58,6 +60,13 @@ static void uninitialize_vi(struct kernel_queue *kq) >> kfd_gtt_sa_free(kq->dev, kq->eop_mem); >> } >> >> +static void submit_packet_vi(struct kernel_queue *kq) >> +{ >> + *kq->wptr_kernel = kq->pending_wptr; >> + write_kernel_doorbell(kq->queue->properties.doorbell_ptr, >> + kq->pending_wptr); >> +} >> + >> unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size) >> { >> union PM4_MES_TYPE_3_HEADER header; >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >> index 06b210b..10d5b54 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h >> @@ -769,6 +769,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, >> void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr); >> u32 read_kernel_doorbell(u32 __iomem *db); >> void write_kernel_doorbell(void __iomem *db, u32 value); >> +void write_kernel_doorbell64(void __iomem *db, u64 value); >> unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd, >> struct kfd_process *process, >> unsigned int doorbell_id); >