On GFX7 the CP does not perform a TC flush when queues are unmapped.
To avoid TC eviction from accessing an invalid VMID, flush it
explicitly before releasing a VMID.

Signed-off-by: Amber Lin <Amber.Lin at amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
---
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 22 +++++++++-
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c    | 37 ++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h              |  3 ++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c           | 51 ++++++++++++++++++++++
 4 files changed, 112 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index b3b6dab..c18e048 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -142,12 +142,31 @@ static int allocate_vmid(struct device_queue_manager *dqm,
 	return 0;
 }
 
+static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
+				struct qcm_process_device *qpd)
+{
+	uint32_t len;
+
+	if (!qpd->ib_kaddr)
+		return -ENOMEM;
+
+	len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
+
+	return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
+				qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len);
+}
+
 static void deallocate_vmid(struct device_queue_manager *dqm,
 				struct qcm_process_device *qpd,
 				struct queue *q)
 {
 	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
 
+	/* On GFX v7, CP doesn't flush TC at dequeue */
+	if (q->device->device_info->asic_family == CHIP_HAWAII)
+		if (flush_texture_cache_nocpsch(q->device, qpd))
+			pr_err("Failed to flush TC\n");
+
 	kfd_flush_tlb(qpd_to_pdd(qpd));
 
 	/* Release the vmid mapping */
@@ -792,11 +811,12 @@ static void uninitialize(struct device_queue_manager *dqm)
 static int start_nocpsch(struct device_queue_manager *dqm)
 {
 	init_interrupts(dqm);
-	return 0;
+	return pm_init(&dqm->packets, dqm);
 }
 
 static int stop_nocpsch(struct device_queue_manager *dqm)
 {
+	pm_uninit(&dqm->packets);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 0ecbd1f..7614375 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -356,6 +356,43 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
 	return retval;
 }
 
+/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size
+ *	of this packet
+ *	@gpu_addr - GPU address of the packet. It's a virtual address.
+ *	@buffer - buffer to fill up with the packet. It's a CPU kernel pointer
+ *	Return - length of the packet
+ */
+uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer)
+{
+	struct pm4_mec_release_mem *packet;
+
+	WARN_ON(!buffer);
+
+	packet = (struct pm4_mec_release_mem *)buffer;
+	memset(buffer, 0, sizeof(*packet));
+
+	packet->header.u32All = build_pm4_header(IT_RELEASE_MEM,
+						sizeof(*packet));
+
+	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
+	packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
+	packet->bitfields2.tcl1_action_ena = 1;
+	packet->bitfields2.tc_action_ena = 1;
+	packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
+	packet->bitfields2.atc = 0;
+
+	packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
+	packet->bitfields3.int_sel =
+		int_sel___release_mem__send_interrupt_after_write_confirm;
+
+	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
+	packet->address_hi = upper_32_bits(gpu_addr);
+
+	packet->data_lo = 0;
+
+	return sizeof(*packet) / sizeof(unsigned int);
+}
+
 int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
 {
 	pm->dqm = dqm;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 78200ba..050fd00 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -494,6 +494,7 @@ struct qcm_process_device {
 
 	/* IB memory */
 	uint64_t ib_base;
+	void *ib_kaddr;
 };
 
 /* KFD Memory Eviction */
@@ -832,6 +833,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
 
 void pm_release_ib(struct packet_manager *pm);
 
+uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer);
+
 uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
 
 /* Events */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 12101fb..25d7dfe 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -151,6 +151,53 @@ static int kfd_process_alloc_gpuvm(struct kfd_process *p,
 	return err;
 }
 
+/* kfd_process_reserve_ib_mem - Reserve memory inside the process for IB usage
+ *	The memory reserved is for KFD to submit IB to AMDGPU from kernel.
+ *	If the memory is reserved successfully, ib_kaddr will have
+ *	the CPU/kernel address. Check ib_kaddr before accessing the
+ *	memory.
+ */
+static int kfd_process_reserve_ib_mem(struct kfd_process *p)
+{
+	int ret = 0;
+	struct kfd_process_device *temp, *pdd = NULL;
+	struct kfd_dev *kdev = NULL;
+	struct qcm_process_device *qpd = NULL;
+	void *kaddr;
+	uint32_t flags = ALLOC_MEM_FLAGS_GTT |
+			 ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
+			 ALLOC_MEM_FLAGS_WRITABLE |
+			 ALLOC_MEM_FLAGS_EXECUTABLE;
+
+	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
+				per_device_list) {
+		kdev = pdd->dev;
+		qpd = &pdd->qpd;
+		if (qpd->ib_kaddr)
+			continue;
+
+		if (qpd->ib_base) { /* is dGPU */
+			ret = kfd_process_alloc_gpuvm(p, kdev,
+				qpd->ib_base, PAGE_SIZE,
+				&kaddr, pdd, flags);
+			if (!ret)
+				qpd->ib_kaddr = kaddr;
+			else
+				/* In case of error, the kfd_bos for some pdds
+				 * which are already allocated successfully
+				 * will be freed in upper level function
+				 * i.e. create_process().
+				 */
+				return ret;
+		} else {
+			/* FIXME: Support APU */
+			continue;
+		}
+	}
+
+	return 0;
+}
+
 struct kfd_process *kfd_create_process(struct file *filep)
 {
 	struct kfd_process *process;
@@ -499,6 +546,9 @@ static struct kfd_process *create_process(const struct task_struct *thread,
 	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
 	process->last_restore_timestamp = get_jiffies_64();
 
+	err = kfd_process_reserve_ib_mem(process);
+	if (err)
+		goto err_reserve_ib_mem;
 	err = kfd_process_init_cwsr(process, filep);
 	if (err)
 		goto err_init_cwsr;
@@ -506,6 +556,7 @@ static struct kfd_process *create_process(const struct task_struct *thread,
 	return process;
 
 err_init_cwsr:
+err_reserve_ib_mem:
 	kfd_process_free_outstanding_kfd_bos(process);
 	kfd_process_destroy_pdds(process);
 err_init_apertures:
-- 
2.7.4