Update the KGD to KFD interface to allow sharing pipes with queue
granularity instead of pipe granularity. This allows for more flexible
pipe/queue splits.

v2: fix overflow check for res.queue_mask
v3: fix shift overflow when setting res.queue_mask
v4: fix comment in is_pipe_enabled()
v5: clamp res.queue_mask to the first MEC only

Reviewed-by: Edward O'Callaghan <funfunctor at folklore1984.net>
Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
Acked-by: Christian König <christian.koenig at amd.com>
Signed-off-by: Andres Rodriguez <andresx7 at gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c         |  22 ++++-
 drivers/gpu/drm/amd/amdkfd/kfd_device.c            |   4 +
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 104 ++++++++++++++-------
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |  10 +-
 .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c  |   2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c    |   3 +-
 .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |   2 +-
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h    |  17 +++-
 drivers/gpu/drm/radeon/radeon_kfd.c                |  21 ++++-
 9 files changed, 130 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 3200ff9..8fc5aa3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -78,48 +78,64 @@ bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev)
 	return true;
 }
 
 void amdgpu_amdkfd_fini(void)
 {
 	if (kgd2kfd) {
 		kgd2kfd->exit();
 		symbol_put(kgd2kfd_init);
 	}
 }
 
 void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 {
 	if (kgd2kfd)
 		adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
 					adev->pdev, kfd2kgd);
 }
 
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
+	int i;
+	int last_valid_bit;
 	if (adev->kfd) {
 		struct kgd2kfd_shared_resources gpu_resources = {
 			.compute_vmid_bitmap = 0xFF00,
-
-			.first_compute_pipe = 1,
-			.compute_pipe_count = 4 - 1,
+			.num_mec = adev->gfx.mec.num_mec,
+			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
+			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
 		};
 
+		/* this is going to have a few of the MSBs set that we need to
+		 * clear */
+		bitmap_complement(gpu_resources.queue_bitmap,
+				  adev->gfx.mec.queue_bitmap,
+				  KGD_MAX_QUEUES);
+
+		/* According to linux/bitmap.h we shouldn't use bitmap_clear if
+		 * nbits is not compile time constant */
+		last_valid_bit = adev->gfx.mec.num_mec
+				* adev->gfx.mec.num_pipe_per_mec
+				* adev->gfx.mec.num_queue_per_pipe;
+		for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
+			clear_bit(i, gpu_resources.queue_bitmap);
+
 		amdgpu_doorbell_get_kfd_info(adev,
 				&gpu_resources.doorbell_physical_address,
 				&gpu_resources.doorbell_aperture_size,
 				&gpu_resources.doorbell_start_offset);
 
 		kgd2kfd->device_init(adev->kfd, &gpu_resources);
 	}
 }
 
 void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
 {
 	if (adev->kfd) {
 		kgd2kfd->device_exit(adev->kfd);
 		adev->kfd = NULL;
 	}
 }
 
 void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
 		const void *ih_ring_entry)
 {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 3f95f7c..88187bf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -209,40 +209,44 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
 			pasid,
 			address,
 			flags);
 
 	dev = kfd_device_by_pci_dev(pdev);
 	BUG_ON(dev == NULL);
 
 	kfd_signal_iommu_event(dev, pasid, address,
 			flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC);
 
 	return AMD_IOMMU_INV_PRI_RSP_INVALID;
 }
 
 bool kgd2kfd_device_init(struct kfd_dev *kfd,
 			 const struct kgd2kfd_shared_resources *gpu_resources)
 {
 	unsigned int size;
 
 	kfd->shared_resources = *gpu_resources;
 
+	/* We only use the first MEC */
+	if (kfd->shared_resources.num_mec > 1)
+		kfd->shared_resources.num_mec = 1;
+
 	/* calculate max size of mqds needed for queues */
 	size = max_num_of_queues_per_device *
 			kfd->device_info->mqd_size_aligned;
 
 	/*
 	 * calculate max size of runlist packet.
 	 * There can be only 2 packets at once
 	 */
 	size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_map_process) +
 		max_num_of_queues_per_device *
 		sizeof(struct pm4_map_queues) + sizeof(struct pm4_runlist)) * 2;
 
 	/* Add size of HIQ & DIQ */
 	size += KFD_KERNEL_QUEUE_SIZE * 2;
 
 	/* add another 512KB for all other allocations on gart (HPD, fences) */
 	size += 512 * 1024;
 
 	if (kfd->kfd2kgd->init_gtt_mem_allocation(
 			kfd->kgd, size, &kfd->gtt_mem,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index c064dea..955aa30 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -46,55 +46,78 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
 static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
 static int destroy_queues_cpsch(struct device_queue_manager *dqm,
 				bool preempt_static_queues, bool lock);
 
 static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
 					struct queue *q,
 					struct qcm_process_device *qpd);
 
 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
 				unsigned int sdma_queue_id);
 
 static inline
 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
 {
 	if (type == KFD_QUEUE_TYPE_SDMA)
 		return KFD_MQD_TYPE_SDMA;
 	return KFD_MQD_TYPE_CP;
 }
 
-unsigned int get_first_pipe(struct device_queue_manager *dqm)
+static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
+{
+	int i;
+	int pipe_offset = (mec * dqm->dev->shared_resources.num_pipe_per_mec
+		+ pipe) * dqm->dev->shared_resources.num_queue_per_pipe;
+
+	/* queue is available for KFD usage if bit is 1 */
+	for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
+		if (test_bit(pipe_offset + i,
+			     dqm->dev->shared_resources.queue_bitmap))
+			return true;
+	return false;
+}
+
+unsigned int get_mec_num(struct device_queue_manager *dqm)
 {
 	BUG_ON(!dqm || !dqm->dev);
-	return dqm->dev->shared_resources.first_compute_pipe;
+
+	return dqm->dev->shared_resources.num_mec;
 }
 
-unsigned int get_pipes_num(struct device_queue_manager *dqm)
+unsigned int get_queues_num(struct device_queue_manager *dqm)
 {
 	BUG_ON(!dqm || !dqm->dev);
-	return dqm->dev->shared_resources.compute_pipe_count;
+	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
+				KGD_MAX_QUEUES);
 }
 
-static inline unsigned int get_pipes_num_cpsch(void)
+unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
 {
-	return PIPE_PER_ME_CP_SCHEDULING;
+	BUG_ON(!dqm || !dqm->dev);
+	return dqm->dev->shared_resources.num_queue_per_pipe;
+}
+
+unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
+{
+	BUG_ON(!dqm || !dqm->dev);
+	return dqm->dev->shared_resources.num_pipe_per_mec;
 }
 
 void program_sh_mem_settings(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd)
 {
 	return dqm->dev->kfd2kgd->program_sh_mem_settings(
 						dqm->dev->kgd, qpd->vmid,
 						qpd->sh_mem_config,
 						qpd->sh_mem_ape1_base,
 						qpd->sh_mem_ape1_limit,
 						qpd->sh_mem_bases);
 }
 
 static int allocate_vmid(struct device_queue_manager *dqm,
 			struct qcm_process_device *qpd,
 			struct queue *q)
 {
 	int bit, allocated_vmid;
 
 	if (dqm->vmid_bitmap == 0)
@@ -183,63 +206,67 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
 	/*
 	 * Unconditionally increment this counter, regardless of the queue's
 	 * type or whether the queue is active.
 	 */
 	dqm->total_queue_count++;
 	pr_debug("Total of %d queues are accountable so far\n",
 			dqm->total_queue_count);
 
 	mutex_unlock(&dqm->lock);
 	return 0;
 }
 
 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
 {
 	bool set;
 	int pipe, bit, i;
 
 	set = false;
 
-	for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_num(dqm);
-			pipe = ((pipe + 1) % get_pipes_num(dqm)), ++i) {
+	for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_per_mec(dqm);
+			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {
+
+		if (!is_pipe_enabled(dqm, 0, pipe))
+			continue;
+
 		if (dqm->allocated_queues[pipe] != 0) {
 			bit = find_first_bit(
 				(unsigned long *)&dqm->allocated_queues[pipe],
-				QUEUES_PER_PIPE);
+				get_queues_per_pipe(dqm));
 
 			clear_bit(bit,
 				(unsigned long *)&dqm->allocated_queues[pipe]);
 			q->pipe = pipe;
 			q->queue = bit;
 			set = true;
 			break;
 		}
 	}
 
 	if (!set)
 		return -EBUSY;
 
 	pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n",
 				__func__, q->pipe, q->queue);
 	/* horizontal hqd allocation */
-	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_num(dqm);
+	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);
 
 	return 0;
 }
 
 static inline void deallocate_hqd(struct device_queue_manager *dqm,
 				struct queue *q)
 {
 	set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]);
 }
 
 static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
 					struct queue *q,
 					struct qcm_process_device *qpd)
 {
 	int retval;
 	struct mqd_manager *mqd;
 
 	BUG_ON(!dqm || !q || !qpd);
 
 	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
@@ -452,95 +479,84 @@ static int unregister_process_nocpsch(struct device_queue_manager *dqm,
 out:
 	mutex_unlock(&dqm->lock);
 	return retval;
 }
 
 static int
 set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
 			unsigned int vmid)
 {
 	uint32_t pasid_mapping;
 
 	pasid_mapping = (pasid == 0) ?
 			0 : (uint32_t)pasid |
 				ATC_VMID_PASID_MAPPING_VALID;
 
 	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
 						dqm->dev->kgd, pasid_mapping,
 						vmid);
 }
 
-int init_pipelines(struct device_queue_manager *dqm,
-		unsigned int pipes_num, unsigned int first_pipe)
-{
-	BUG_ON(!dqm || !dqm->dev);
-
-	pr_debug("kfd: In func %s\n", __func__);
-
-	return 0;
-}
-
 static void init_interrupts(struct device_queue_manager *dqm)
 {
 	unsigned int i;
 
 	BUG_ON(dqm == NULL);
 
-	for (i = 0 ; i < get_pipes_num(dqm) ; i++)
-		dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd,
-				i + get_first_pipe(dqm));
+	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
+		if (is_pipe_enabled(dqm, 0, i))
+			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
 }
 
 static int init_scheduler(struct device_queue_manager *dqm)
 {
-	int retval;
+	int retval = 0;
 
 	BUG_ON(!dqm);
 
 	pr_debug("kfd: In %s\n", __func__);
 
-	retval = init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm));
 	return retval;
 }
 
 static int initialize_nocpsch(struct device_queue_manager *dqm)
 {
 	int i;
 
 	BUG_ON(!dqm);
 
 	pr_debug("kfd: In func %s num of pipes: %d\n",
-			__func__, get_pipes_num(dqm));
+			__func__, get_pipes_per_mec(dqm));
 
 	mutex_init(&dqm->lock);
 	INIT_LIST_HEAD(&dqm->queues);
 	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
 	dqm->sdma_queue_count = 0;
-	dqm->allocated_queues = kcalloc(get_pipes_num(dqm),
+	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
 					sizeof(unsigned int), GFP_KERNEL);
 	if (!dqm->allocated_queues) {
 		mutex_destroy(&dqm->lock);
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < get_pipes_num(dqm); i++)
-		dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1;
+	for (i = 0; i < get_pipes_per_mec(dqm); i++)
+		dqm->allocated_queues[i] = (1 << get_queues_per_pipe(dqm)) - 1;
 
 	dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
 	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;
 
 	init_scheduler(dqm);
 	return 0;
 }
 
 static void uninitialize_nocpsch(struct device_queue_manager *dqm)
 {
 	int i;
 
 	BUG_ON(!dqm);
 
 	BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
 
 	kfree(dqm->allocated_queues);
 	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
 		kfree(dqm->mqds[i]);
 	mutex_destroy(&dqm->lock);
@@ -613,71 +629,91 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
 		return retval;
 	}
 
 	retval = mqd->load_mqd(mqd, q->mqd, 0, 0, NULL);
 	if (retval != 0) {
 		deallocate_sdma_queue(dqm, q->sdma_id);
 		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
 		return retval;
 	}
 
 	return 0;
 }
 
 /*
  * Device Queue Manager implementation for cp scheduler
  */
 
 static int set_sched_resources(struct device_queue_manager *dqm)
 {
+	int i, mec;
 	struct scheduling_resources res;
-	unsigned int queue_num, queue_mask;
 
 	BUG_ON(!dqm);
 
 	pr_debug("kfd: In func %s\n", __func__);
 
-	queue_num = get_pipes_num_cpsch() * QUEUES_PER_PIPE;
-	queue_mask = (1 << queue_num) - 1;
 	res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
 	res.vmid_mask <<= KFD_VMID_START_OFFSET;
-	res.queue_mask = queue_mask << (get_first_pipe(dqm) * QUEUES_PER_PIPE);
+
+	res.queue_mask = 0;
+	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
+		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
+			/ dqm->dev->shared_resources.num_pipe_per_mec;
+
+		if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
+			continue;
+
+		/* only acquire queues from the first MEC */
+		if (mec > 0)
+			continue;
+
+		/* This situation may be hit in the future if a new HW
+		 * generation exposes more than 64 queues. If so, the
+		 * definition of res.queue_mask needs updating */
+		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
+			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
+			break;
+		}
+
+		res.queue_mask |= (1ull << i);
+	}
 	res.gws_mask = res.oac_mask = res.gds_heap_base =
 						res.gds_heap_size = 0;
 
 	pr_debug("kfd: scheduling resources:\n"
 			"	vmid mask: 0x%8X\n"
 			"	queue mask: 0x%8llX\n",
 			res.vmid_mask, res.queue_mask);
 
 	return pm_send_set_resources(&dqm->packets, &res);
 }
 
 static int initialize_cpsch(struct device_queue_manager *dqm)
 {
 	int retval;
 
 	BUG_ON(!dqm);
 
 	pr_debug("kfd: In func %s num of pipes: %d\n",
-			__func__, get_pipes_num_cpsch());
+			__func__, get_pipes_per_mec(dqm));
 
 	mutex_init(&dqm->lock);
 	INIT_LIST_HEAD(&dqm->queues);
 	dqm->queue_count = dqm->processes_count = 0;
 	dqm->sdma_queue_count = 0;
 	dqm->active_runlist = false;
 	retval = dqm->ops_asic_specific.initialize(dqm);
 	if (retval != 0)
 		goto fail_init_pipelines;
 
 	return 0;
 
 fail_init_pipelines:
 	mutex_destroy(&dqm->lock);
 	return retval;
 }
 
 static int start_cpsch(struct device_queue_manager *dqm)
 {
 	struct device_process_node *node;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index a625b91..66b9615 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -13,42 +13,40 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
 
 #ifndef KFD_DEVICE_QUEUE_MANAGER_H_
 #define KFD_DEVICE_QUEUE_MANAGER_H_
 
 #include <linux/rwsem.h>
 #include <linux/list.h>
 #include "kfd_priv.h"
 #include "kfd_mqd_manager.h"
 
 #define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS	(500)
-#define QUEUES_PER_PIPE				(8)
-#define PIPE_PER_ME_CP_SCHEDULING		(3)
 #define CIK_VMID_NUM				(8)
 #define KFD_VMID_START_OFFSET			(8)
 #define VMID_PER_DEVICE				CIK_VMID_NUM
 #define KFD_DQM_FIRST_PIPE			(0)
 #define CIK_SDMA_QUEUES				(4)
 #define CIK_SDMA_QUEUES_PER_ENGINE		(2)
 #define CIK_SDMA_ENGINE_NUM			(2)
 
 struct device_process_node {
 	struct qcm_process_device *qpd;
 	struct list_head list;
 };
 
 /**
  * struct device_queue_manager_ops
  *
  * @create_queue: Queue creation routine.
  *
  * @destroy_queue: Queue destruction routine.
  *
@@ -165,37 +163,37 @@ struct device_queue_manager {
 	unsigned int		processes_count;
 	unsigned int		queue_count;
 	unsigned int		sdma_queue_count;
 	unsigned int		total_queue_count;
 	unsigned int		next_pipe_to_allocate;
 	unsigned int		*allocated_queues;
 	unsigned int		sdma_bitmap;
 	unsigned int		vmid_bitmap;
 	uint64_t		pipelines_addr;
 	struct kfd_mem_obj	*pipeline_mem;
 	uint64_t		fence_gpu_addr;
 	unsigned int		*fence_addr;
 	struct kfd_mem_obj	*fence_mem;
 	bool			active_runlist;
 };
 
 void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops);
 void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops);
 void program_sh_mem_settings(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd);
-int init_pipelines(struct device_queue_manager *dqm,
-		unsigned int pipes_num, unsigned int first_pipe);
-unsigned int get_first_pipe(struct device_queue_manager *dqm);
-unsigned int get_pipes_num(struct device_queue_manager *dqm);
+unsigned int get_mec_num(struct device_queue_manager *dqm);
+unsigned int get_queues_num(struct device_queue_manager *dqm);
+unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
+unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
 
 static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
 {
 	return (pdd->lds_base >> 16) & 0xFF;
 }
 
 static inline unsigned int
 get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd)
 {
 	return (pdd->lds_base >> 60) & 0x0E;
 }
 
 #endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index c6f435a..48dc056 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -134,22 +134,22 @@ static int register_process_cik(struct device_queue_manager *dqm,
 }
 
 static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
 				struct qcm_process_device *qpd)
 {
 	uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT);
 
 	if (q->process->is_32bit_user_mode)
 		value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) |
 				get_sh_mem_bases_32(qpd_to_pdd(qpd));
 	else
 		value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
 				SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
 				SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
 
 	q->properties.sdma_vm_addr = value;
 }
 
 static int initialize_cpsch_cik(struct device_queue_manager *dqm)
 {
-	return init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm));
+	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index ca8c093..7131998 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -48,42 +48,41 @@ static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size)
 	header.count = packet_size/sizeof(uint32_t) - 2;
 	header.type = PM4_TYPE_3;
 
 	return header.u32all;
 }
 
 static void pm_calc_rlib_size(struct packet_manager *pm,
 				unsigned int *rlib_size,
 				bool *over_subscription)
 {
 	unsigned int process_count, queue_count;
 	unsigned int map_queue_size;
 
 	BUG_ON(!pm || !rlib_size || !over_subscription);
 
 	process_count = pm->dqm->processes_count;
 	queue_count = pm->dqm->queue_count;
 
 	/* check if there is over subscription*/
 	*over_subscription = false;
-	if ((process_count > 1) ||
-		queue_count > PIPE_PER_ME_CP_SCHEDULING * QUEUES_PER_PIPE) {
+	if ((process_count > 1) || queue_count > get_queues_num(pm->dqm)) {
 		*over_subscription = true;
 		pr_debug("kfd: over subscribed runlist\n");
 	}
 
 	map_queue_size =
 		(pm->dqm->dev->device_info->asic_family == CHIP_CARRIZO) ?
 		sizeof(struct pm4_mes_map_queues) :
 		sizeof(struct pm4_map_queues);
 	/* calculate run list ib allocation size */
 	*rlib_size = process_count * sizeof(struct pm4_map_process) +
 		     queue_count * map_queue_size;
 
 	/*
 	 * Increase the allocation size in case we need a chained run list
 	 * when over subscription
 	 */
 	if (*over_subscription)
 		*rlib_size += sizeof(struct pm4_runlist);
 
 	pr_debug("kfd: runlist ib size %d\n", *rlib_size);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index e1fb40b..32cdf2b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -192,41 +192,41 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 	if (retval != 0)
 		return retval;
 
 	if (list_empty(&pqm->queues)) {
 		pdd->qpd.pqm = pqm;
 		dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
 	}
 
 	pqn = kzalloc(sizeof(struct process_queue_node), GFP_KERNEL);
 	if (!pqn) {
 		retval = -ENOMEM;
 		goto err_allocate_pqn;
 	}
 
 	switch (type) {
 	case KFD_QUEUE_TYPE_SDMA:
 	case KFD_QUEUE_TYPE_COMPUTE:
 		/* check if there is over subscription */
 		if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
 		((dev->dqm->processes_count >= VMID_PER_DEVICE) ||
-		(dev->dqm->queue_count >= PIPE_PER_ME_CP_SCHEDULING * QUEUES_PER_PIPE))) {
+		(dev->dqm->queue_count >= get_queues_num(dev->dqm)))) {
 			pr_err("kfd: over-subscription is not allowed in radeon_kfd.sched_policy == 1\n");
 			retval = -EPERM;
 			goto err_create_queue;
 		}
 
 		retval = create_cp_queue(pqm, dev, &q, &q_properties, f, *qid);
 		if (retval != 0)
 			goto err_create_queue;
 		pqn->q = q;
 		pqn->kq = NULL;
 		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd,
 						&q->properties.vmid);
 		pr_debug("DQM returned %d for create_queue\n", retval);
 		print_queue(q);
 		break;
 	case KFD_QUEUE_TYPE_DIQ:
 		kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ);
 		if (kq == NULL) {
 			retval = -ENOMEM;
 			goto err_create_queue;
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 67f6d19..91ef148 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -12,78 +12,85 @@
  * all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
 /*
  * This file defines the private interface between the
  * AMD kernel graphics drivers and the AMD KFD.
  */
 
 #ifndef KGD_KFD_INTERFACE_H_INCLUDED
 #define KGD_KFD_INTERFACE_H_INCLUDED
 
 #include <linux/types.h>
+#include <linux/bitmap.h>
 
 struct pci_dev;
 
-#define KFD_INTERFACE_VERSION 1
+#define KFD_INTERFACE_VERSION 2
 #define KGD_MAX_QUEUES 128
 
 struct kfd_dev;
 struct kgd_dev;
 
 struct kgd_mem;
 
 enum kgd_memory_pool {
 	KGD_POOL_SYSTEM_CACHEABLE = 1,
 	KGD_POOL_SYSTEM_WRITECOMBINE = 2,
 	KGD_POOL_FRAMEBUFFER = 3,
 };
 
 enum kgd_engine_type {
 	KGD_ENGINE_PFP = 1,
 	KGD_ENGINE_ME,
 	KGD_ENGINE_CE,
 	KGD_ENGINE_MEC1,
 	KGD_ENGINE_MEC2,
 	KGD_ENGINE_RLC,
 	KGD_ENGINE_SDMA1,
 	KGD_ENGINE_SDMA2,
 	KGD_ENGINE_MAX
 };
 
 struct kgd2kfd_shared_resources {
 	/* Bit n == 1 means VMID n is available for KFD. */
 	unsigned int compute_vmid_bitmap;
 
-	/* Compute pipes are counted starting from MEC0/pipe0 as 0. */
-	unsigned int first_compute_pipe;
+	/* number of mec available from the hardware */
+	uint32_t num_mec;
 
-	/* Number of MEC pipes available for KFD. */
-	unsigned int compute_pipe_count;
+	/* number of pipes per mec */
+	uint32_t num_pipe_per_mec;
+
+	/* number of queues per pipe */
+	uint32_t num_queue_per_pipe;
+
+	/* Bit n == 1 means Queue n is available for KFD */
+	DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES);
 
 	/* Base address of doorbell aperture. */
 	phys_addr_t doorbell_physical_address;
 
 	/* Size in bytes of doorbell aperture. */
 	size_t doorbell_aperture_size;
 
 	/* Number of bytes at start of aperture reserved for KGD. */
 	size_t doorbell_start_offset;
 };
 
 /**
  * struct kfd2kgd_calls
  *
  * @init_gtt_mem_allocation: Allocate a buffer on the gart aperture.
  * The buffer can be used for mqds, hpds, kernel queue, fence and runlists
  *
  * @free_gtt_mem: Frees a buffer that was allocated on the gart aperture
  *
  * @get_vmem_size: Retrieves (physical) size of VRAM
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
index a06e3b1..699fe7f 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -162,48 +162,63 @@ int radeon_kfd_init(void)
 	return ret;
 }
 
 void radeon_kfd_fini(void)
 {
 	if (kgd2kfd) {
 		kgd2kfd->exit();
 		symbol_put(kgd2kfd_init);
 	}
 }
 
 void radeon_kfd_device_probe(struct radeon_device *rdev)
 {
 	if (kgd2kfd)
 		rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev,
 			rdev->pdev, &kfd2kgd);
 }
 
 void radeon_kfd_device_init(struct radeon_device *rdev)
 {
+	int i, queue, pipe, mec;
+
 	if (rdev->kfd) {
 		struct kgd2kfd_shared_resources gpu_resources = {
 			.compute_vmid_bitmap = 0xFF00,
-
-			.first_compute_pipe = 1,
-			.compute_pipe_count = 4 - 1,
+			.num_mec = 1,
+			.num_pipe_per_mec = 4,
+			.num_queue_per_pipe = 8
 		};
 
+		bitmap_zero(gpu_resources.queue_bitmap, KGD_MAX_QUEUES);
+
+		for (i = 0; i < KGD_MAX_QUEUES; ++i) {
+			queue = i % gpu_resources.num_queue_per_pipe;
+			pipe = (i / gpu_resources.num_queue_per_pipe)
+				% gpu_resources.num_pipe_per_mec;
+			mec = (i / gpu_resources.num_queue_per_pipe)
+				/ gpu_resources.num_pipe_per_mec;
+
+			if (mec == 0 && pipe > 0)
+				set_bit(i, gpu_resources.queue_bitmap);
+		}
+
 		radeon_doorbell_get_kfd_info(rdev,
 				&gpu_resources.doorbell_physical_address,
 				&gpu_resources.doorbell_aperture_size,
 				&gpu_resources.doorbell_start_offset);
 
 		kgd2kfd->device_init(rdev->kfd, &gpu_resources);
 	}
 }
 
 void radeon_kfd_device_fini(struct radeon_device *rdev)
 {
 	if (rdev->kfd) {
 		kgd2kfd->device_exit(rdev->kfd);
 		rdev->kfd = NULL;
 	}
 }
 
 void radeon_kfd_interrupt(struct radeon_device *rdev, const void *ih_ring_entry)
 {
 	if (rdev->kfd)
-- 
2.9.3
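
P.S. For reviewers new to this layout: bit n of queue_bitmap refers to queue
(n % num_queue_per_pipe) on pipe ((n / num_queue_per_pipe) % num_pipe_per_mec)
of MEC ((n / num_queue_per_pipe) / num_pipe_per_mec). The standalone sketch
below is not part of the patch; the constants mirror the radeon values above,
and the helper name queue_bit() is made up for illustration. It walks through
that encoding and the first-MEC clamping that set_sched_resources() applies
when it folds the bitmap into the 64-bit HWS queue mask.

/*
 * Standalone sketch (not part of the patch) of the queue_bitmap
 * indexing convention. Build with: gcc -Wall sketch.c
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define NUM_PIPE_PER_MEC   4
#define NUM_QUEUE_PER_PIPE 8
#define KGD_MAX_QUEUES     128

/* Global bit index for (mec, pipe, queue); compare is_pipe_enabled(). */
static int queue_bit(int mec, int pipe, int queue)
{
	return (mec * NUM_PIPE_PER_MEC + pipe) * NUM_QUEUE_PER_PIPE + queue;
}

int main(void)
{
	unsigned char bitmap[KGD_MAX_QUEUES / 8] = {0};
	uint64_t queue_mask = 0;
	int i, mec, pipe, queue;

	/* MEC0/pipe0/queue0 is bit 0; MEC0/pipe1/queue0 is bit 8; the
	 * first queue of MEC1 starts right after all of MEC0's queues. */
	assert(queue_bit(0, 0, 0) == 0);
	assert(queue_bit(0, 1, 0) == 8);
	assert(queue_bit(1, 0, 0) == NUM_PIPE_PER_MEC * NUM_QUEUE_PER_PIPE);

	/* Mark MEC0 pipes 1..3 available, as radeon_kfd_device_init()
	 * does; pipe 0 stays reserved for the graphics ring. */
	for (pipe = 1; pipe < NUM_PIPE_PER_MEC; ++pipe)
		for (queue = 0; queue < NUM_QUEUE_PER_PIPE; ++queue) {
			i = queue_bit(0, pipe, queue);
			bitmap[i / 8] |= 1 << (i % 8);
		}

	/* Fold the bitmap into a 64-bit scheduler mask, clamped to the
	 * first MEC, the same way set_sched_resources() does. */
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / NUM_QUEUE_PER_PIPE) / NUM_PIPE_PER_MEC;
		if (mec > 0 || i >= 64)
			break;
		if (bitmap[i / 8] & (1 << (i % 8)))
			queue_mask |= 1ull << i;
	}

	/* Queues 8..31 (pipes 1..3 of MEC0) should be set. */
	assert(queue_mask == 0xffffff00ull);
	printf("queue mask: 0x%llx\n", (unsigned long long)queue_mask);
	return 0;
}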