Use an LRU policy to map usermode rings to HW compute queues. Most compute clients use one queue, and usually the first queue available. This results in poor pipe/queue work distribution when multiple compute apps are running. In most cases pipe 0 queue 0 is the only queue that gets used. In order to better distribute work across multiple HW queues, we adopt a policy to map the usermode ring ids to the LRU HW queue. This fixes a large majority of multi-app compute workloads sharing the same HW queue, even though 7 other queues are available. v2: use ring->funcs->type instead of ring->hw_ip Signed-off-by: Andres Rodriguez <andresx7 at gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | 52 +++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 57 +++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 4 ++ 5 files changed, 117 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index dc79c0e..a4c16a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1517,6 +1517,9 @@ struct amdgpu_device { /* link all gtt */ spinlock_t gtt_list_lock; struct list_head gtt_list; + /* keep an lru list of rings by HW IP */ + struct list_head ring_lru_list; + struct mutex ring_lru_list_lock; /* record hw reset is performed */ bool has_hw_reset; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6abb238..954e3b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1719,6 +1719,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_LIST_HEAD(&adev->gtt_list); spin_lock_init(&adev->gtt_list_lock); + INIT_LIST_HEAD(&adev->ring_lru_list); + mutex_init(&adev->ring_lru_list_lock); + if (adev->asic_type >= CHIP_BONAIRE) { adev->rmmio_base = pci_resource_start(adev->pdev, 5); adev->rmmio_size = pci_resource_len(adev->pdev, 5); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c index 3918bdb..0cf5d24 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c @@ -97,6 +97,44 @@ static struct amdgpu_queue_mapper_funcs identity_mapper = { .map = amdgpu_identity_map }; +static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip) +{ + switch (hw_ip) { + case AMDGPU_HW_IP_GFX: + return AMDGPU_RING_TYPE_GFX; + case AMDGPU_HW_IP_COMPUTE: + return AMDGPU_RING_TYPE_COMPUTE; + case AMDGPU_HW_IP_DMA: + return AMDGPU_RING_TYPE_SDMA; + case AMDGPU_HW_IP_UVD: + return AMDGPU_RING_TYPE_UVD; + case AMDGPU_HW_IP_VCE: + return AMDGPU_RING_TYPE_VCE; + default: + DRM_ERROR("Invalid HW IP specified %d\n", hw_ip); + return -1; + } +} + +static int amdgpu_lru_map(struct amdgpu_device *adev, + struct amdgpu_queue_mapper *mapper, + int user_ring, + struct amdgpu_ring **out_ring) +{ + int r; + int ring_type = amdgpu_hw_ip_to_ring_type(mapper->hw_ip); + + r = amdgpu_ring_lru_get(adev, ring_type, out_ring); + if (r) + return r; + + return update_cached_map(mapper, user_ring, *out_ring); +} + +static struct amdgpu_queue_mapper_funcs lru_mapper = { + .map = amdgpu_lru_map +}; + int amdgpu_queue_mgr_init(struct amdgpu_device *adev, struct amdgpu_queue_mgr *mgr) { @@ -107,8 +145,18 @@ int amdgpu_queue_mgr_init(struct amdgpu_device *adev, memset(mgr, 0, sizeof(*mgr)); - for (i = 0; i < AMDGPU_MAX_IP_NUM; ++i) - amdgpu_queue_mapper_init(&mgr->mapper[i], i, &identity_mapper); + for (i = 0; i < AMDGPU_MAX_IP_NUM; ++i) { + switch (i) { + case AMDGPU_HW_IP_COMPUTE: + amdgpu_queue_mapper_init(&mgr->mapper[i], i, + &lru_mapper); + break; + default: + amdgpu_queue_mapper_init(&mgr->mapper[i], i, + &identity_mapper); + break; + } + } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 43cd539..31c6274 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -180,6 +180,8 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring) if (ring->funcs->end_use) ring->funcs->end_use(ring); + + amdgpu_ring_lru_touch(ring->adev, ring); } /** @@ -279,6 +281,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, } ring->ptr_mask = (ring->ring_size / 4) - 1; ring->max_dw = max_dw; + INIT_LIST_HEAD(&ring->lru_list); + amdgpu_ring_lru_touch(adev, ring); if (amdgpu_debugfs_ring_init(adev, ring)) { DRM_ERROR("Failed to register debugfs file for rings !\n"); @@ -312,6 +316,59 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) ring->adev->rings[ring->idx] = NULL; } +/** + * amdgpu_ring_lru_get - get the least recently used ring for a HW IP block + * + * @adev: amdgpu_device pointer + * @type: amdgpu_ring_type enum + * @ring: output ring + * + * Retreive the amdgpu_ring structure for the least recently used ring of + * a specific IP block (all asics). + * Returns 0 on success, error on failure. + */ +int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, + struct amdgpu_ring **ring) +{ + struct amdgpu_ring *entry; + + /* List is sorted in LRU order, find first entry corresponding + * to the desired HW IP */ + *ring = NULL; + mutex_lock(&adev->ring_lru_list_lock); + list_for_each_entry(entry, &adev->ring_lru_list, lru_list) { + if (entry->funcs->type == type) { + *ring = entry; + break; + } + } + mutex_unlock(&adev->ring_lru_list_lock); + + if (!*ring) { + DRM_ERROR("Ring LRU contains no entries for ring type:%d\n", type); + return -EINVAL; + } + + amdgpu_ring_lru_touch(adev, entry); + return 0; +} + +/** + * amdgpu_ring_lru_touch - mark a ring as recently being used + * + * @adev: amdgpu_device pointer + * @ring: ring to touch + * + * Move @ring to the the tail of the lru list + */ +void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring) +{ + /* list_move_tail handles the case where ring isn't part of the list */ + mutex_lock(&adev->ring_lru_list_lock); + list_move_tail(&ring->lru_list, &adev->ring_lru_list); + mutex_unlock(&adev->ring_lru_list_lock); +} + /* * Debugfs info */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 35da5c5..b51bdcd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -144,6 +144,7 @@ struct amdgpu_ring { const struct amdgpu_ring_funcs *funcs; struct amdgpu_fence_driver fence_drv; struct amd_gpu_scheduler sched; + struct list_head lru_list; struct amdgpu_bo *ring_obj; volatile uint32_t *ring; @@ -186,5 +187,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned ring_size, struct amdgpu_irq_src *irq_src, unsigned irq_type); void amdgpu_ring_fini(struct amdgpu_ring *ring); +int amdgpu_ring_lru_get(struct amdgpu_device *adev, int hw_ip, + struct amdgpu_ring **ring); +void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring); #endif -- 2.9.3