From: Dave Airlie <airlied@xxxxxxxxxx>

This is a port of radv to the new low-level CS submission APIs for libdrm
that I submitted earlier.

This moves a lot of the current non-shared semaphore handling and chunk
creation out of libdrm_amdgpu. It provides a much simpler implementation
without all the list handling, and I'm sure I can clean it up a lot further.

For now I've left the old code paths under the RADV_OLD_LIBDRM define in
this patch; I'd replace that with a libdrm version check, or just rip the
whole lot out, once we get a libdrm release with the new APIs in.
---
(A rough sketch of how the fence-based semaphores feed the new raw
submission path is appended after the diff for reference.)

 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 202 +++++++++++++++++++++++---
 1 file changed, 184 insertions(+), 18 deletions(-)

diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index ffc7566..ce73b88 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -75,6 +75,10 @@ radv_amdgpu_cs(struct radeon_winsys_cs *base)
 	return (struct radv_amdgpu_cs*)base;
 }
 
+struct radv_amdgpu_sem_info {
+	int wait_sem_count;
+	struct radeon_winsys_sem **wait_sems;
+};
 static int ring_to_hw_ip(enum ring_type ring)
 {
 	switch (ring) {
@@ -89,6 +93,21 @@ static int ring_to_hw_ip(enum ring_type ring)
 	}
 }
 
+static void radv_amdgpu_wait_sems(struct radv_amdgpu_ctx *ctx,
+				  uint32_t ip_type,
+				  uint32_t ring,
+				  uint32_t sem_count,
+				  struct radeon_winsys_sem **_sem,
+				  struct radv_amdgpu_sem_info *sem_info);
+static int radv_amdgpu_signal_sems(struct radv_amdgpu_ctx *ctx,
+				   uint32_t ip_type,
+				   uint32_t ring,
+				   uint32_t sem_count,
+				   struct radeon_winsys_sem **_sem);
+static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx,
+				 struct amdgpu_cs_request *request,
+				 struct radv_amdgpu_sem_info *sem_info);
+
 static void radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx *ctx,
 					 struct radv_amdgpu_fence *fence,
 					 struct amdgpu_cs_request *req)
@@ -647,6 +666,7 @@ static void radv_assign_last_submit(struct radv_amdgpu_ctx *ctx,
 
 static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
 						int queue_idx,
+						struct radv_amdgpu_sem_info *sem_info,
 						struct radeon_winsys_cs **cs_array,
 						unsigned cs_count,
 						struct radeon_winsys_cs *initial_preamble_cs,
@@ -703,7 +723,7 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
 		ibs[0] = ((struct radv_amdgpu_cs*)initial_preamble_cs)->ib;
 	}
 
-	r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
+	r = radv_amdgpu_cs_submit(ctx, &request, sem_info);
 	if (r) {
 		if (r == -ENOMEM)
 			fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
@@ -724,6 +744,7 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
 
 static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
 						 int queue_idx,
+						 struct radv_amdgpu_sem_info *sem_info,
 						 struct radeon_winsys_cs **cs_array,
 						 unsigned cs_count,
 						 struct radeon_winsys_cs *initial_preamble_cs,
@@ -775,7 +796,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
 		}
 	}
 
-	r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
+	r = radv_amdgpu_cs_submit(ctx, &request, sem_info);
 	if (r) {
 		if (r == -ENOMEM)
 			fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
@@ -801,6 +822,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
 
 static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
 					       int queue_idx,
+					       struct radv_amdgpu_sem_info *sem_info,
 					       struct radeon_winsys_cs **cs_array,
 					       unsigned cs_count,
 					       struct radeon_winsys_cs *initial_preamble_cs,
@@ -880,7 +902,7 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
 			request.ibs = &ib;
 			request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);
 
-			r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
+			r = radv_amdgpu_cs_submit(ctx, &request, sem_info);
 			if (r) {
 				if (r == -ENOMEM)
 					fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
@@ -921,29 +943,22 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
 	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[0]);
 	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
 	int ret;
-	int i;
-
-	for (i = 0; i < wait_sem_count; i++) {
-		amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)wait_sem[i];
-		amdgpu_cs_wait_semaphore(ctx->ctx, cs->hw_ip, 0, queue_idx,
-					 sem);
-	}
+	struct radv_amdgpu_sem_info sem_info = {0};
+
+	radv_amdgpu_wait_sems(ctx, cs->hw_ip, queue_idx, wait_sem_count, wait_sem, &sem_info);
+
 	if (!cs->ws->use_ib_bos) {
-		ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, cs_array,
+		ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, &sem_info, cs_array,
							  cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
 	} else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && cs->ws->batchchain) {
-		ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, cs_array,
+		ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, &sem_info, cs_array,
							   cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
 	} else {
-		ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, cs_array,
+		ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, &sem_info, cs_array,
							    cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
 	}
 
-	for (i = 0; i < signal_sem_count; i++) {
-		amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)signal_sem[i];
-		amdgpu_cs_signal_semaphore(ctx->ctx, cs->hw_ip, 0, queue_idx,
-					   sem);
-	}
+	radv_amdgpu_signal_sems(ctx, cs->hw_ip, queue_idx, signal_sem_count, signal_sem);
 
 	return ret;
 }
@@ -1040,6 +1055,7 @@ static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx,
 	return true;
 }
 
+#ifdef RADV_OLD_LIBDRM
 static struct radeon_winsys_sem *radv_amdgpu_create_sem(struct radeon_winsys *_ws)
 {
 	int ret;
@@ -1056,6 +1072,156 @@ static void radv_amdgpu_destroy_sem(struct radeon_winsys_sem *_sem)
 	amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)_sem;
 	amdgpu_cs_destroy_semaphore(sem);
 }
+#else
+static struct radeon_winsys_sem *radv_amdgpu_create_sem(struct radeon_winsys *_ws)
+{
+	struct amdgpu_cs_fence *sem = CALLOC_STRUCT(amdgpu_cs_fence);
+	if (!sem)
+		return NULL;
+
+	return (struct radeon_winsys_sem *)sem;
+}
+
+static void radv_amdgpu_destroy_sem(struct radeon_winsys_sem *_sem)
+{
+	struct amdgpu_cs_fence *sem = (struct amdgpu_cs_fence *)_sem;
+	FREE(sem);
+}
+#endif
+
+static void radv_amdgpu_wait_sems(struct radv_amdgpu_ctx *ctx,
+				  uint32_t ip_type,
+				  uint32_t ring,
+				  uint32_t sem_count,
+				  struct radeon_winsys_sem **_sem,
+				  struct radv_amdgpu_sem_info *sem_info)
+{
+#ifdef RADV_OLD_LIBDRM
+	for (unsigned i = 0; i < sem_count; i++) {
+		amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)_sem[i];
+		amdgpu_cs_wait_semaphore(ctx->ctx, ip_type, 0, ring,
+					 sem);
+	}
+#else
+	sem_info->wait_sem_count = sem_count;
+	sem_info->wait_sems = _sem;
+#endif
+}
+
+static int radv_amdgpu_signal_sems(struct radv_amdgpu_ctx *ctx,
+				   uint32_t ip_type,
+				   uint32_t ring,
+				   uint32_t sem_count,
+				   struct radeon_winsys_sem **_sem)
+{
+#ifdef RADV_OLD_LIBDRM
+	for (unsigned i = 0; i < sem_count; i++) {
+		amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)_sem[i];
+		amdgpu_cs_signal_semaphore(ctx->ctx, ip_type, 0, ring,
+					   sem);
+	}
+	return 0;
+#else
+	for (unsigned i = 0; i < sem_count; i++) {
+		struct amdgpu_cs_fence *sem = (struct amdgpu_cs_fence *)_sem[i];
+
+		if (sem->context)
+			return -EINVAL;
+
+		*sem = ctx->last_submission[ip_type][ring].fence;
+	}
+	return 0;
+#endif
+}
+
+static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx,
+				 struct amdgpu_cs_request *request,
+				 struct radv_amdgpu_sem_info *sem_info)
+{
+#ifdef RADV_OLD_LIBDRM
+	return amdgpu_cs_submit(ctx->ctx, 0, request, 1);
+#else
+	int r;
+	int num_chunks;
+	int size;
+	bool user_fence;
+	struct drm_amdgpu_cs_chunk *chunks;
+	struct drm_amdgpu_cs_chunk_data *chunk_data;
+	struct drm_amdgpu_cs_chunk_dep *sem_dependencies = NULL;
+	int i;
+	struct amdgpu_cs_fence *sem;
+	user_fence = (request->fence_info.handle != NULL);
+	size = request->number_of_ibs + (user_fence ? 2 : 1) + 1;
+
+	chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);
+
+	size = request->number_of_ibs + (user_fence ? 1 : 0);
+
+	chunk_data = alloca(sizeof(struct drm_amdgpu_cs_chunk_data) * size);
+
+	num_chunks = request->number_of_ibs;
+	for (i = 0; i < request->number_of_ibs; i++) {
+		struct amdgpu_cs_ib_info *ib;
+		chunks[i].chunk_id = AMDGPU_CHUNK_ID_IB;
+		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
+		chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
+
+		ib = &request->ibs[i];
+
+		chunk_data[i].ib_data._pad = 0;
+		chunk_data[i].ib_data.va_start = ib->ib_mc_address;
+		chunk_data[i].ib_data.ib_bytes = ib->size * 4;
+		chunk_data[i].ib_data.ip_type = request->ip_type;
+		chunk_data[i].ib_data.ip_instance = request->ip_instance;
+		chunk_data[i].ib_data.ring = request->ring;
+		chunk_data[i].ib_data.flags = ib->flags;
+	}
+
+	if (user_fence) {
+		i = num_chunks++;
+
+		chunks[i].chunk_id = AMDGPU_CHUNK_ID_FENCE;
+		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
+		chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
+
+		amdgpu_cs_chunk_fence_info_to_data(&request->fence_info,
+						   &chunk_data[i]);
+	}
+
+	if (sem_info->wait_sem_count) {
+		sem_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) * sem_info->wait_sem_count);
+		if (!sem_dependencies) {
+			r = -ENOMEM;
+			goto error_out;
+		}
+		int sem_count = 0;
+		for (unsigned j = 0; j < sem_info->wait_sem_count; j++) {
+			sem = (struct amdgpu_cs_fence *)sem_info->wait_sems[j];
+			if (!sem->context)
+				continue;
+			struct drm_amdgpu_cs_chunk_dep *dep = &sem_dependencies[sem_count++];
+
+			amdgpu_cs_chunk_fence_to_dep(sem, dep);
+		}
+		i = num_chunks++;
+
+		/* dependencies chunk */
+		chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
+		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4 * sem_count;
+		chunks[i].chunk_data = (uint64_t)(uintptr_t)sem_dependencies;
+	}
+
+	r = amdgpu_cs_submit_raw(ctx->ws->dev,
+				 ctx->ctx,
+				 request->resources,
+				 num_chunks,
+				 chunks,
+				 &request->seq_no);
+error_out:
+	free(sem_dependencies);
+	return r;
+#endif
+}
 
 void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
 {
-- 
2.9.4
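
For anyone reviewing without the libdrm series at hand, below is a rough,
self-contained sketch of the idea rather than the radv code itself. The
demo_* helper names are invented for illustration; only amdgpu_cs_submit_raw,
amdgpu_cs_chunk_fence_to_dep and struct amdgpu_cs_fence come from the new
libdrm API this patch depends on. Signalling a semaphore just snapshots the
ring's last-submission fence, and at submit time each semaphore that was
actually signalled becomes one entry in an AMDGPU_CHUNK_ID_DEPENDENCIES chunk
passed alongside the IB chunk.

#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>

/* "Signal": remember the fence of the last submission on this ring;
 * no kernel call is needed at signal time. */
static void demo_signal_sem(struct amdgpu_cs_fence *sem,
			    const struct amdgpu_cs_fence *last_submission)
{
	*sem = *last_submission;
}

/* Submit one IB chunk plus a dependencies chunk built from the wait
 * semaphores, in a single raw call. */
static int demo_submit_with_waits(amdgpu_device_handle dev,
				  amdgpu_context_handle ctx,
				  amdgpu_bo_list_handle resources,
				  struct drm_amdgpu_cs_chunk_ib *ib_data,
				  struct amdgpu_cs_fence *waits,
				  unsigned wait_count,
				  uint64_t *seq_no)
{
	struct drm_amdgpu_cs_chunk chunks[2];
	struct drm_amdgpu_cs_chunk_dep *deps = NULL;
	unsigned num_chunks = 0, num_deps = 0;
	int r;

	/* IB chunk */
	chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB;
	chunks[num_chunks].length_dw = sizeof(*ib_data) / 4;
	chunks[num_chunks].chunk_data = (uint64_t)(uintptr_t)ib_data;
	num_chunks++;

	/* Dependencies chunk: only semaphores that were actually signalled
	 * (i.e. have a context) contribute an entry. */
	if (wait_count) {
		deps = calloc(wait_count, sizeof(*deps));
		if (!deps)
			return -ENOMEM;
		for (unsigned i = 0; i < wait_count; i++) {
			if (!waits[i].context)
				continue;
			amdgpu_cs_chunk_fence_to_dep(&waits[i], &deps[num_deps++]);
		}
		if (num_deps) {
			chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
			chunks[num_chunks].length_dw = num_deps * sizeof(*deps) / 4;
			chunks[num_chunks].chunk_data = (uint64_t)(uintptr_t)deps;
			num_chunks++;
		}
	}

	r = amdgpu_cs_submit_raw(dev, ctx, resources, num_chunks, chunks, seq_no);
	free(deps);
	return r;
}

The real path in radv_amdgpu_cs_submit() above does the same thing per
amdgpu_cs_request, with the extra IB and fence chunk handling shown in the
diff.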