syncobj wait/signal operation is appending in command submission. v2: separate to two kinds in/out_deps functions Signed-off-by: Chunming Zhou <david1.zhou@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 111 +++++++++++++++++++++---- include/uapi/drm/amdgpu_drm.h | 9 ++ 3 files changed, 112 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 447c4c7a36d6..6e4a3db56833 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -975,6 +975,11 @@ struct amdgpu_cs_chunk { void *kdata; }; +struct amdgpu_cs_syncobj_post_dep { + struct drm_syncobj *post_dep_syncobj; + u64 point; +}; + struct amdgpu_cs_parser { struct amdgpu_device *adev; struct drm_file *filp; @@ -1003,9 +1008,8 @@ struct amdgpu_cs_parser { /* user fence */ struct amdgpu_bo_list_entry uf_entry; - + struct amdgpu_cs_syncobj_post_dep *post_dep_syncobjs; unsigned num_post_dep_syncobjs; - struct drm_syncobj **post_dep_syncobjs; }; static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 412fac238575..7429e7941f4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -204,6 +204,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs case AMDGPU_CHUNK_ID_DEPENDENCIES: case AMDGPU_CHUNK_ID_SYNCOBJ_IN: case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: break; default: @@ -783,7 +785,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, &parser->validated); for (i = 0; i < parser->num_post_dep_syncobjs; i++) - drm_syncobj_put(parser->post_dep_syncobjs[i]); + drm_syncobj_put(parser->post_dep_syncobjs[i].post_dep_syncobj); kfree(parser->post_dep_syncobjs); dma_fence_put(parser->fence); @@ -1098,13 +1100,17 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, } static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, - uint32_t handle) + uint32_t handle, u64 point, + u64 flags) { int r; struct dma_fence *fence; - r = drm_syncobj_find_fence(p->filp, handle, 0, 0, &fence); - if (r) + + r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence); + if (r) { + DRM_ERROR("syncobj %u failed to find fence!\n", handle); return r; + } r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true); dma_fence_put(fence); @@ -1115,46 +1121,108 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p, struct amdgpu_cs_chunk *chunk) { + struct drm_amdgpu_cs_chunk_sem *deps; unsigned num_deps; int i, r; - struct drm_amdgpu_cs_chunk_sem *deps; deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; num_deps = chunk->length_dw * 4 / sizeof(struct drm_amdgpu_cs_chunk_sem); + for (i = 0; i < num_deps; ++i) { + r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle, + 0, 0); + if (r) + return r; + } + + return 0; +} + + +static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; + unsigned num_deps; + int i, r; + syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_syncobj); for (i = 0; i < num_deps; ++i) { - r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle); + r = amdgpu_syncobj_lookup_and_add_to_sync(p, + syncobj_deps[i].handle, + syncobj_deps[i].point, + syncobj_deps[i].flags); if (r) return r; } + return 0; } static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, struct amdgpu_cs_chunk *chunk) { + struct drm_amdgpu_cs_chunk_sem *deps; unsigned num_deps; int i; - struct drm_amdgpu_cs_chunk_sem *deps; + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; num_deps = chunk->length_dw * 4 / sizeof(struct drm_amdgpu_cs_chunk_sem); p->post_dep_syncobjs = kmalloc_array(num_deps, - sizeof(struct drm_syncobj *), + sizeof(struct amdgpu_cs_syncobj_post_dep), GFP_KERNEL); p->num_post_dep_syncobjs = 0; if (!p->post_dep_syncobjs) return -ENOMEM; + for (i = 0; i < num_deps; ++i) { - p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle); - if (!p->post_dep_syncobjs[i]) + p->post_dep_syncobjs[i].post_dep_syncobj = + drm_syncobj_find(p->filp, deps[i].handle); + if (!p->post_dep_syncobjs[i].post_dep_syncobj) return -EINVAL; + p->post_dep_syncobjs[i].point = 0; p->num_post_dep_syncobjs++; } + + return 0; +} + + +static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk + *chunk) +{ + struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; + unsigned num_deps; + int i; + + syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_syncobj); + + p->post_dep_syncobjs = kmalloc_array(num_deps, + sizeof(struct amdgpu_cs_syncobj_post_dep), + GFP_KERNEL); + p->num_post_dep_syncobjs = 0; + + if (!p->post_dep_syncobjs) + return -ENOMEM; + + for (i = 0; i < num_deps; ++i) { + p->post_dep_syncobjs[i].post_dep_syncobj = + drm_syncobj_find(p->filp, syncobj_deps[i].handle); + if (!p->post_dep_syncobjs[i].post_dep_syncobj) + return -EINVAL; + p->post_dep_syncobjs[i].point = syncobj_deps[i].point; + p->num_post_dep_syncobjs++; + } + return 0; } @@ -1168,18 +1236,32 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, chunk = &p->chunks[i]; - if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) { + switch (chunk->chunk_id) { + case AMDGPU_CHUNK_ID_DEPENDENCIES: r = amdgpu_cs_process_fence_dep(p, chunk); if (r) return r; - } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) { + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_IN: r = amdgpu_cs_process_syncobj_in_dep(p, chunk); if (r) return r; - } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) { + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: r = amdgpu_cs_process_syncobj_out_dep(p, chunk); if (r) return r; + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: + r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk); + if (r) + return r; + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: + r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk); + if (r) + return r; + break; } } @@ -1191,7 +1273,8 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) int i; for (i = 0; i < p->num_post_dep_syncobjs; ++i) - drm_syncobj_replace_fence(p->post_dep_syncobjs[i], 0, p->fence); + drm_syncobj_replace_fence(p->post_dep_syncobjs[i].post_dep_syncobj, + p->post_dep_syncobjs[i].point, p->fence); } static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 1ceec56de015..44a34a77f986 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -517,6 +517,8 @@ struct drm_amdgpu_gem_va { #define AMDGPU_CHUNK_ID_SYNCOBJ_IN 0x04 #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05 #define AMDGPU_CHUNK_ID_BO_HANDLES 0x06 +#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT 0x07 +#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL 0x08 struct drm_amdgpu_cs_chunk { __u32 chunk_id; @@ -592,6 +594,13 @@ struct drm_amdgpu_cs_chunk_sem { __u32 handle; }; +struct drm_amdgpu_cs_chunk_syncobj { + __u32 handle; + __u32 flags; + __u64 point; +}; + + #define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ 0 #define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD 1 #define AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD 2 -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx