Am 20.09.2018 um 13:03 schrieb Chunming Zhou:
syncobj wait/signal operation is appending in command submission.
Signed-off-by: Chunming Zhou <david1.zhou@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 114 +++++++++++++++++++------
include/uapi/drm/amdgpu_drm.h | 10 +++
3 files changed, 104 insertions(+), 28 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 447c4c7a36d6..6e4a3db56833 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -975,6 +975,11 @@ struct amdgpu_cs_chunk {
void *kdata;
};
+struct amdgpu_cs_syncobj_post_dep {
+ struct drm_syncobj *post_dep_syncobj;
+ u64 point;
+};
+
struct amdgpu_cs_parser {
struct amdgpu_device *adev;
struct drm_file *filp;
@@ -1003,9 +1008,8 @@ struct amdgpu_cs_parser {
/* user fence */
struct amdgpu_bo_list_entry uf_entry;
-
+ struct amdgpu_cs_syncobj_post_dep *post_dep_syncobjs;
unsigned num_post_dep_syncobjs;
- struct drm_syncobj **post_dep_syncobjs;
};
static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 412fac238575..0efe75bf2f03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -204,6 +204,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
case AMDGPU_CHUNK_ID_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
+ case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+ case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
break;
default:
@@ -783,7 +785,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
&parser->validated);
for (i = 0; i < parser->num_post_dep_syncobjs; i++)
- drm_syncobj_put(parser->post_dep_syncobjs[i]);
+ drm_syncobj_put(parser->post_dep_syncobjs[i].post_dep_syncobj);
kfree(parser->post_dep_syncobjs);
dma_fence_put(parser->fence);
@@ -1098,11 +1100,13 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
}
static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
- uint32_t handle)
+ uint32_t handle, u64 point,
+ u64 flags)
{
int r;
struct dma_fence *fence;
- r = drm_syncobj_find_fence(p->filp, handle, 0, 0, &fence);
+
+ r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
if (r)
return r;
@@ -1113,48 +1117,91 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
}
static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
+ struct amdgpu_cs_chunk *chunk,
+ bool timeline)
You should really just duplicate the function. The only think you have
in common here is allocating the post_dep_sync objects.
Would also prevent that we need to make the two chunk types mutual
exclusive.
Christian.
{
unsigned num_deps;
int i, r;
- struct drm_amdgpu_cs_chunk_sem *deps;
- deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
- num_deps = chunk->length_dw * 4 /
- sizeof(struct drm_amdgpu_cs_chunk_sem);
+ if (!timeline) {
+ struct drm_amdgpu_cs_chunk_sem *deps;
- for (i = 0; i < num_deps; ++i) {
- r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
+ deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_sem);
+ for (i = 0; i < num_deps; ++i) {
+ r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
+ 0, 0);
if (r)
return r;
+ }
+ } else {
+ struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
+
+ syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+ for (i = 0; i < num_deps; ++i) {
+ r = amdgpu_syncobj_lookup_and_add_to_sync(p, syncobj_deps[i].handle,
+ syncobj_deps[i].point,
+ syncobj_deps[i].flags);
+ if (r)
+ return r;
+ }
}
+
return 0;
}
static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
+ struct amdgpu_cs_chunk *chunk,
+ bool timeline)
{
unsigned num_deps;
int i;
struct drm_amdgpu_cs_chunk_sem *deps;
- deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
- num_deps = chunk->length_dw * 4 /
- sizeof(struct drm_amdgpu_cs_chunk_sem);
+ struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
+
+ if (!timeline) {
+ deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_sem);
+ } else {
+ syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+ }
+
p->post_dep_syncobjs = kmalloc_array(num_deps,
- sizeof(struct drm_syncobj *),
+ sizeof(struct amdgpu_cs_syncobj_post_dep),
GFP_KERNEL);
p->num_post_dep_syncobjs = 0;
if (!p->post_dep_syncobjs)
return -ENOMEM;
- for (i = 0; i < num_deps; ++i) {
- p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
- if (!p->post_dep_syncobjs[i])
- return -EINVAL;
- p->num_post_dep_syncobjs++;
+
+ if (!timeline) {
+ for (i = 0; i < num_deps; ++i) {
+ p->post_dep_syncobjs[i].post_dep_syncobj =
+ drm_syncobj_find(p->filp, deps[i].handle);
+ if (!p->post_dep_syncobjs[i].post_dep_syncobj)
+ return -EINVAL;
+ p->post_dep_syncobjs[i].point = 0;
+ p->num_post_dep_syncobjs++;
+ }
+ } else {
+ for (i = 0; i < num_deps; ++i) {
+ p->post_dep_syncobjs[i].post_dep_syncobj =
+ drm_syncobj_find(p->filp, syncobj_deps[i].handle);
+ if (!p->post_dep_syncobjs[i].post_dep_syncobj)
+ return -EINVAL;
+ p->post_dep_syncobjs[i].point = syncobj_deps[i].point;
+ p->num_post_dep_syncobjs++;
+ }
}
+
return 0;
}
@@ -1168,18 +1215,32 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
chunk = &p->chunks[i];
- if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
+ switch (chunk->chunk_id) {
+ case AMDGPU_CHUNK_ID_DEPENDENCIES:
r = amdgpu_cs_process_fence_dep(p, chunk);
if (r)
return r;
- } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
- r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
+ break;
+ case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
+ r = amdgpu_cs_process_syncobj_in_dep(p, chunk, false);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
+ r = amdgpu_cs_process_syncobj_out_dep(p, chunk, false);
if (r)
return r;
- } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
- r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
+ break;
+ case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+ r = amdgpu_cs_process_syncobj_in_dep(p, chunk, true);
if (r)
return r;
+ break;
+ case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+ r = amdgpu_cs_process_syncobj_out_dep(p, chunk, true);
+ if (r)
+ return r;
+ break;
}
}
@@ -1191,7 +1252,8 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
int i;
for (i = 0; i < p->num_post_dep_syncobjs; ++i)
- drm_syncobj_replace_fence(p->post_dep_syncobjs[i], 0, p->fence);
+ drm_syncobj_replace_fence(p->post_dep_syncobjs[i].post_dep_syncobj,
+ p->post_dep_syncobjs[i].point, p->fence);
}
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 1ceec56de015..412359b446f1 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -517,6 +517,8 @@ struct drm_amdgpu_gem_va {
#define AMDGPU_CHUNK_ID_SYNCOBJ_IN 0x04
#define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05
#define AMDGPU_CHUNK_ID_BO_HANDLES 0x06
+#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT 0x07
+#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL 0x08
struct drm_amdgpu_cs_chunk {
__u32 chunk_id;
@@ -592,6 +594,14 @@ struct drm_amdgpu_cs_chunk_sem {
__u32 handle;
};
+struct drm_amdgpu_cs_chunk_syncobj {
+ __u32 handle;
+ __u32 pad;
+ __u64 point;
+ __u64 flags;
+};
+
+
#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ 0
#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD 1
#define AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD 2
_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/dri-devel