Re: [PATCH libdrm] libdrm_amdgpu: add kernel semaphore support

Christian König <deathsimple@xxxxxxxxxxx> · Fri, 7 Jul 2017 11:07:37 +0200

Hi Dave,

on first glance that looks rather good to me, but there is one thing I 
don't really like and I strongly think Marek will absolutely agree on 
that: When we add a new CS function then let's get rid of all this 
abstraction!

The new function should get an amdgpu_device_handle and a list of chunks 
to submit, nothing else.

We then provide helper functions to generate the chunks out of the 
existing amdgpu_context_handle and amdgpu_bo_list_handle.

That should be perfectly sufficient and extensible for future additions 
as well.

Regards,
Christian.

Am 07.07.2017 um 00:19 schrieb Dave Airlie:
Christian,

you are probably the best person to ack this, I'd like to get the radv
code landed
and allow the GL code to get going.

Dave.

This adds kernel semaphore support to the command submission
interface in what should be a backwards compatible manner,
it adds a new command submission API.

Signed-off-by: Dave Airlie <airlied@xxxxxxxxxx>
---
  amdgpu/amdgpu.h    |  29 ++++++++++++-
  amdgpu/amdgpu_cs.c | 118 +++++++++++++++++++++++++++++++++++++++++++++++++----
  2 files changed, 138 insertions(+), 9 deletions(-)

diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
index 1901fa8..649b66e 100644
--- a/amdgpu/amdgpu.h
+++ b/amdgpu/amdgpu.h
@@ -369,6 +369,16 @@ struct amdgpu_cs_request {
         struct amdgpu_cs_fence_info fence_info;
  };

+struct amdgpu_cs_request_syncobj {
+       /*
+        *
+        */
+       uint32_t number_in_syncobj;
+       uint32_t number_out_syncobj;
+       uint32_t *in_syncobj;
+       uint32_t *out_syncobj;
+};
+
  /**
   * Structure which provide information about GPU VM MC Address space
   * alignments requirements
@@ -886,6 +896,12 @@ int amdgpu_cs_submit(amdgpu_context_handle context,
                      struct amdgpu_cs_request *ibs_request,
                      uint32_t number_of_requests);

+int amdgpu_cs_submit_syncobj(amdgpu_context_handle context,
+                            uint64_t flags,
+                            struct amdgpu_cs_request *ibs_request,
+                            struct amdgpu_cs_request_syncobj *ibs_syncobj,
+                            uint32_t number_of_requests);
+
  /**
   *  Query status of Command Buffer Submission
   *
@@ -1328,8 +1344,19 @@ int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem);
  */
  const char *amdgpu_get_marketing_name(amdgpu_device_handle dev);

+
+int amdgpu_cs_create_syncobj(amdgpu_device_handle dev,
+                            uint32_t *syncobj);
+int amdgpu_cs_export_syncobj(amdgpu_device_handle dev,
+                            uint32_t syncobj,
+                            int *shared_fd);
+int amdgpu_cs_import_syncobj(amdgpu_device_handle dev,
+                            int shared_fd,
+                            uint32_t *syncobj);
+int amdgpu_cs_destroy_syncobj(amdgpu_device_handle dev,
+                             uint32_t syncobj);
+
  #ifdef __cplusplus
  }
  #endif
-
  #endif /* #ifdef _AMDGPU_H_ */
diff --git a/amdgpu/amdgpu_cs.c b/amdgpu/amdgpu_cs.c
index 868eb7b..339c5f9 100644
--- a/amdgpu/amdgpu_cs.c
+++ b/amdgpu/amdgpu_cs.c
@@ -168,7 +168,8 @@ int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
   * \sa amdgpu_cs_submit()
  */
  static int amdgpu_cs_submit_one(amdgpu_context_handle context,
-                               struct amdgpu_cs_request *ibs_request)
+                               struct amdgpu_cs_request *ibs_request,
+                               struct amdgpu_cs_request_syncobj *syncobj_request)
  {
         union drm_amdgpu_cs cs;
         uint64_t *chunk_array;
@@ -176,10 +177,13 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
         struct drm_amdgpu_cs_chunk_data *chunk_data;
         struct drm_amdgpu_cs_chunk_dep *dependencies = NULL;
         struct drm_amdgpu_cs_chunk_dep *sem_dependencies = NULL;
+       struct drm_amdgpu_cs_chunk_sem *in_syncobj_dependencies = NULL;
+       struct drm_amdgpu_cs_chunk_sem *out_syncobj_dependencies = NULL;
         struct list_head *sem_list;
         amdgpu_semaphore_handle sem, tmp;
-       uint32_t i, size, sem_count = 0;
+       uint32_t i, j, size, sem_count = 0;
         bool user_fence;
+       uint32_t sem_size = 0;
         int r = 0;

         if (ibs_request->ip_type >= AMDGPU_HW_IP_NUM)
@@ -194,7 +198,11 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
         }
         user_fence = (ibs_request->fence_info.handle != NULL);

-       size = ibs_request->number_of_ibs + (user_fence ? 2 : 1) + 1;
+       if (syncobj_request) {
+               sem_size += syncobj_request->number_in_syncobj ? 1 : 0;
+               sem_size += syncobj_request->number_out_syncobj ? 1 : 0;
+       }
+       size = ibs_request->number_of_ibs + (user_fence ? 2 : 1) + 1 + sem_size;

         chunk_array = alloca(sizeof(uint64_t) * size);
         chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);
@@ -306,6 +314,45 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
                 chunks[i].chunk_data = (uint64_t)(uintptr_t)sem_dependencies;
         }

+       if (syncobj_request) {
+               if (syncobj_request->number_in_syncobj) {
+                       in_syncobj_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * syncobj_request->number_in_syncobj);
+                       if (!in_syncobj_dependencies) {
+                               r = -ENOMEM;
+                               goto error_unlock;
+                       }
+                       for (j = 0; j < syncobj_request->number_in_syncobj; j++) {
+                               struct drm_amdgpu_cs_chunk_sem *dep = &in_syncobj_dependencies[j];
+                               dep->handle = syncobj_request->in_syncobj[j];
+                       }
+                       i = cs.in.num_chunks++;
+
+                       /* dependencies chunk */
+                       chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
+                       chunks[i].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_IN;
+                       chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_sem) / 4 * syncobj_request->number_in_syncobj;
+                       chunks[i].chunk_data = (uint64_t)(uintptr_t)in_syncobj_dependencies;
+               }
+               if (syncobj_request->number_out_syncobj) {
+                       out_syncobj_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * syncobj_request->number_out_syncobj);
+                       if (!out_syncobj_dependencies) {
+                               r = -ENOMEM;
+                               goto error_unlock;
+                       }
+                       for (j = 0; j < syncobj_request->number_out_syncobj; j++) {
+                               struct drm_amdgpu_cs_chunk_sem *dep = &out_syncobj_dependencies[j];
+                               dep->handle = syncobj_request->out_syncobj[j];
+                       }
+                       i = cs.in.num_chunks++;
+
+                       /* dependencies chunk */
+                       chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
+                       chunks[i].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_OUT;
+                       chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_sem) / 4 * syncobj_request->number_out_syncobj;
+                       chunks[i].chunk_data = (uint64_t)(uintptr_t)out_syncobj_dependencies;
+               }
+       }
+
         r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CS,
                                 &cs, sizeof(cs));
         if (r)
@@ -317,31 +364,48 @@ error_unlock:
         pthread_mutex_unlock(&context->sequence_mutex);
         free(dependencies);
         free(sem_dependencies);
+       free(in_syncobj_dependencies);
+       free(out_syncobj_dependencies);
         return r;
  }

-int amdgpu_cs_submit(amdgpu_context_handle context,
-                    uint64_t flags,
-                    struct amdgpu_cs_request *ibs_request,
-                    uint32_t number_of_requests)
+int amdgpu_cs_submit_syncobj(amdgpu_context_handle context,
+                            uint64_t flags,
+                            struct amdgpu_cs_request *ibs_request,
+                            struct amdgpu_cs_request_syncobj *ibs_syncobj,
+                            uint32_t number_of_requests)
  {
         uint32_t i;
         int r;
+       bool has_syncobj = ibs_syncobj ? true : false;

         if (!context || !ibs_request)
                 return -EINVAL;

         r = 0;
         for (i = 0; i < number_of_requests; i++) {
-               r = amdgpu_cs_submit_one(context, ibs_request);
+               r = amdgpu_cs_submit_one(context, ibs_request, has_syncobj ? ibs_syncobj : NULL);
                 if (r)
                         break;
                 ibs_request++;
+               if (has_syncobj)
+                       ibs_syncobj++;
         }

         return r;
  }

+int amdgpu_cs_submit(amdgpu_context_handle context,
+                    uint64_t flags,
+                    struct amdgpu_cs_request *ibs_request,
+                    uint32_t number_of_requests)
+{
+       return amdgpu_cs_submit_syncobj(context, flags,
+                                       ibs_request, NULL,
+                                       number_of_requests);
+}
+
+
  /**
   * Calculate absolute timeout.
   *
@@ -596,3 +660,41 @@ int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem)
  {
         return amdgpu_cs_unreference_sem(sem);
  }
+
+int amdgpu_cs_create_syncobj(amdgpu_device_handle dev,
+                            uint32_t *handle)
+{
+       if (NULL == dev)
+               return -EINVAL;
+
+       return drmSyncobjCreate(dev->fd, 0, handle);
+}
+
+int amdgpu_cs_destroy_syncobj(amdgpu_device_handle dev,
+                             uint32_t handle)
+{
+       if (NULL == dev)
+               return -EINVAL;
+
+       return drmSyncobjDestroy(dev->fd, handle);
+}
+
+int amdgpu_cs_export_syncobj(amdgpu_device_handle dev,
+                            uint32_t handle,
+                            int *shared_fd)
+{
+       if (NULL == dev)
+               return -EINVAL;
+
+       return drmSyncobjHandleToFD(dev->fd, handle, shared_fd);
+}
+
+int amdgpu_cs_import_syncobj(amdgpu_device_handle dev,
+                            int shared_fd,
+                            uint32_t *handle)
+{
+       if (NULL == dev)
+               return -EINVAL;
+
+       return drmSyncobjFDToHandle(dev->fd, shared_fd, handle);
+}
--
2.9.4

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/dri-devel