Re: [PATCH v2 01/08] drm/amdgpu: Implement userqueue signal/wait IOCTL

"Paneer Selvam, Arunpravin" <arunpravin.paneerselvam@xxxxxxx> · Thu, 26 Sep 2024 15:01:51 +0530

Hi Christian,

On 9/26/2024 2:57 PM, Christian König wrote:
Am 25.09.24 um 21:59 schrieb Arunpravin Paneer Selvam:
[SNIP]
+int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
+                struct drm_file *filp)
+{
+    struct drm_amdgpu_userq_fence_info *fence_info = NULL;
+    struct drm_amdgpu_userq_wait *wait_info = data;
+    u32 *syncobj_handles, *bo_handles;
+    struct dma_fence **fences = NULL;
+    u32 num_syncobj, num_bo_handles;
+    struct drm_gem_object **gobj;
+    struct drm_exec exec;
+    int r, i, entry, cnt;
+    u64 num_fences = 0;
+
+    num_bo_handles = wait_info->num_bo_handles;
+    bo_handles = 
memdup_user(u64_to_user_ptr(wait_info->bo_handles_array),
+                 sizeof(u32) * num_bo_handles);
+    if (IS_ERR(bo_handles))
+        return PTR_ERR(bo_handles);
+
+    num_syncobj = wait_info->num_syncobj_handles;
+    syncobj_handles = 
memdup_user(u64_to_user_ptr(wait_info->syncobj_handles_array),
+                      sizeof(u32) * num_syncobj);
+    if (IS_ERR(syncobj_handles)) {
+        r = PTR_ERR(syncobj_handles);
+        goto free_bo_handles;
+    }
+
+    /* Array of GEM object handles */
+    gobj = kmalloc_array(num_bo_handles, sizeof(*gobj), GFP_KERNEL);
+    if (!gobj) {
+        r = -ENOMEM;
+        goto free_syncobj_handles;
+    }
+
+    for (entry = 0; entry < num_bo_handles; entry++) {
+        gobj[entry] = drm_gem_object_lookup(filp, bo_handles[entry]);
+        if (!gobj[entry]) {
+            r = -ENOENT;
+            goto put_gobj;
+        }
+    }
+
+    drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+    drm_exec_until_all_locked(&exec) {
+        r = drm_exec_prepare_array(&exec, gobj, num_bo_handles, 0);
+        drm_exec_retry_on_contention(&exec);
+        if (r) {
+            drm_exec_fini(&exec);
+            goto put_gobj;
+        }
+    }
+
+    if (!wait_info->num_fences) {
+        /* Count syncobj's fence */
+        for (i = 0; i < num_syncobj; i++) {
+            struct dma_fence *fence;
+
+            r = drm_syncobj_find_fence(filp, syncobj_handles[i],
+                           0, 0, &fence);
+            dma_fence_put(fence);
+
+            if (r || !fence)
+                continue;
+
+            num_fences++;
+        }
+
+        /* Count GEM objects fence */
+        for (i = 0; i < num_bo_handles; i++) {
+            struct dma_resv_iter resv_cursor;
+            struct dma_fence *fence;
+
+            dma_resv_for_each_fence(&resv_cursor, gobj[i]->resv,
+ dma_resv_usage_rw(wait_info->bo_wait_flags &
+                        AMDGPU_USERQ_BO_WRITE), fence)
+                num_fences++;

We should probably adjust the UAPI here once more.

The problem is that we only provide the AMDGPU_USERQ_BO_WRITE for the 
whole IOCTL instead of per BO.

So the best approach would probably be to drop the 
AMDGPU_USERQ_BO_WRITE flag and split up the array of BOs into readers 
and writers.

Can you work on that Arun? Shouldn't be more than a bit typing exercise.
Sure, I will modify and send the next version of this file.

Thanks,
Arun.

Thanks,
Christian.