Am 30.09.24 um 13:59 schrieb Arunpravin Paneer Selvam:
Add user fence wait IOCTL timeline syncobj support.
v2:(Christian)
- handle dma_fence_wait() return value.
- shorten the variable name syncobj_timeline_points a bit.
- move num_points up to avoid padding issues.
v3:(Christian)
- Handle timeline drm_syncobj_find_fence() call error
handling
- Use dma_fence_unwrap_for_each() in timeline fence as
there could be more than one fence.
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@xxxxxxx>
---
.../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 99 +++++++++++++++++--
include/uapi/drm/amdgpu_drm.h | 16 ++-
2 files changed, 107 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 8f9d2427d380..76552eed6d86 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -24,6 +24,7 @@
#include <linux/kref.h>
#include <linux/slab.h>
+#include <linux/dma-fence-unwrap.h>
#include <drm/drm_exec.h>
#include <drm/drm_syncobj.h>
@@ -474,11 +475,11 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
+ u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles;
+ u32 num_syncobj, num_bo_handles, num_points;
struct drm_amdgpu_userq_fence_info *fence_info = NULL;
struct drm_amdgpu_userq_wait *wait_info = data;
- u32 *syncobj_handles, *bo_handles;
struct dma_fence **fences = NULL;
- u32 num_syncobj, num_bo_handles;
struct drm_gem_object **gobj;
struct drm_exec exec;
int r, i, entry, cnt;
@@ -498,11 +499,26 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
goto free_bo_handles;
}
+ num_points = wait_info->num_points;
+ timeline_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles),
+ sizeof(u32) * num_points);
+ if (IS_ERR(timeline_handles)) {
+ r = PTR_ERR(timeline_handles);
+ goto free_syncobj_handles;
+ }
+
+ timeline_points = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_points),
+ sizeof(u32) * num_points);
+ if (IS_ERR(timeline_points)) {
+ r = PTR_ERR(timeline_points);
+ goto free_timeline_handles;
+ }
+
/* Array of GEM object handles */
gobj = kmalloc_array(num_bo_handles, sizeof(*gobj), GFP_KERNEL);
if (!gobj) {
r = -ENOMEM;
- goto free_syncobj_handles;
+ goto free_timeline_points;
}
for (entry = 0; entry < num_bo_handles; entry++) {
@@ -524,15 +540,40 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
}
if (!wait_info->num_fences) {
+ if (num_points) {
+ struct dma_fence_unwrap iter;
+ struct dma_fence *fence;
+ struct dma_fence *f;
+
+ for (i = 0; i < num_points; i++) {
+ r = drm_syncobj_find_fence(filp, timeline_handles[i],
+ timeline_points[i],
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+ &fence);
+ if (r)
+ goto exec_fini;
+
+ num_fences++;
+
+ dma_fence_unwrap_for_each(f, &iter, fence)
+ num_fences++;
This duplicates the num_fences increment, just drop the first one since
fence is always included in the iteration as well.
+
+ dma_fence_put(fence);
+ }
+ }
+
/* Count syncobj's fence */
for (i = 0; i < num_syncobj; i++) {
struct dma_fence *fence;
r = drm_syncobj_find_fence(filp, syncobj_handles[i],
- 0, 0, &fence);
+ 0,
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+ &fence);
if (r)
goto exec_fini;
+ dma_fence_put(fence);
num_fences++;
dma_fence_put(fence);
}
@@ -588,12 +629,46 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
}
}
+ if (num_points) {
+ struct dma_fence_unwrap iter;
+ struct dma_fence *fence;
+ struct dma_fence *f;
+
+ for (i = 0; i < num_points; i++) {
+ r = drm_syncobj_find_fence(filp, timeline_handles[i],
+ timeline_points[i],
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+ &fence);
+ if (r)
+ goto free_fences;
+
+ if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+ r = -EINVAL;
+ goto free_fences;
+ }
+
+ fences[num_fences++] = fence;
Same here.
Apart from that looks good to me. We could cleanup the code a bit more,
but that can come later on.
Regards,
Christian.
+
+ dma_fence_unwrap_for_each(f, &iter, fence) {
+ if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+ r = -EINVAL;
+ goto free_fences;
+ }
+
+ dma_fence_get(f);
+ fences[num_fences++] = f;
+ }
+ }
+ }
+
/* Retrieve syncobj's fence */
for (i = 0; i < num_syncobj; i++) {
struct dma_fence *fence;
r = drm_syncobj_find_fence(filp, syncobj_handles[i],
- 0, 0, &fence);
+ 0,
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+ &fence);
if (r)
goto free_fences;
@@ -616,9 +691,13 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
* Just waiting on other driver fences should
* be good for now
*/
- dma_fence_wait(fences[i], false);
- dma_fence_put(fences[i]);
+ r = dma_fence_wait(fences[i], true);
+ if (r) {
+ dma_fence_put(fences[i]);
+ goto free_fences;
+ }
+ dma_fence_put(fences[i]);
continue;
}
@@ -664,6 +743,8 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
drm_gem_object_put(gobj[i]);
kfree(gobj);
+ kfree(timeline_points);
+ kfree(timeline_handles);
kfree(syncobj_handles);
kfree(bo_handles);
@@ -681,6 +762,10 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
while (entry-- > 0)
drm_gem_object_put(gobj[entry]);
kfree(gobj);
+free_timeline_points:
+ kfree(timeline_points);
+free_timeline_handles:
+ kfree(timeline_handles);
free_syncobj_handles:
kfree(syncobj_handles);
free_bo_handles:
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index af42798e901d..3b24e0cb1b51 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -521,12 +521,26 @@ struct drm_amdgpu_userq_wait {
* matching fence wait info pair in @userq_fence_info.
*/
__u32 bo_wait_flags;
- __u32 pad;
+ /**
+ * @num_points: A count that represents the number of timeline syncobj handles in
+ * syncobj_handles_array.
+ */
+ __u32 num_points;
/**
* @syncobj_handles_array: An array of syncobj handles defined to get the
* fence wait information of every syncobj handles in the array.
*/
__u64 syncobj_handles_array;
+ /**
+ * @syncobj_timeline_handles: An array of timeline syncobj handles defined to get the
+ * fence wait information of every timeline syncobj handles in the array.
+ */
+ __u64 syncobj_timeline_handles;
+ /**
+ * @syncobj_timeline_points: An array of timeline syncobj points defined to get the
+ * fence wait points of every timeline syncobj handles in the syncobj_handles_array.
+ */
+ __u64 syncobj_timeline_points;
/**
* @bo_handles_array: An array of GEM BO handles defined to fetch the fence
* wait information of every BO handles in the array.