From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxx> Running the Cyberpunk 2077 benchmark we can observe that waiting on DRM sycobjs is relatively hot, but the 96% of the calls are for a single object. (~4% for two points, and never more than three points. While a more trivial workload like vkmark under Plasma is even more skewed to single point waits.) Therefore lets add a fast path to bypass the kcalloc/kfree and use a pre- allocated stack array for those cases. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxx> --- drivers/gpu/drm/drm_syncobj.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index b4563c696056..94932b89298f 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -1035,6 +1035,7 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, uint32_t *idx, ktime_t *deadline) { + struct syncobj_wait_entry stack_entries[4]; struct syncobj_wait_entry *entries; uint32_t signaled_count, i; struct dma_fence *fence; @@ -1049,9 +1050,14 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, !access_ok(user_points, count * sizeof(*user_points))) return -EFAULT; - entries = kcalloc(count, sizeof(*entries), GFP_KERNEL); - if (!entries) - return -ENOMEM; + if (count > ARRAY_SIZE(stack_entries)) { + entries = kcalloc(count, sizeof(*entries), GFP_KERNEL); + if (!entries) + return -ENOMEM; + } else { + memset(stack_entries, 0, sizeof(stack_entries)); + entries = stack_entries; + } /* Walk the list of sync objects and initialize entries. We do * this up-front so that we can properly return -EINVAL if there is @@ -1174,7 +1180,9 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, &entries[i].fence_cb); dma_fence_put(entries[i].fence); } - kfree(entries); + + if (entries != stack_entries) + kfree(entries); return timeout; } -- 2.47.1