Re: [PATCH 6/6] drm/gud: Use the shadow plane helper

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi

Am 23.11.22 um 13:37 schrieb Noralf Trønnes:


Den 23.11.2022 11.38, skrev Thomas Zimmermann:
Hi

Am 22.11.22 um 21:58 schrieb Noralf Trønnes via B4 Submission Endpoint:
From: Noralf Trønnes <noralf@xxxxxxxxxxx>

Use the shadow plane helper to take care of preparing the framebuffer for
CPU access. The synchronous flushing is now done inline without the
use of
a worker. The async path now uses a shadow buffer to hold framebuffer
changes and it doesn't read the framebuffer behind userspace's back
anymore.

Signed-off-by: Noralf Trønnes <noralf@xxxxxxxxxxx>
---
   drivers/gpu/drm/gud/gud_drv.c      |  1 +
   drivers/gpu/drm/gud/gud_internal.h |  1 +
   drivers/gpu/drm/gud/gud_pipe.c     | 69
++++++++++++++++++++++++--------------
   3 files changed, 46 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/gud/gud_drv.c
b/drivers/gpu/drm/gud/gud_drv.c
index d57dab104358..5aac7cda0505 100644
--- a/drivers/gpu/drm/gud/gud_drv.c
+++ b/drivers/gpu/drm/gud/gud_drv.c
@@ -365,6 +365,7 @@ static void gud_debugfs_init(struct drm_minor *minor)
   static const struct drm_simple_display_pipe_funcs gud_pipe_funcs = {
       .check      = gud_pipe_check,
       .update        = gud_pipe_update,
+    DRM_GEM_SIMPLE_DISPLAY_PIPE_SHADOW_PLANE_FUNCS
   };
     static const struct drm_mode_config_funcs gud_mode_config_funcs = {
diff --git a/drivers/gpu/drm/gud/gud_internal.h
b/drivers/gpu/drm/gud/gud_internal.h
index e351a1f1420d..0d148a6f27aa 100644
--- a/drivers/gpu/drm/gud/gud_internal.h
+++ b/drivers/gpu/drm/gud/gud_internal.h
@@ -43,6 +43,7 @@ struct gud_device {
       struct drm_framebuffer *fb;
       struct drm_rect damage;
       bool prev_flush_failed;
+    void *shadow_buf;
   };
     static inline struct gud_device *to_gud_device(struct drm_device
*drm)
diff --git a/drivers/gpu/drm/gud/gud_pipe.c
b/drivers/gpu/drm/gud/gud_pipe.c
index dfada6eedc58..7686325f7ee7 100644
--- a/drivers/gpu/drm/gud/gud_pipe.c
+++ b/drivers/gpu/drm/gud/gud_pipe.c
@@ -358,10 +358,10 @@ static void gud_flush_damage(struct gud_device
*gdrm, struct drm_framebuffer *fb
   void gud_flush_work(struct work_struct *work)
   {
       struct gud_device *gdrm = container_of(work, struct gud_device,
work);
-    struct iosys_map gem_map = { }, fb_map = { };
+    struct iosys_map shadow_map;
       struct drm_framebuffer *fb;
       struct drm_rect damage;
-    int idx, ret;
+    int idx;
         if (!drm_dev_enter(&gdrm->drm, &idx))
           return;
@@ -369,6 +369,7 @@ void gud_flush_work(struct work_struct *work)
       mutex_lock(&gdrm->damage_lock);
       fb = gdrm->fb;
       gdrm->fb = NULL;
+    iosys_map_set_vaddr(&shadow_map, gdrm->shadow_buf);
       damage = gdrm->damage;
       gud_clear_damage(gdrm);
       mutex_unlock(&gdrm->damage_lock);
@@ -376,33 +377,33 @@ void gud_flush_work(struct work_struct *work)
       if (!fb)
           goto out;
   -    ret = drm_gem_fb_vmap(fb, &gem_map, &fb_map);
-    if (ret)
-        goto fb_put;
+    gud_flush_damage(gdrm, fb, &shadow_map, true, &damage);
   -    ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE);
-    if (ret)
-        goto vunmap;
-
-    /* Imported buffers are assumed to be WriteCombined with uncached
reads */
-    gud_flush_damage(gdrm, fb, &fb_map, !fb->obj[0]->import_attach,
&damage);
-
-    drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE);
-vunmap:
-    drm_gem_fb_vunmap(fb, &gem_map);
-fb_put:
       drm_framebuffer_put(fb);
   out:
       drm_dev_exit(idx);
   }
   -static void gud_fb_queue_damage(struct gud_device *gdrm, struct
drm_framebuffer *fb,
-                struct drm_rect *damage)
+static int gud_fb_queue_damage(struct gud_device *gdrm, struct
drm_framebuffer *fb,
+                   const struct iosys_map *map, struct drm_rect *damage)
   {
       struct drm_framebuffer *old_fb = NULL;
+    struct iosys_map shadow_map;
         mutex_lock(&gdrm->damage_lock);
   +    if (!gdrm->shadow_buf) {
+        gdrm->shadow_buf = vzalloc(fb->pitches[0] * fb->height);
+        if (!gdrm->shadow_buf) {
+            mutex_unlock(&gdrm->damage_lock);
+            return -ENOMEM;
+        }
+    }
+
+    iosys_map_set_vaddr(&shadow_map, gdrm->shadow_buf);
+    iosys_map_incr(&shadow_map, drm_fb_clip_offset(fb->pitches[0],
fb->format, damage));
+    drm_fb_memcpy(&shadow_map, fb->pitches, map, fb, damage);
+

It's all good (sans the begin_cpu_access issue) in terms of code, but
the memcpy here can have quite an impact on performace.


Yes if the gud display is the only one in the rendering loop it's not
good. This is on an old HP laptop running the async path (RGB565 over USB):

$ modetest -M gud -s 36:1920x1080 -v
setting mode 1920x1080-60.00Hz on connectors 36, crtc 33
freq: 414.56Hz
freq: 439.71Hz

This is the normal path:

$ modetest -M gud -s 36:1920x1080 -v
setting mode 1920x1080-60.00Hz on connectors 36, crtc 33
freq: 18.11Hz
freq: 17.98Hz

So why not just nuke the async code entirely? It's a userspace problem
and we don't have many(/any?) other drivers with such a workaround. Udl,
our other usb-based driver, works well enough without.


I want to do it gradually so I did consider switching the default to
synchronous flushing in this patchset. I think maybe I should just do
that since you say it "works well enough".

I took the async idea from tiny/gm12u320 and Hans explained the need for
the async worker. He's well versed with userspace so I did the same. But
that is 3 years ago and things change. My long term plan is to drop the
async path, but I want to keep it until I know there are no problems.

If the performace is a fundamental problem it may deserve a DRM-wide solution. I once looked into that briefly for udl, but it didn't seem to be that important then.

Personally, I'd rather push this problem to userspace.


I've seen some reports about the udl driver having horrible performance,
but I haven't seen any explanations as to why. It could be because GNOME
until recently[1] didn't provide damage info to the kernel.

We recently made quite a few improvements to udl, with the most important for performance probably being commit 0a80005d3c5f ("drm/udl: Enable damage clipping"). Without, each screen update transfered a full frame to the adapter. OTOH trying to transfer full-size FullHD frames over USB2 will still be slow.


The GNOME rendering loop issue was solved in 3.38[2]

Ah, I see. Thanks.

Best regards
Thomas


Noralf.

[1] https://gitlab.gnome.org/GNOME/mutter/-/merge_requests/1879
[2]
https://blogs.gnome.org/shell-dev/2020/07/02/splitting-up-the-frame-clock/

Best regards
Thomas

       if (fb != gdrm->fb) {
           old_fb = gdrm->fb;
           drm_framebuffer_get(fb);
@@ -420,6 +421,26 @@ static void gud_fb_queue_damage(struct gud_device
*gdrm, struct drm_framebuffer
         if (old_fb)
           drm_framebuffer_put(old_fb);
+
+    return 0;
+}
+
+static void gud_fb_handle_damage(struct gud_device *gdrm, struct
drm_framebuffer *fb,
+                 const struct iosys_map *map, struct drm_rect *damage)
+{
+    int ret;
+
+    if (gdrm->flags & GUD_DISPLAY_FLAG_FULL_UPDATE)
+        drm_rect_init(damage, 0, 0, fb->width, fb->height);
+
+    if (gud_async_flush) {
+        ret = gud_fb_queue_damage(gdrm, fb, map, damage);
+        if (ret != -ENOMEM)
+            return;
+    }
+
+    /* Imported buffers are assumed to be WriteCombined with uncached
reads */
+    gud_flush_damage(gdrm, fb, map, !fb->obj[0]->import_attach, damage);
   }
     int gud_pipe_check(struct drm_simple_display_pipe *pipe,
@@ -544,6 +565,7 @@ void gud_pipe_update(struct
drm_simple_display_pipe *pipe,
       struct drm_device *drm = pipe->crtc.dev;
       struct gud_device *gdrm = to_gud_device(drm);
       struct drm_plane_state *state = pipe->plane.state;
+    struct drm_shadow_plane_state *shadow_plane_state =
to_drm_shadow_plane_state(state);
       struct drm_framebuffer *fb = state->fb;
       struct drm_crtc *crtc = &pipe->crtc;
       struct drm_rect damage;
@@ -557,6 +579,8 @@ void gud_pipe_update(struct
drm_simple_display_pipe *pipe,
               gdrm->fb = NULL;
           }
           gud_clear_damage(gdrm);
+        vfree(gdrm->shadow_buf);
+        gdrm->shadow_buf = NULL;
           mutex_unlock(&gdrm->damage_lock);
       }
   @@ -572,13 +596,8 @@ void gud_pipe_update(struct
drm_simple_display_pipe *pipe,
       if (crtc->state->active_changed)
           gud_usb_set_u8(gdrm, GUD_REQ_SET_DISPLAY_ENABLE,
crtc->state->active);
   -    if (drm_atomic_helper_damage_merged(old_state, state, &damage)) {
-        if (gdrm->flags & GUD_DISPLAY_FLAG_FULL_UPDATE)
-            drm_rect_init(&damage, 0, 0, fb->width, fb->height);
-        gud_fb_queue_damage(gdrm, fb, &damage);
-        if (!gud_async_flush)
-            flush_work(&gdrm->work);
-    }
+    if (drm_atomic_helper_damage_merged(old_state, state, &damage))
+        gud_fb_handle_damage(gdrm, fb, &shadow_plane_state->data[0],
&damage);
         if (!crtc->state->enable)
           gud_usb_set_u8(gdrm, GUD_REQ_SET_CONTROLLER_ENABLE, 0);



--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Geschäftsführer: Ivo Totev

Attachment: OpenPGP_signature
Description: OpenPGP digital signature


[Index of Archives]     [Linux DRI Users]     [Linux Intel Graphics]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux