Hi Am 23.11.22 um 13:37 schrieb Noralf Trønnes:
Den 23.11.2022 11.38, skrev Thomas Zimmermann:Hi Am 22.11.22 um 21:58 schrieb Noralf Trønnes via B4 Submission Endpoint:From: Noralf Trønnes <noralf@xxxxxxxxxxx> Use the shadow plane helper to take care of preparing the framebuffer for CPU access. The synchronous flushing is now done inline without the use of a worker. The async path now uses a shadow buffer to hold framebuffer changes and it doesn't read the framebuffer behind userspace's back anymore. Signed-off-by: Noralf Trønnes <noralf@xxxxxxxxxxx> --- drivers/gpu/drm/gud/gud_drv.c | 1 + drivers/gpu/drm/gud/gud_internal.h | 1 + drivers/gpu/drm/gud/gud_pipe.c | 69 ++++++++++++++++++++++++-------------- 3 files changed, 46 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/gud/gud_drv.c b/drivers/gpu/drm/gud/gud_drv.c index d57dab104358..5aac7cda0505 100644 --- a/drivers/gpu/drm/gud/gud_drv.c +++ b/drivers/gpu/drm/gud/gud_drv.c @@ -365,6 +365,7 @@ static void gud_debugfs_init(struct drm_minor *minor) static const struct drm_simple_display_pipe_funcs gud_pipe_funcs = { .check = gud_pipe_check, .update = gud_pipe_update, + DRM_GEM_SIMPLE_DISPLAY_PIPE_SHADOW_PLANE_FUNCS }; static const struct drm_mode_config_funcs gud_mode_config_funcs = { diff --git a/drivers/gpu/drm/gud/gud_internal.h b/drivers/gpu/drm/gud/gud_internal.h index e351a1f1420d..0d148a6f27aa 100644 --- a/drivers/gpu/drm/gud/gud_internal.h +++ b/drivers/gpu/drm/gud/gud_internal.h @@ -43,6 +43,7 @@ struct gud_device { struct drm_framebuffer *fb; struct drm_rect damage; bool prev_flush_failed; + void *shadow_buf; }; static inline struct gud_device *to_gud_device(struct drm_device *drm) diff --git a/drivers/gpu/drm/gud/gud_pipe.c b/drivers/gpu/drm/gud/gud_pipe.c index dfada6eedc58..7686325f7ee7 100644 --- a/drivers/gpu/drm/gud/gud_pipe.c +++ b/drivers/gpu/drm/gud/gud_pipe.c @@ -358,10 +358,10 @@ static void gud_flush_damage(struct gud_device *gdrm, struct drm_framebuffer *fb void gud_flush_work(struct work_struct *work) { struct gud_device *gdrm = container_of(work, struct gud_device, work); - struct iosys_map gem_map = { }, fb_map = { }; + struct iosys_map shadow_map; struct drm_framebuffer *fb; struct drm_rect damage; - int idx, ret; + int idx; if (!drm_dev_enter(&gdrm->drm, &idx)) return; @@ -369,6 +369,7 @@ void gud_flush_work(struct work_struct *work) mutex_lock(&gdrm->damage_lock); fb = gdrm->fb; gdrm->fb = NULL; + iosys_map_set_vaddr(&shadow_map, gdrm->shadow_buf); damage = gdrm->damage; gud_clear_damage(gdrm); mutex_unlock(&gdrm->damage_lock); @@ -376,33 +377,33 @@ void gud_flush_work(struct work_struct *work) if (!fb) goto out; - ret = drm_gem_fb_vmap(fb, &gem_map, &fb_map); - if (ret) - goto fb_put; + gud_flush_damage(gdrm, fb, &shadow_map, true, &damage); - ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE); - if (ret) - goto vunmap; - - /* Imported buffers are assumed to be WriteCombined with uncached reads */ - gud_flush_damage(gdrm, fb, &fb_map, !fb->obj[0]->import_attach, &damage); - - drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); -vunmap: - drm_gem_fb_vunmap(fb, &gem_map); -fb_put: drm_framebuffer_put(fb); out: drm_dev_exit(idx); } -static void gud_fb_queue_damage(struct gud_device *gdrm, struct drm_framebuffer *fb, - struct drm_rect *damage) +static int gud_fb_queue_damage(struct gud_device *gdrm, struct drm_framebuffer *fb, + const struct iosys_map *map, struct drm_rect *damage) { struct drm_framebuffer *old_fb = NULL; + struct iosys_map shadow_map; mutex_lock(&gdrm->damage_lock); + if (!gdrm->shadow_buf) { + gdrm->shadow_buf = vzalloc(fb->pitches[0] * fb->height); + if (!gdrm->shadow_buf) { + mutex_unlock(&gdrm->damage_lock); + return -ENOMEM; + } + } + + iosys_map_set_vaddr(&shadow_map, gdrm->shadow_buf); + iosys_map_incr(&shadow_map, drm_fb_clip_offset(fb->pitches[0], fb->format, damage)); + drm_fb_memcpy(&shadow_map, fb->pitches, map, fb, damage); +It's all good (sans the begin_cpu_access issue) in terms of code, but the memcpy here can have quite an impact on performace.Yes if the gud display is the only one in the rendering loop it's not good. This is on an old HP laptop running the async path (RGB565 over USB): $ modetest -M gud -s 36:1920x1080 -v setting mode 1920x1080-60.00Hz on connectors 36, crtc 33 freq: 414.56Hz freq: 439.71Hz This is the normal path: $ modetest -M gud -s 36:1920x1080 -v setting mode 1920x1080-60.00Hz on connectors 36, crtc 33 freq: 18.11Hz freq: 17.98HzSo why not just nuke the async code entirely? It's a userspace problem and we don't have many(/any?) other drivers with such a workaround. Udl, our other usb-based driver, works well enough without.I want to do it gradually so I did consider switching the default to synchronous flushing in this patchset. I think maybe I should just do that since you say it "works well enough". I took the async idea from tiny/gm12u320 and Hans explained the need for the async worker. He's well versed with userspace so I did the same. But that is 3 years ago and things change. My long term plan is to drop the async path, but I want to keep it until I know there are no problems.
If the performace is a fundamental problem it may deserve a DRM-wide solution. I once looked into that briefly for udl, but it didn't seem to be that important then.
Personally, I'd rather push this problem to userspace.
I've seen some reports about the udl driver having horrible performance, but I haven't seen any explanations as to why. It could be because GNOME until recently[1] didn't provide damage info to the kernel.
We recently made quite a few improvements to udl, with the most important for performance probably being commit 0a80005d3c5f ("drm/udl: Enable damage clipping"). Without, each screen update transfered a full frame to the adapter. OTOH trying to transfer full-size FullHD frames over USB2 will still be slow.
The GNOME rendering loop issue was solved in 3.38[2]
Ah, I see. Thanks. Best regards Thomas
Noralf. [1] https://gitlab.gnome.org/GNOME/mutter/-/merge_requests/1879 [2] https://blogs.gnome.org/shell-dev/2020/07/02/splitting-up-the-frame-clock/Best regards Thomasif (fb != gdrm->fb) { old_fb = gdrm->fb; drm_framebuffer_get(fb); @@ -420,6 +421,26 @@ static void gud_fb_queue_damage(struct gud_device *gdrm, struct drm_framebuffer if (old_fb) drm_framebuffer_put(old_fb); + + return 0; +} + +static void gud_fb_handle_damage(struct gud_device *gdrm, struct drm_framebuffer *fb, + const struct iosys_map *map, struct drm_rect *damage) +{ + int ret; + + if (gdrm->flags & GUD_DISPLAY_FLAG_FULL_UPDATE) + drm_rect_init(damage, 0, 0, fb->width, fb->height); + + if (gud_async_flush) { + ret = gud_fb_queue_damage(gdrm, fb, map, damage); + if (ret != -ENOMEM) + return; + } + + /* Imported buffers are assumed to be WriteCombined with uncached reads */ + gud_flush_damage(gdrm, fb, map, !fb->obj[0]->import_attach, damage); } int gud_pipe_check(struct drm_simple_display_pipe *pipe, @@ -544,6 +565,7 @@ void gud_pipe_update(struct drm_simple_display_pipe *pipe, struct drm_device *drm = pipe->crtc.dev; struct gud_device *gdrm = to_gud_device(drm); struct drm_plane_state *state = pipe->plane.state; + struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(state); struct drm_framebuffer *fb = state->fb; struct drm_crtc *crtc = &pipe->crtc; struct drm_rect damage; @@ -557,6 +579,8 @@ void gud_pipe_update(struct drm_simple_display_pipe *pipe, gdrm->fb = NULL; } gud_clear_damage(gdrm); + vfree(gdrm->shadow_buf); + gdrm->shadow_buf = NULL; mutex_unlock(&gdrm->damage_lock); } @@ -572,13 +596,8 @@ void gud_pipe_update(struct drm_simple_display_pipe *pipe, if (crtc->state->active_changed) gud_usb_set_u8(gdrm, GUD_REQ_SET_DISPLAY_ENABLE, crtc->state->active); - if (drm_atomic_helper_damage_merged(old_state, state, &damage)) { - if (gdrm->flags & GUD_DISPLAY_FLAG_FULL_UPDATE) - drm_rect_init(&damage, 0, 0, fb->width, fb->height); - gud_fb_queue_damage(gdrm, fb, &damage); - if (!gud_async_flush) - flush_work(&gdrm->work); - } + if (drm_atomic_helper_damage_merged(old_state, state, &damage)) + gud_fb_handle_damage(gdrm, fb, &shadow_plane_state->data[0], &damage); if (!crtc->state->enable) gud_usb_set_u8(gdrm, GUD_REQ_SET_CONTROLLER_ENABLE, 0);
-- Thomas Zimmermann Graphics Driver Developer SUSE Software Solutions Germany GmbH Maxfeldstr. 5, 90409 Nürnberg, Germany (HRB 36809, AG Nürnberg) Geschäftsführer: Ivo Totev
Attachment:
OpenPGP_signature
Description: OpenPGP digital signature