The bandwidth between system memory and VRAM is very limited on G200. So when using a 32-bit framebuffer in system memory, convert it to 24-bit when copying the frame to VRAM, which makes the transfer 33% faster. The cost of converting the format on the fly is negligible, even on a low-end CPU. Small benchmark on my Dell T310: 1280x1024 32 bits: ~125ms to transfer a single frame. 1280x1024 24 bits: ~95ms Signed-off-by: Jocelyn Falempe <jfalempe@xxxxxxxxxx> --- drivers/gpu/drm/mgag200/mgag200_mode.c | 28 ++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index e3f0da338b95..a8d6b08bf959 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -289,6 +289,8 @@ void mgag200_set_mode_regs(struct mga_device *mdev, const struct drm_display_mod static u32 mgag200_calculate_offset(struct mga_device *mdev, const struct drm_framebuffer *fb) { + if (fb->format->format == DRM_FORMAT_XRGB8888) + return (fb->pitches[0] * 3) >> 6; return fb->pitches[0] >> 4; } @@ -314,17 +316,16 @@ void mgag200_set_format_regs(struct mga_device *mdev, const struct drm_format_in struct drm_device *dev = &mdev->base; unsigned int scale; u8 crtcext3, xmulctrl; + u8 cpp; switch (format->format) { case DRM_FORMAT_RGB565: xmulctrl = MGA1064_MUL_CTL_16bits; break; + case DRM_FORMAT_XRGB8888: /* use 24bit format in VRAM */ case DRM_FORMAT_RGB888: xmulctrl = MGA1064_MUL_CTL_24bits; break; - case DRM_FORMAT_XRGB8888: - xmulctrl = MGA1064_MUL_CTL_32_24bits; - break; default: /* BUG: We should have caught this problem already. 
*/ drm_WARN_ON(dev, "invalid drm format\n"); @@ -346,8 +347,12 @@ void mgag200_set_format_regs(struct mga_device *mdev, const struct drm_format_in WREG_GFX(7, 0x0f); WREG_GFX(8, 0x0f); + cpp = format->cpp[0]; + if (cpp == 4) /* use 24 bit format in VRAM */ + cpp = 3; + /* scale is the number of bytes per pixels - 1 */ - scale = format->cpp[0] - 1; + scale = cpp - 1; RREG_ECRT(3, crtcext3); crtcext3 &= ~GENMASK(2, 0); @@ -403,8 +408,19 @@ static void mgag200_handle_damage(struct mga_device *mdev, const struct iosys_ma { struct iosys_map dst = IOSYS_MAP_INIT_VADDR_IOMEM(mdev->vram); - iosys_map_incr(&dst, drm_fb_clip_offset(fb->pitches[0], fb->format, clip)); - drm_fb_memcpy(&dst, fb->pitches, vmap, fb, clip); + if (fb->format->format == DRM_FORMAT_XRGB8888) { + /* use 24 bit format for VRAM, to save memory bandwidth, + * converting on the fly is much faster than sending the bytes + */ + u32 dst_pitch[3] = {(fb->pitches[0] * 3) / 4, + (fb->pitches[1] * 3) / 4, + (fb->pitches[2] * 3) / 4}; + iosys_map_incr(&dst, clip->y1 * dst_pitch[0] + clip->x1 * 3); + drm_fb_xrgb8888_to_rgb888(&dst, dst_pitch, vmap, fb, clip); + } else { + iosys_map_incr(&dst, drm_fb_clip_offset(fb->pitches[0], fb->format, clip)); + drm_fb_memcpy(&dst, fb->pitches, vmap, fb, clip); + } } /* -- 2.39.2