Previously, in staging/sm7xxfb (now fbdev/sm712fb), 2D acceleration was implemented, but after its submission, a critical bug that causes total system hang was discovered, as a stopgap measure, 2D ops was completele removed in commit 3af805735a25 ("staging: sm7xx: remove the buggy 2D acceleration support") and never implemented again. It created a massive usability problem - on YeeLoong 8089, a notable MIPS platform which uses SM712 - even scrolling a single line of text on the console required an unaccelerated screen redraw, running "dmesg" typically takes 8-11 seconds, and absurdly, printf(), became a significant performance bottleneck that slows down GCC and "make", make the computer largely unusable. So I decided to take a look. Most of the my actual development was done in 2014 in a personal out-of-tree driver, I did not mainline it because 2D acceleration was not working properly in 24-bit color. I discovered the solution in early 2019 and now it's ready to be mainlined. This commit reimplements the 2D acceleration for sm712fb. Unlike the original implementation, which was messy and unnecessarily complicated by calling a 2D acceleration wrapper file with many unneeded functions, this is a minimum and (relatively) clean implementation. My tests have shown that running "dmesg" only takes 0.9 seconds, a performance boost of 950%. System hangs did not occur in my tests. Currently, 2D acceleration is only supported on little-endian CPUs, it's disabled on Big Endian systems as a safety measure, since I code for myself without any monetary or hardware support from any company or OEMs, I don't have the hardware and it's completely untested. I should be also to purchase a Big Endian test platform and add proper support soon. Finally, thanks to Miodrag Vallat and other OpenBSD developers, this work would be impossible without their code, that served as a reference implementation for me. As a historical note, though I'm not extremely sure, but I believe the source of the original system hang was a type punning. unsigned char ajRemain[4]; SMTC_write2Ddataport(0, *(unsigned long *)ajRemain); Type punning is undefined behavior in C, and here it caused an unaligned memory access, which is illegal on MIPS, thus crashing the computer. Signed-off-by: Yifeng Li <tomli@xxxxxxxx> --- drivers/video/fbdev/sm712.h | 3 + drivers/video/fbdev/sm712fb.c | 393 +++++++++++++++++++++++++++++++++- 2 files changed, 389 insertions(+), 7 deletions(-) diff --git a/drivers/video/fbdev/sm712.h b/drivers/video/fbdev/sm712.h index 4892fd485f08..ad63676d3d4f 100644 --- a/drivers/video/fbdev/sm712.h +++ b/drivers/video/fbdev/sm712.h @@ -135,7 +135,10 @@ static inline u8 smtc_seqr(u8 reg) #define DE_CTRL_HOST_SHIFT 22 #define DE_CTRL_HOST_SRC_IS_MONO 0x01 #define DE_CTRL_FORMAT_XY 0x00 +#define DE_CTRL_FORMAT_8BIT 0x00 +#define DE_CTRL_FORMAT_16BIT 0x10 #define DE_CTRL_FORMAT_24BIT 0x30 +#define DE_CTRL_FORMAT_32BIT 0x20 /* * 32-bit I/O for 2D opeartions. diff --git a/drivers/video/fbdev/sm712fb.c b/drivers/video/fbdev/sm712fb.c index fef5b076589c..75d60ea63883 100644 --- a/drivers/video/fbdev/sm712fb.c +++ b/drivers/video/fbdev/sm712fb.c @@ -47,6 +47,7 @@ #include <linux/module.h> #include <linux/console.h> #include <linux/screen_info.h> +#include <linux/delay.h> #include <linux/pm.h> @@ -62,11 +63,14 @@ struct smtcfb_info { u8 chip_rev_id; void __iomem *lfb; /* linear frame buffer */ + void __iomem *dp_port; /* drawing processor data port */ void __iomem *dp_regs; /* drawing processor control regs */ void __iomem *vp_regs; /* video processor control regs */ void __iomem *cp_regs; /* capture processor control regs */ void __iomem *mmio; /* memory map IO port */ + bool accel; /* whether to actually use drawing processor */ + u_int width; u_int height; u_int hz; @@ -75,6 +79,7 @@ struct smtcfb_info { }; void __iomem *smtc_regbaseaddress; /* Memory Map IO starting address */ +void __iomem *smtc_dprbaseaddress; /* DPR, 2D control registers */ static const struct fb_var_screeninfo smtcfb_var = { .xres = 1024, @@ -848,14 +853,21 @@ static const struct modeinit vgamode[] = { }, }; -static struct screen_info smtc_scr_info; +/* prototypes of two cross-referenced functions */ +static void smtcfb_reset_accel(void); +static int smtcfb_init_accel(struct smtcfb_info *fb); + +static struct screen_info smtc_scr_info; static char *mode_option; +static bool accel = true; /* can be ignored if not supported */ +static bool accel_status_reported; -/* process command line options, get vga parameter */ +/* process command line options, get vga and accel parameter */ static void __init sm7xx_vga_setup(char *options) { int i; + char *this_opt; if (!options || !*options) return; @@ -872,9 +884,20 @@ static void __init sm7xx_vga_setup(char *options) smtc_scr_info.lfb_height = vesa_mode_table[i].lfb_height; smtc_scr_info.lfb_depth = vesa_mode_table[i].lfb_depth; - return; + break; } } + + while ((this_opt = strsep(&options, ",")) != NULL) { + if (!*this_opt) + continue; + + if (!strcmp(this_opt, "accel:0")) + accel = false; + else if (!strcmp(this_opt, "accel:1")) + accel = true; + } + accel_status_reported = false; } static void sm712_setpalette(int regno, unsigned int red, unsigned int green, @@ -1361,7 +1384,42 @@ static void smtcfb_setmode(struct smtcfb_info *sfb) sfb->width = sfb->fb->var.xres; sfb->height = sfb->fb->var.yres; sfb->hz = 60; + + /* + * We reset the 2D engine twice, once before the modesetting, once + * after the modesetting (mandatory), since users may chance the + * mode on-the-fly. Just be safe. + */ + smtcfb_reset_accel(); + smtc_set_timing(sfb); + + /* + * Currently, 2D acceleration is only supported on SM712 with + * little-endian CPUs, it's disabled on Big Endian systems and SM720 + * chips as a safety measure. Since I don't have monetary or hardware + * support from any company or OEMs, I don't have the hardware and + * it's completely untested. I should be also to purchase a Big Endian + * test platform and add proper support soon. I still have to spend + * 200 USD+ to purchase this piece of 1998's hardware, yikes! If you + * have a Big-Endian platform with SM7xx available for testing, please + * send an E-mail to Tom, thanks! + */ +#ifdef __BIG_ENDIAN + sfb->accel = false; + if (accel) + dev_info(&sfb->pdev->dev, + "2D acceleration is unsupported on Big Endian.\n"); +#endif + if (!accel) { + sfb->accel = false; + dev_info(&sfb->pdev->dev, + "2D acceleration is disabled by the user.\n"); + } + + /* reset 2D engine after a modesetting is mandatory */ + smtcfb_reset_accel(); + smtcfb_init_accel(sfb); } static int smtc_check_var(struct fb_var_screeninfo *var, struct fb_info *info) @@ -1401,6 +1459,316 @@ static struct fb_ops smtcfb_ops = { .fb_write = smtcfb_write, }; +static int smtcfb_wait(struct smtcfb_info *fb) +{ + int i; + u8 reg; + + smtc_dprr(DPR_DE_CTRL); + for (i = 0; i < 10000; i++) { + reg = smtc_seqr(SCR_DE_STATUS); + if ((reg & SCR_DE_STATUS_MASK) == SCR_DE_ENGINE_IDLE) + return 0; + udelay(1); + } + dev_err(&fb->pdev->dev, "2D engine hang detected!\n"); + return -EBUSY; +} + +static void +smtcfb_fillrect(struct fb_info *info, const struct fb_fillrect *rect) +{ + u32 width = rect->width, height = rect->height; + u32 dx = rect->dx, dy = rect->dy; + u32 color; + + struct smtcfb_info *sfb = info->par; + + if (unlikely(info->state != FBINFO_STATE_RUNNING)) + return; + + if (unlikely(rect->rop != ROP_COPY)) { + /* + * It must be ROP_XOR. It's only used to combine a hardware + * cursor with the screen, and should never occur. Included + * for completeness. If one wants to implement hardware cursor + * (you don't, hardware only has RGB332 cursor), ROP2_XOR + * should be implemented here. + */ + cfb_fillrect(info, rect); + return; + } + + if ((rect->dx >= info->var.xres_virtual) || + (rect->dy >= info->var.yres_virtual)) + return; + + if (info->fix.visual == FB_VISUAL_TRUECOLOR || + info->fix.visual == FB_VISUAL_DIRECTCOLOR) + color = ((u32 *) (info->pseudo_palette))[rect->color]; + else + color = rect->color; + + if (sfb->fb->var.bits_per_pixel == 24) { + /* + * In 24-bit mode, all x, y coordinates and widths (but not + * height) must be multipiled by three. + */ + dx *= 3; + dy *= 3; + width *= 3; + + /* + * In 24-bit color mode, SOLIDFILL will sometimes put random + * color stripes of garbage on the screen, it seems to be a + * hardware bug. Alternatively, we initialize MONO_PATTERN_LOW + * & HIGH with 0xffffffff (all ones, and we have already set + * that in smtcfb_init_accel). Since the color of this mono + * pattern is controlled by DPR_FG_COLOR, BITBLTing it with + * ROP_COPY is effectively a rectfill(). + */ + smtc_dprw(DPR_FG_COLOR, color); + smtc_dprw(DPR_DST_COORDS, DPR_COORDS(dx, dy)); + smtc_dprw(DPR_SPAN_COORDS, DPR_COORDS(width, height)); + smtc_dprw(DPR_DE_CTRL, DE_CTRL_START | DE_CTRL_ROP2_SELECT | + DE_CTRL_ROP2_SRC_IS_PATTERN | + (DE_CTRL_COMMAND_BITBLT << + DE_CTRL_COMMAND_SHIFT) | + (DE_CTRL_ROP2_COPY << + DE_CTRL_ROP2_SHIFT)); + } else { + smtc_dprw(DPR_FG_COLOR, color); + smtc_dprw(DPR_DST_COORDS, DPR_COORDS(dx, dy)); + smtc_dprw(DPR_SPAN_COORDS, DPR_COORDS(width, height)); + smtc_dprw(DPR_DE_CTRL, DE_CTRL_START | DE_CTRL_ROP2_SELECT | + (DE_CTRL_COMMAND_SOLIDFILL << + DE_CTRL_COMMAND_SHIFT) | + (DE_CTRL_ROP2_COPY << + DE_CTRL_ROP2_SHIFT)); + } + smtcfb_wait(sfb); +} + +static void +smtcfb_copyarea(struct fb_info *info, const struct fb_copyarea *area) +{ + u32 sx = area->sx, sy = area->sy; + u32 dx = area->dx, dy = area->dy; + u32 height = area->height, width = area->width; + u32 direction; + + struct smtcfb_info *sfb = info->par; + + if (unlikely(info->state != FBINFO_STATE_RUNNING)) + return; + if ((sx >= info->var.xres_virtual) || (sy >= info->var.yres_virtual)) + return; + + if (sy < dy || (sy == dy && sx <= dx)) { + sx += width - 1; + dx += width - 1; + sy += height - 1; + dy += height - 1; + direction = DE_CTRL_RTOL; + } else { + direction = 0; + } + + if (sfb->fb->var.bits_per_pixel == 24) { + sx *= 3; + sy *= 3; + dx *= 3; + dy *= 3; + width *= 3; + if (direction == DE_CTRL_RTOL) { + /* + * some hardware shenanigan from the original git + * commit, that is never clearly mentioned in the + * official datasheet. Not sure whether it even + * works correctly. + */ + sx += 2; + dx += 2; + } + } + + smtc_dprw(DPR_SRC_COORDS, DPR_COORDS(sx, sy)); + smtc_dprw(DPR_DST_COORDS, DPR_COORDS(dx, dy)); + smtc_dprw(DPR_SPAN_COORDS, DPR_COORDS(width, height)); + smtc_dprw(DPR_DE_CTRL, + DE_CTRL_START | DE_CTRL_ROP2_SELECT | direction | + (DE_CTRL_COMMAND_BITBLT << DE_CTRL_COMMAND_SHIFT) | + (DE_CTRL_ROP2_COPY << DE_CTRL_ROP2_SHIFT)); + smtcfb_wait(sfb); +} + +static void +smtcfb_imageblit(struct fb_info *info, const struct fb_image *image) +{ + u32 dx = image->dx, dy = image->dy; + u32 width = image->width, height = image->height; + u32 fg_color, bg_color; + + u32 total_bytes, total_dwords, leftovers; + u32 i; + u32 idx = 0; + u32 scanline = image->width >> 3; + + struct smtcfb_info *sfb = info->par; + + if (unlikely(info->state != FBINFO_STATE_RUNNING)) + return; + if ((image->dx >= info->var.xres_virtual) || + (image->dy >= info->var.yres_virtual)) + return; + + if (unlikely(image->depth != 1)) { + /* unsupported depth, fallback to draw Tux */ + cfb_imageblit(info, image); + return; + } + + if (info->fix.visual == FB_VISUAL_TRUECOLOR || + info->fix.visual == FB_VISUAL_DIRECTCOLOR) { + fg_color = ((u32 *) (info->pseudo_palette))[image->fg_color]; + bg_color = ((u32 *) (info->pseudo_palette))[image->bg_color]; + } else { + fg_color = image->fg_color; + bg_color = image->bg_color; + } + + /* total bytes we need to write */ + total_bytes = (width + 7) / 8; + total_dwords = (total_bytes & ~3) / 4; + leftovers = total_bytes & 3; + + if (sfb->fb->var.bits_per_pixel == 24) { + dx *= 3; + dy *= 3; + width *= 3; + } + smtc_dprw(DPR_SRC_COORDS, 0); + smtc_dprw(DPR_DST_COORDS, DPR_COORDS(dx, dy)); + smtc_dprw(DPR_SPAN_COORDS, DPR_COORDS(width, height)); + smtc_dprw(DPR_FG_COLOR, fg_color); + smtc_dprw(DPR_BG_COLOR, bg_color); + smtc_dprw(DPR_DE_CTRL, DE_CTRL_START | DE_CTRL_ROP2_SELECT | + (DE_CTRL_COMMAND_HOSTWRITE << DE_CTRL_COMMAND_SHIFT) | + (DE_CTRL_HOST_SRC_IS_MONO << DE_CTRL_HOST_SHIFT) | + (DE_CTRL_ROP2_COPY << DE_CTRL_ROP2_SHIFT)); + + for (i = 0; i < height; i++) { + iowrite32_rep(sfb->dp_port, &image->data[idx], total_dwords); + if (leftovers) { + /* + * We can set info->pixmap.scan_align/buf_align = 4 + * for automatic padding. But it would be sometimes + * incompatible with cfb_*(), especially imageblit() + * when depth = 1. In case we need to fallback (e.g. + * debugging), it would be inconvenient, so we pad it + * manually. + */ + iowrite32( + pad_to_dword( + &image->data[idx + total_dwords * 4], + leftovers), + sfb->dp_port); + } + idx += scanline; + } + smtcfb_wait(sfb); +} + +static void smtcfb_reset_accel(void) +{ + u8 reg; + + /* enable Zoom Video Port, 2D Drawing Engine and Video Processor */ + smtc_seqw(0x21, smtc_seqr(0x21) & 0xf8); + + /* abort pending 2D Drawing Engine operations */ + reg = smtc_seqr(0x15); + smtc_seqw(0x15, reg | 0x30); + smtc_seqw(0x15, reg); +} + +/* + * Function smtcfb_reset_accel(); should be called before calling + * this function + */ +static int smtcfb_init_accel(struct smtcfb_info *fb) +{ + + if (accel && !fb->accel) { + fb->fb->flags |= FBINFO_HWACCEL_NONE; + return 0; + } else if (!accel && !fb->accel) { + fb->fb->flags |= FBINFO_HWACCEL_DISABLED; + return 0; + } + + if (smtcfb_wait(fb) != 0) { + fb->fb->flags |= FBINFO_HWACCEL_NONE; + dev_err(&fb->pdev->dev, + "2D acceleration initialization failed!\n"); + fb->accel = false; + return -1; + } + + smtc_dprw(DPR_CROP_TOPLEFT_COORDS, DPR_COORDS(0, 0)); + + /* same width for DPR_PITCH and DPR_SRC_WINDOW */ + smtc_dprw(DPR_PITCH, DPR_COORDS(fb->fb->var.xres, fb->fb->var.xres)); + smtc_dprw(DPR_SRC_WINDOW, + DPR_COORDS(fb->fb->var.xres, fb->fb->var.xres)); + + switch (fb->fb->var.bits_per_pixel) { + case 8: + smtc_dprw_16(DPR_DE_FORMAT_SELECT, + DE_CTRL_FORMAT_XY | DE_CTRL_FORMAT_8BIT); + break; + case 16: + smtc_dprw_16(DPR_DE_FORMAT_SELECT, + DE_CTRL_FORMAT_XY | DE_CTRL_FORMAT_16BIT); + break; + case 24: + smtc_dprw_16(DPR_DE_FORMAT_SELECT, + DE_CTRL_FORMAT_XY | DE_CTRL_FORMAT_24BIT); + smtc_dprw(DPR_PITCH, + DPR_COORDS(fb->fb->var.xres * 3, + fb->fb->var.xres * 3)); + break; + case 32: + smtc_dprw_16(DPR_DE_FORMAT_SELECT, + DE_CTRL_FORMAT_XY | DE_CTRL_FORMAT_32BIT); + break; + } + + smtc_dprw(DPR_BYTE_BIT_MASK, 0xffffffff); + smtc_dprw(DPR_COLOR_COMPARE_MASK, 0); + smtc_dprw(DPR_COLOR_COMPARE, 0); + smtc_dprw(DPR_SRC_BASE, 0); + smtc_dprw(DPR_DST_BASE, 0); + smtc_dprw(DPR_MONO_PATTERN_LO32, 0xffffffff); + smtc_dprw(DPR_MONO_PATTERN_HI32, 0xffffffff); + smtc_dprr(DPR_DST_BASE); + + smtcfb_ops.fb_copyarea = smtcfb_copyarea; + smtcfb_ops.fb_fillrect = smtcfb_fillrect; + smtcfb_ops.fb_imageblit = smtcfb_imageblit; + fb->fb->flags |= FBINFO_HWACCEL_COPYAREA | + FBINFO_HWACCEL_FILLRECT | + FBINFO_HWACCEL_IMAGEBLIT | + FBINFO_READS_FAST; + + /* don't spam the kernel log after each modesetting */ + if (!accel_status_reported) + dev_info(&fb->pdev->dev, "2D acceleration is enabled.\n"); + accel_status_reported = true; + + return 0; +} + /* * Unmap in the memory mapped IO registers */ @@ -1599,10 +1967,14 @@ static int smtcfb_pci_probe(struct pci_dev *pdev, goto failed_fb; } - sfb->mmio = (smtc_regbaseaddress = - sfb->lfb + 0x00700000); + sfb->mmio = sfb->lfb + 0x00700000; + sfb->dp_port = sfb->lfb + 0x00400000; sfb->dp_regs = sfb->lfb + 0x00408000; sfb->vp_regs = sfb->lfb + 0x0040c000; + + smtc_regbaseaddress = sfb->mmio; + smtc_dprbaseaddress = sfb->dp_regs; + sfb->accel = accel; if (sfb->fb->var.bits_per_pixel == 32) { sfb->lfb += big_addr; dev_info(&pdev->dev, "sfb->lfb=%p\n", sfb->lfb); @@ -1623,9 +1995,13 @@ static int smtcfb_pci_probe(struct pci_dev *pdev, sfb->fb->fix.mmio_len = 0x00200000; sfb->dp_regs = ioremap(mmio_base, 0x00200000 + smem_size); sfb->lfb = sfb->dp_regs + 0x00200000; - sfb->mmio = (smtc_regbaseaddress = - sfb->dp_regs + 0x000c0000); + sfb->mmio = sfb->dp_regs + 0x000c0000; sfb->vp_regs = sfb->dp_regs + 0x800; + sfb->dp_port = sfb->dp_regs + 0x00006000; + + smtc_regbaseaddress = sfb->mmio; + smtc_dprbaseaddress = sfb->dp_regs; + sfb->accel = accel; smtc_seqw(0x62, 0xff); smtc_seqw(0x6a, 0x0d); @@ -1807,6 +2183,9 @@ static void __exit sm712fb_exit(void) module_exit(sm712fb_exit); +module_param(accel, bool, 0444); +MODULE_PARM_DESC(accel, "Use Acceleration (2D Drawing) Engine (default = 1)"); + MODULE_AUTHOR("Siliconmotion "); MODULE_DESCRIPTION("Framebuffer driver for SMI Graphic Cards"); MODULE_LICENSE("GPL"); -- 2.20.1