Hi, I don't know who's currently "responsible" for the whirl&pinch plugin, so I am posting my patch to this list. I have modified whirl&pinch slightly to use "blocking", i.e. doing all calculations in small squares (32*32). With that technique, which is very common in numerical computing, the CPU caches (and, for GIMP, the tile cache) have a much higher hit rate. The boost is quite spectacular: on an Athlon-600, the original whirl&pinch needs 30s to complete on a larger image (1400*1400); with my patch it needs only 6.5s. That's a speedup by a factor of 4.5 without any change in the algorithm itself! The changes are relatively small (effectively about 10 lines) and mostly affect clipping. I have found no side effects of the patch... Blocking can IMHO easily be used for a lot of other filters, and should give a large speedup for most of GIMP's filters. Please try out the patch and apply it to the source tree if you like it ;-) -- Georg Acher, acher@xxxxxxxxx http://www.in.tum.de/~acher/ "Oh no, not again !" The bowl of petunias
--- whirlpinch.c.org Thu Apr 5 17:47:17 2001 +++ whirlpinch.c Thu Apr 5 18:49:09 2001 @@ -22,6 +22,14 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +/* Version 2.10: + * + * Major Speedup by use of "blocking", ie. doing the calcualations + * in small squares, thus gaining a performance boost from CPU caches + * and the tile cache. + * + * Georg Acher, acher@xxxxxxxxx + */ /* Version 2.09: * @@ -63,7 +71,7 @@ #define PLUG_IN_NAME "plug_in_whirl_pinch" -#define PLUG_IN_VERSION "May 1997, 2.09" +#define PLUG_IN_VERSION "April 2001, 2.10" /***** Magic numbers *****/ @@ -71,6 +79,10 @@ #define SCALE_WIDTH 200 #define ENTRY_WIDTH 60 +/* blocking size, 32*32pixels is a good compromise for all CPUs */ + +#define BLOCKING 32 + /***** Types *****/ typedef struct @@ -366,12 +378,13 @@ guchar *top_row, *bot_row; guchar *top_p, *bot_p; gint row, col; + gint row1,col1; guchar pixel[4][4]; guchar values[4]; double whirl; double cx, cy; int ix, iy; - int i; + int i,n; guchar bg_color[4]; pixel_fetcher_t *pft, *pfb; @@ -406,112 +419,133 @@ whirl = wpvals.whirl * G_PI / 180; radius2 = radius * radius * wpvals.radius; - for (row = sel_y1; row <= ((sel_y1 + sel_y2) / 2); row++) + /* Whirl&Pinch in small squares to benefit from cache effects + (tile cache, CPU cache) + 20010405 GA + */ + for (row1 = sel_y1; row1 <= ((sel_y1 + sel_y2) / 2); row1+=BLOCKING) { - top_p = top_row; - bot_p = bot_row + img_bpp * (sel_width - 1); - - for (col = sel_x1; col < sel_x2; col++) - { - if (calc_undistorted_coords (col, row, whirl, wpvals.pinch, &cx, &cy)) - { - /* We are inside the distortion area */ - - /* Top */ - - if (cx >= 0.0) - ix = (int) cx; - else - ix = -((int) -cx + 1); - - if (cy >= 0.0) - iy = (int) cy; - else - iy = -((int) -cy + 1); - - pixel_fetcher_get_pixel (pft, ix, iy, pixel[0]); - pixel_fetcher_get_pixel (pft, ix + 1, iy, pixel[1]); - pixel_fetcher_get_pixel (pft, ix, iy + 1, pixel[2]); - pixel_fetcher_get_pixel (pft, ix + 1, iy + 1, 
pixel[3]); - - for (i = 0; i < img_bpp; i++) - { - values[0] = pixel[0][i]; - values[1] = pixel[1][i]; - values[2] = pixel[2][i]; - values[3] = pixel[3][i]; - - *top_p++ = bilinear (cx, cy, values); - } - - /* Bottom */ - - cx = cen_x + (cen_x - cx); - cy = cen_y + (cen_y - cy); - - if (cx >= 0.0) - ix = (int) cx; - else - ix = -((int) -cx + 1); - - if (cy >= 0.0) - iy = (int) cy; - else - iy = -((int) -cy + 1); - - pixel_fetcher_get_pixel (pfb, ix, iy, pixel[0]); - pixel_fetcher_get_pixel (pfb, ix + 1, iy, pixel[1]); - pixel_fetcher_get_pixel (pfb, ix, iy + 1, pixel[2]); - pixel_fetcher_get_pixel (pfb, ix + 1, iy + 1, pixel[3]); - - for (i = 0; i < img_bpp; i++) - { - values[0] = pixel[0][i]; - values[1] = pixel[1][i]; - values[2] = pixel[2][i]; - values[3] = pixel[3][i]; - - *bot_p++ = bilinear (cx, cy, values); - } - - bot_p -= 2 * img_bpp; /* We move backwards! */ - } - else - { - /* We are outside the distortion area; - * just copy the source pixels - */ - - /* Top */ - - pixel_fetcher_get_pixel (pft, col, row, pixel[0]); - - for (i = 0; i < img_bpp; i++) - *top_p++ = pixel[0][i]; - - /* Bottom */ - - pixel_fetcher_get_pixel (pfb, - (sel_x2 - 1) - (col - sel_x1), - (sel_y2 - 1) - (row - sel_y1), - pixel[0]); - - for (i = 0; i < img_bpp; i++) - *bot_p++ = pixel[0][i]; - - bot_p -= 2 * img_bpp; /* We move backwards! 
*/ - } - } - - /* Paint rows to image */ - - gimp_pixel_rgn_set_row (&dest_rgn, top_row, sel_x1, row, sel_width); - gimp_pixel_rgn_set_row (&dest_rgn, bot_row, - sel_x1, (sel_y2 - 1) - (row - sel_y1), sel_width); - + for(col1 = sel_x1; col1 < sel_x2; col1+=BLOCKING) + { + /* Now whirl the block starting at (col1,row1) with size BLOCKING*BLOCKING */ + + for (row = row1; (row <= ((sel_y1 + sel_y2) / 2))&&(row<(row1+BLOCKING)); row++) + { + top_p = top_row; + bot_p = bot_row + img_bpp * (BLOCKING - 1); + + for (col = col1; (col < sel_x2)&&(col<(col1+BLOCKING)); col++) + { + if (calc_undistorted_coords (col, row, whirl, wpvals.pinch, &cx, &cy)) + { + /* We are inside the distortion area */ + + /* Top */ + + if (cx >= 0.0) + ix = (int) cx; + else + ix = -((int) -cx + 1); + + if (cy >= 0.0) + iy = (int) cy; + else + iy = -((int) -cy + 1); + + pixel_fetcher_get_pixel (pft, ix, iy, pixel[0]); + pixel_fetcher_get_pixel (pft, ix + 1, iy, pixel[1]); + pixel_fetcher_get_pixel (pft, ix, iy + 1, pixel[2]); + pixel_fetcher_get_pixel (pft, ix + 1, iy + 1, pixel[3]); + + for (i = 0; i < img_bpp; i++) + { + values[0] = pixel[0][i]; + values[1] = pixel[1][i]; + values[2] = pixel[2][i]; + values[3] = pixel[3][i]; + + *top_p++ = bilinear (cx, cy, values); + } + + /* Bottom */ + + cx = cen_x + (cen_x - cx); + cy = cen_y + (cen_y - cy); + + if (cx >= 0.0) + ix = (int) cx; + else + ix = -((int) -cx + 1); + + if (cy >= 0.0) + iy = (int) cy; + else + iy = -((int) -cy + 1); + + pixel_fetcher_get_pixel (pfb, ix, iy, pixel[0]); + pixel_fetcher_get_pixel (pfb, ix + 1, iy, pixel[1]); + pixel_fetcher_get_pixel (pfb, ix, iy + 1, pixel[2]); + pixel_fetcher_get_pixel (pfb, ix + 1, iy + 1, pixel[3]); + + for (i = 0; i < img_bpp; i++) + { + values[0] = pixel[0][i]; + values[1] = pixel[1][i]; + values[2] = pixel[2][i]; + values[3] = pixel[3][i]; + + *bot_p++ = bilinear (cx, cy, values); + } + + bot_p -= 2 * img_bpp; /* We move backwards! 
*/ + } + else + { + /* We are outside the distortion area; + * just copy the source pixels + */ + + /* Top */ + + pixel_fetcher_get_pixel (pft, col, row, pixel[0]); + + for (i = 0; i < img_bpp; i++) + *top_p++ = pixel[0][i]; + + /* Bottom */ + + pixel_fetcher_get_pixel (pfb, + (sel_x2 - 1) - (col - sel_x1), + (sel_y2 - 1) - (row - sel_y1), + pixel[0]); + + for (i = 0; i < img_bpp; i++) + *bot_p++ = pixel[0][i]; + + bot_p -= 2 * img_bpp; /* We move backwards! */ + }/* else */ + } /* for col=col1 */ + + /* Paint rows to image */ + /* Due to blocking, some clipping is needed */ + if (col1<(sel_x2-BLOCKING)) + gimp_pixel_rgn_set_row(&dest_rgn, top_row, col1, row, BLOCKING); + else + gimp_pixel_rgn_set_row(&dest_rgn, top_row, col1, row, sel_x2-col1); + + n=sel_x2-(col1-sel_x1)-BLOCKING; + + if (n>=(sel_x1)) + gimp_pixel_rgn_set_row(&dest_rgn, bot_row, n, (sel_y2 - 1) - (row - sel_y1), BLOCKING); + else + gimp_pixel_rgn_set_row(&dest_rgn, bot_row+ (sel_x1-n)* img_bpp, sel_x1, + (sel_y2 - 1) - (row - sel_y1),BLOCKING-(sel_x1-n)); + } /* for row=row1 */ + + } /* for col1= */ /* Update progress */ - progress += sel_width * 2; + progress += sel_width * 2 *BLOCKING; gimp_progress_update ((double) progress / max_progress); }