[Gimp-developer] [patch] Major speedup for whirl&pinch plugin

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,
I don't know who's currently "responsible" for the whirl&pinch plugin, so I
post my patch to this list.

I have modified whirl&pinch slightly to use "blocking", i.e. doing all
calculations in small squares (32*32). With that technique, which is very
common in numerical computing, the CPU caches and (for GIMP) the tile cache
have a much higher hit rate.
The boost is quite spectacular: on an Athlon-600, the original whirl&pinch
needs 30s to complete on a larger image (1400*1400); with my patch, only 6.5s.
That's a speedup by a factor of about 4.5 without any change in the algorithm
itself!

The changes are relatively small (effectively about 10 lines) and affect 
mostly clipping.

I have found no side effects of the patch...

The blocking can IMHO easily be used for a lot of other filters, and should
give a large speedup for most of GIMP's filters.

Please try out the patch and apply it to the source tree if you like it ;-)
-- 
         Georg Acher, acher@xxxxxxxxx         
         http://www.in.tum.de/~acher/
          "Oh no, not again !" The bowl of petunias          
--- whirlpinch.c.org	Thu Apr  5 17:47:17 2001
+++ whirlpinch.c	Thu Apr  5 18:49:09 2001
@@ -22,6 +22,14 @@
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  */
 
+/* Version 2.10:
+ *
+ * Major Speedup by use of "blocking", ie. doing the calculations
+ * in small squares, thus gaining a performance boost from CPU caches
+ * and the tile cache.
+ *
+ * Georg Acher, acher@xxxxxxxxx
+ */
 
 /* Version 2.09:
  *
@@ -63,7 +71,7 @@
 
 
 #define PLUG_IN_NAME    "plug_in_whirl_pinch"
-#define PLUG_IN_VERSION "May 1997, 2.09"
+#define PLUG_IN_VERSION "April 2001, 2.10"
 
 /***** Magic numbers *****/
 
@@ -71,6 +79,10 @@
 #define SCALE_WIDTH  200
 #define ENTRY_WIDTH  60
 
+/* blocking size, 32*32pixels is a good compromise for all CPUs */
+
+#define BLOCKING 32
+
 /***** Types *****/
 
 typedef struct
@@ -366,12 +378,13 @@
   guchar          *top_row, *bot_row;
   guchar          *top_p, *bot_p;
   gint             row, col;
+  gint             row1,col1;
   guchar           pixel[4][4];
   guchar           values[4];
   double           whirl;
   double           cx, cy;
   int              ix, iy;
-  int              i;
+  int              i,n;
   guchar           bg_color[4];
   pixel_fetcher_t *pft, *pfb;
 
@@ -406,112 +419,133 @@
   whirl   = wpvals.whirl * G_PI / 180;
   radius2 = radius * radius * wpvals.radius;
 
-  for (row = sel_y1; row <= ((sel_y1 + sel_y2) / 2); row++)
+  /* Whirl&Pinch in small squares to benefit from cache effects
+	   (tile cache, CPU cache) 
+	   20010405 GA
+	   */
+  for (row1 = sel_y1; row1 <= ((sel_y1 + sel_y2) / 2); row1+=BLOCKING)
     {
-      top_p = top_row;
-      bot_p = bot_row + img_bpp * (sel_width - 1);
-
-      for (col = sel_x1; col < sel_x2; col++)
-	{
-	  if (calc_undistorted_coords (col, row, whirl, wpvals.pinch, &cx, &cy))
-	    {
-	      /* We are inside the distortion area */
-
-	      /* Top */
-
-	      if (cx >= 0.0)
-		ix = (int) cx;
-	      else
-		ix = -((int) -cx + 1);
-
-	      if (cy >= 0.0)
-		iy = (int) cy;
-	      else
-		iy = -((int) -cy + 1);
-
-	      pixel_fetcher_get_pixel (pft, ix,     iy,     pixel[0]);
-	      pixel_fetcher_get_pixel (pft, ix + 1, iy,     pixel[1]);
-	      pixel_fetcher_get_pixel (pft, ix,     iy + 1, pixel[2]);
-	      pixel_fetcher_get_pixel (pft, ix + 1, iy + 1, pixel[3]);
-
-	      for (i = 0; i < img_bpp; i++)
-		{
-		  values[0] = pixel[0][i];
-		  values[1] = pixel[1][i];
-		  values[2] = pixel[2][i];
-		  values[3] = pixel[3][i];
-
-		  *top_p++ = bilinear (cx, cy, values);
-		}
-
-	      /* Bottom */
-
-	      cx = cen_x + (cen_x - cx);
-	      cy = cen_y + (cen_y - cy);
-
-	      if (cx >= 0.0)
-		ix = (int) cx;
-	      else
-		ix = -((int) -cx + 1);
-
-	      if (cy >= 0.0)
-		iy = (int) cy;
-	      else
-		iy = -((int) -cy + 1);
-
-	      pixel_fetcher_get_pixel (pfb, ix,     iy,     pixel[0]);
-	      pixel_fetcher_get_pixel (pfb, ix + 1, iy,     pixel[1]);
-	      pixel_fetcher_get_pixel (pfb, ix,     iy + 1, pixel[2]);
-	      pixel_fetcher_get_pixel (pfb, ix + 1, iy + 1, pixel[3]);
-
-	      for (i = 0; i < img_bpp; i++)
-		{
-		  values[0] = pixel[0][i];
-		  values[1] = pixel[1][i];
-		  values[2] = pixel[2][i];
-		  values[3] = pixel[3][i];
-
-		  *bot_p++ = bilinear (cx, cy, values);
-		}
-
-	      bot_p -= 2 * img_bpp; /* We move backwards! */
-	    }
-	  else
-	    {
-	      /*  We are outside the distortion area;
-	       *  just copy the source pixels
-	       */
-
-	      /* Top */
-
-	      pixel_fetcher_get_pixel (pft, col, row, pixel[0]);
-
-	      for (i = 0; i < img_bpp; i++)
-		*top_p++ = pixel[0][i];
-
-	      /* Bottom */
-
-	      pixel_fetcher_get_pixel (pfb,
-				       (sel_x2 - 1) - (col - sel_x1),
-				       (sel_y2 - 1) - (row - sel_y1),
-				       pixel[0]);
-
-	      for (i = 0; i < img_bpp; i++)
-		*bot_p++ = pixel[0][i];
-
-	      bot_p -= 2 * img_bpp; /* We move backwards! */
-	    }
-	}
-
-      /* Paint rows to image */
-
-      gimp_pixel_rgn_set_row (&dest_rgn, top_row, sel_x1, row, sel_width);
-      gimp_pixel_rgn_set_row (&dest_rgn, bot_row,
-			      sel_x1, (sel_y2 - 1) - (row - sel_y1), sel_width);
-
+    	for(col1 = sel_x1; col1 < sel_x2; col1+=BLOCKING) 
+	 			{
+					/* Now whirl the block starting at (col1,row1) with size BLOCKING*BLOCKING */
+
+					for (row = row1; (row <= ((sel_y1 + sel_y2) / 2))&&(row<(row1+BLOCKING)); row++)
+						{
+		 					top_p = top_row;
+      				bot_p = bot_row + img_bpp * (BLOCKING - 1);
+
+							for (col = col1; (col < sel_x2)&&(col<(col1+BLOCKING)); col++)     
+							{
+	  						if (calc_undistorted_coords (col, row, whirl, wpvals.pinch, &cx, &cy))
+	    						{
+	    						  /* We are inside the distortion area */
+
+	  						    /* Top */
+
+	  						    if (cx >= 0.0)
+											ix = (int) cx;
+							      else
+											ix = -((int) -cx + 1);
+
+	     							if (cy >= 0.0)
+											iy = (int) cy;
+	     							else
+											iy = -((int) -cy + 1);
+
+	  						    pixel_fetcher_get_pixel (pft, ix,     iy,     pixel[0]);
+							      pixel_fetcher_get_pixel (pft, ix + 1, iy,     pixel[1]);
+							      pixel_fetcher_get_pixel (pft, ix,     iy + 1, pixel[2]);
+							      pixel_fetcher_get_pixel (pft, ix + 1, iy + 1, pixel[3]);
+
+	      						for (i = 0; i < img_bpp; i++)
+											{
+		  									values[0] = pixel[0][i];
+		  									values[1] = pixel[1][i];
+											  values[2] = pixel[2][i];
+		  									values[3] = pixel[3][i];
+
+		  									*top_p++ = bilinear (cx, cy, values);
+											}
+
+	     						 /* Bottom */
+
+	     							cx = cen_x + (cen_x - cx);
+	      						cy = cen_y + (cen_y - cy);
+
+	      						if (cx >= 0.0)
+											ix = (int) cx;
+	      						else
+											ix = -((int) -cx + 1);
+
+	      						if (cy >= 0.0)
+											iy = (int) cy;
+	      						else
+											iy = -((int) -cy + 1);
+
+	     							pixel_fetcher_get_pixel (pfb, ix,     iy,     pixel[0]);
+	      						pixel_fetcher_get_pixel (pfb, ix + 1, iy,     pixel[1]);
+	      						pixel_fetcher_get_pixel (pfb, ix,     iy + 1, pixel[2]);
+	      						pixel_fetcher_get_pixel (pfb, ix + 1, iy + 1, pixel[3]);
+
+	      						for (i = 0; i < img_bpp; i++)
+											{
+		  									values[0] = pixel[0][i];
+		  									values[1] = pixel[1][i];
+		  									values[2] = pixel[2][i];
+		  									values[3] = pixel[3][i];
+
+		  									*bot_p++ = bilinear (cx, cy, values);
+											}
+
+	      						bot_p -= 2 * img_bpp; /* We move backwards! */
+	    						}
+	  						else
+	    						{
+	      						/*  We are outside the distortion area;
+	       						 *  just copy the source pixels
+	       						*/
+
+	      					/* Top */
+
+	      						pixel_fetcher_get_pixel (pft, col, row, pixel[0]);
+
+	      						for (i = 0; i < img_bpp; i++)
+											*top_p++ = pixel[0][i];
+
+	      						/* Bottom */
+
+	     							 pixel_fetcher_get_pixel (pfb,
+				       					(sel_x2 - 1) - (col - sel_x1),
+				       					(sel_y2 - 1) - (row - sel_y1),
+				       					pixel[0]);
+
+	      							for (i = 0; i < img_bpp; i++)
+												*bot_p++ = pixel[0][i];
+
+	      							bot_p -= 2 * img_bpp; /* We move backwards! */
+	    							}/* else */
+								} /* for col=col1 */
+
+     					 /* Paint rows to image */
+							/* Due to blocking, some clipping is needed */
+							if (col1<(sel_x2-BLOCKING))
+											gimp_pixel_rgn_set_row(&dest_rgn, top_row, col1, row, BLOCKING);
+							else
+											gimp_pixel_rgn_set_row(&dest_rgn, top_row, col1, row, sel_x2-col1);
+
+							n=sel_x2-(col1-sel_x1)-BLOCKING;
+							
+							if (n>=(sel_x1))
+											gimp_pixel_rgn_set_row(&dest_rgn, bot_row, n, (sel_y2 - 1) - (row - sel_y1), BLOCKING);
+							else
+											gimp_pixel_rgn_set_row(&dest_rgn, bot_row+ (sel_x1-n)* img_bpp, sel_x1, 
+																						 (sel_y2 - 1) - (row - sel_y1),BLOCKING-(sel_x1-n));
+						} /* for row=row1 */
+					
+				} /* for col1= */
       /* Update progress */
 
-      progress += sel_width * 2;
+      progress += sel_width * 2 *BLOCKING;
       gimp_progress_update ((double) progress / max_progress);
     }
 

[Index of Archives]     [Video For Linux]     [Photo]     [Yosemite News]     [gtk]     [GIMP for Windows]     [KDE]     [GEGL]     [Gimp's Home]     [Gimp on GUI]     [Gimp on Windows]     [Steve's Art]

  Powered by Linux