[D3D] Faster memory to frame buffer blits.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This gains me about 10 FPS in the Ragnarok menu (which makes extensive use of
Blt to the frame buffer). I have no idea how it affects the game itself, as it
went commercial and I can no longer log in with the 'beta' account I was given.

If anyone wants the game to work better, feel free to lend me your account
to do some tests :-)

               Lionel

Changelog:
 - some tracing fixes (FIXME calls turned into TRACE)
 - faster Blt to the frame buffer using the texture engine
  
-- 
		 Lionel Ulmer - http://www.bbrox.org/
--- dlls/ddraw_CVS/d3ddevice/mesa.c	Mon Jun 16 00:00:37 2003
+++ dlls/ddraw/d3ddevice/mesa.c	Sat Jun 21 12:32:41 2003
@@ -153,7 +153,6 @@
     if (gl_d3d_dev->fogging != FALSE) glDisable(GL_FOG);
     if (gl_d3d_dev->current_tex_env != GL_REPLACE)
 	glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
-    glColor3ub(0xFF, 0xFF, 0xFF);
     
     return opt_bitmap;
 }
@@ -2686,14 +2685,12 @@
     return d3ddevice_clear(This, WINE_GL_BUFFER_BACK, dwCount, lpRects, dwFlags, dwColor, dvZ, dwStencil);
 }
 
-HRESULT
-d3ddevice_blt(IDirectDrawSurfaceImpl *This, LPRECT rdst,
-	      LPDIRECTDRAWSURFACE7 src, LPRECT rsrc,
-	      DWORD dwFlags, LPDDBLTFX lpbltfx)
+static HRESULT
+setup_rect_and_surface_for_blt(IDirectDrawSurfaceImpl *This,
+			       WINE_GL_BUFFER_TYPE *buffer_type_p, D3DRECT *rect)
 {
     IDirect3DDeviceGLImpl *gl_d3d_dev = (IDirect3DDeviceGLImpl *) This->d3ddevice;
     WINE_GL_BUFFER_TYPE buffer_type;
-    D3DRECT rect;
     
     /* First check if we BLT to the backbuffer... */
     if ((This->surface_desc.ddsCaps.dwCaps & (DDSCAPS_BACKBUFFER)) != 0) {
@@ -2704,7 +2701,30 @@
 	ERR("Only BLT override to front or back-buffer is supported for now !\n");
 	return DDERR_INVALIDPARAMS;
     }
+            
+    if ((gl_d3d_dev->state[buffer_type] == SURFACE_MEMORY_DIRTY) &&
+	(rect->u1.x1 >= gl_d3d_dev->lock_rect[buffer_type].left) &&
+	(rect->u2.y1 >= gl_d3d_dev->lock_rect[buffer_type].top) &&
+	(rect->u3.x2 <= gl_d3d_dev->lock_rect[buffer_type].right) &&
+	(rect->u4.y2 <= gl_d3d_dev->lock_rect[buffer_type].bottom)) {
+	/* If the memory zone is already dirty, use the standard 'in memory' blit operations and not
+	 * GL to do it.
+	 */
+	return DDERR_INVALIDPARAMS;
+    }
+    *buffer_type_p = buffer_type;
     
+    return DD_OK;
+}
+
+HRESULT
+d3ddevice_blt(IDirectDrawSurfaceImpl *This, LPRECT rdst,
+	      LPDIRECTDRAWSURFACE7 src, LPRECT rsrc,
+	      DWORD dwFlags, LPDDBLTFX lpbltfx)
+{
+    WINE_GL_BUFFER_TYPE buffer_type;
+    D3DRECT rect;
+
     if (rdst) {
 	rect.u1.x1 = rdst->left;
 	rect.u2.y1 = rdst->top;
@@ -2716,17 +2736,8 @@
 	rect.u3.x2 = This->surface_desc.dwWidth;
 	rect.u4.y2 = This->surface_desc.dwHeight;
     }
-        
-    if ((gl_d3d_dev->state[buffer_type] == SURFACE_MEMORY_DIRTY) &&
-	(rect.u1.x1 >= gl_d3d_dev->lock_rect[buffer_type].left) &&
-	(rect.u2.y1 >= gl_d3d_dev->lock_rect[buffer_type].top) &&
-	(rect.u3.x2 <= gl_d3d_dev->lock_rect[buffer_type].right) &&
-	(rect.u4.y2 <= gl_d3d_dev->lock_rect[buffer_type].bottom)) {
-	/* If the memory zone is already dirty, use the standard 'in memory' blit operations and not
-	 * GL to do it.
-	 */
-	return DDERR_INVALIDPARAMS;
-    }
+    
+    if (setup_rect_and_surface_for_blt(This, &buffer_type, &rect) != DD_OK) return DDERR_INVALIDPARAMS;
 
     if (dwFlags & DDBLT_COLORFILL) {
         /* This is easy to handle for the D3D Device... */
@@ -2794,14 +2805,16 @@
         LEAVE_GL();
         
         return DD_OK;
-    } else if ((dwFlags & (~(DDBLT_WAIT|DDBLT_ASYNC))) == 0) {
+    } else if ((dwFlags & (~(DDBLT_KEYSRC|DDBLT_WAIT|DDBLT_ASYNC))) == 0) {
 	/* Normal blit without any special case... */
 	if (src != NULL) {
 	    /* And which has a SRC surface */
 	    IDirectDrawSurfaceImpl *src_impl = ICOM_OBJECT(IDirectDrawSurfaceImpl, IDirectDrawSurface7, src);
+	    
 	    if ((src_impl->surface_desc.ddsCaps.dwCaps & DDSCAPS_3DDEVICE) &&
-		(src_impl->d3ddevice == This->d3ddevice)) {
-		/* Both are 3D devices and using the same GL device */
+		(src_impl->d3ddevice == This->d3ddevice) &&
+		((dwFlags & DDBLT_KEYSRC) == 0)) {
+		/* Both are 3D devices and using the same GL device and the Blt is without color-keying */
 		D3DRECT src_rect;
 		int width, height;
 		GLenum prev_draw;
@@ -2920,6 +2933,110 @@
 		LEAVE_GL();
 
 		return DD_OK;
+	    } else {
+		/* This is the normal 'with source' Blit. Use the texture engine to do the Blt for us
+		   (this prevents calling glReadPixels) */
+		D3DRECT src_rect;
+		int width, height;
+		GLenum prev_draw;
+		IDirect3DDeviceGLImpl *gl_d3d_dev = (IDirect3DDeviceGLImpl *) This->d3ddevice;
+		BOOLEAN initial = FALSE;
+		DWORD opt_bitmap;
+		int x, y;
+		double x_stretch, y_stretch;
+		
+		if (dwFlags & DDBLT_KEYSRC) {
+		    /* As I have no game using this, did not bother to do it yet as I cannot test it anyway */
+		    FIXME(" Blt overide with color-keying not supported yet.\n");
+		    return DDERR_INVALIDPARAMS;
+		}
+
+		if (rsrc) {
+		    src_rect.u1.x1 = rsrc->left;
+		    src_rect.u2.y1 = rsrc->top;
+		    src_rect.u3.x2 = rsrc->right;
+		    src_rect.u4.y2 = rsrc->bottom;
+		} else {
+		    src_rect.u1.x1 = 0;
+		    src_rect.u2.y1 = 0;
+		    src_rect.u3.x2 = src_impl->surface_desc.dwWidth;
+		    src_rect.u4.y2 = src_impl->surface_desc.dwHeight;
+		}
+
+		width = src_rect.u3.x2 - src_rect.u1.x1;
+		height = src_rect.u4.y2 - src_rect.u2.y1;
+
+		x_stretch = (double) (rect.u3.x2 - rect.u1.x1) / (double) width;
+		y_stretch = (double) (rect.u4.y2 - rect.u2.y1) / (double) height;
+
+		TRACE(" using memory to buffer Blt overide.\n");
+
+		ENTER_GL();
+
+		opt_bitmap = d3ddevice_set_state_for_flush(This->d3ddevice, (LPCRECT) &rect, FALSE, &initial);
+		
+		if (upload_surface_to_tex_memory_init(src_impl, 0, &gl_d3d_dev->current_internal_format,
+						      initial, FALSE, UNLOCK_TEX_SIZE, UNLOCK_TEX_SIZE) != DD_OK) {
+		    ERR(" unsupported pixel format at memory to buffer Blt overide.\n");
+		    LEAVE_GL();
+		    return DDERR_INVALIDPARAMS;
+		}
+		
+		glGetIntegerv(GL_DRAW_BUFFER, &prev_draw);
+		if (buffer_type == WINE_GL_BUFFER_FRONT)
+		    glDrawBuffer(GL_FRONT);
+		else
+		    glDrawBuffer(GL_BACK);
+
+		/* Now the serious stuff happens. This is basically the same code that for the memory
+		   flush to frame buffer ... with stretching and different rectangles added :-) */
+		for (y = 0; y < height; y += UNLOCK_TEX_SIZE) {
+		    RECT flush_rect;
+
+		    flush_rect.top    = src_rect.u2.y1 + y;
+		    flush_rect.bottom = ((src_rect.u2.y1 + y + UNLOCK_TEX_SIZE > src_rect.u4.y2) ?
+					 src_rect.u4.y2 :
+					 (src_rect.u2.y1 + y + UNLOCK_TEX_SIZE));
+		    
+		    for (x = 0; x < width; x += UNLOCK_TEX_SIZE) {
+			flush_rect.left  = src_rect.u1.x1 + x;
+			flush_rect.right = ((src_rect.u1.x1 + x + UNLOCK_TEX_SIZE > src_rect.u3.x2) ?
+					    src_rect.u3.x2 :
+					    (src_rect.u1.x1 + x + UNLOCK_TEX_SIZE));
+			
+			upload_surface_to_tex_memory(&flush_rect, 0, 0, &(gl_d3d_dev->surface_ptr));
+			
+			glBegin(GL_QUADS);
+			glTexCoord2f(0.0, 0.0);
+			glVertex3d(rect.u1.x1 + (x * x_stretch),
+				   rect.u2.y1 + (y * y_stretch),
+				   0.5);
+			glTexCoord2f(1.0, 0.0);
+			glVertex3d(rect.u1.x1 + ((x + UNLOCK_TEX_SIZE) * x_stretch),
+				   rect.u2.y1 + (y * y_stretch),
+				   0.5);
+			glTexCoord2f(1.0, 1.0);
+			glVertex3d(rect.u1.x1 + ((x + UNLOCK_TEX_SIZE) * x_stretch),
+				   rect.u2.y1 + ((y + UNLOCK_TEX_SIZE) * y_stretch),
+				   0.5);
+			glTexCoord2f(0.0, 1.0);
+			glVertex3d(rect.u1.x1 + (x * x_stretch),
+				   rect.u2.y1 + ((y + UNLOCK_TEX_SIZE) * y_stretch),
+				   0.5);
+			glEnd();
+		    }
+		}
+		
+		upload_surface_to_tex_memory_release();
+		d3ddevice_restore_state_after_flush(This->d3ddevice, opt_bitmap, FALSE);
+		
+		if (((buffer_type == WINE_GL_BUFFER_FRONT) && (prev_draw == GL_BACK)) ||
+		    ((buffer_type == WINE_GL_BUFFER_BACK)  && (prev_draw == GL_FRONT)))
+		    glDrawBuffer(prev_draw);
+		
+		LEAVE_GL();
+
+		return DD_OK;		
 	    }
 	}
     }
@@ -2931,7 +3048,117 @@
 		  DWORD dsty, LPDIRECTDRAWSURFACE7 src,
 		  LPRECT rsrc, DWORD trans)
 {
-     return DDERR_INVALIDPARAMS;
+    RECT rsrc2;
+    RECT rdst;
+    IDirectDrawSurfaceImpl *src_impl = ICOM_OBJECT(IDirectDrawSurfaceImpl, IDirectDrawSurface7, src);
+    IDirect3DDeviceGLImpl *gl_d3d_dev = (IDirect3DDeviceGLImpl *) This->d3ddevice;
+    WINE_GL_BUFFER_TYPE buffer_type;
+    GLenum prev_draw;
+    DWORD opt_bitmap;
+    BOOLEAN initial;
+    int width, height, x, y;
+    
+    /* Cannot support DSTCOLORKEY blitting... */
+    if ((trans & DDBLTFAST_DESTCOLORKEY) != 0) return DDERR_INVALIDPARAMS;
+
+    if (rsrc == NULL) {
+	WARN("rsrc is NULL - getting the whole surface !!\n");
+	rsrc = &rsrc2;
+	rsrc->left = rsrc->top = 0;
+	rsrc->right = src_impl->surface_desc.dwWidth;
+	rsrc->bottom = src_impl->surface_desc.dwHeight;
+    } else {
+	rsrc2 = *rsrc;
+	rsrc = &rsrc2;
+    }
+
+    rdst.left = dstx;
+    rdst.top = dsty;
+    rdst.right = dstx + (rsrc->right - rsrc->left);
+    if (rdst.right > This->surface_desc.dwWidth) {
+	rsrc->right -= (This->surface_desc.dwWidth - rdst.right);
+	rdst.right = This->surface_desc.dwWidth;
+    }
+    rdst.bottom = dsty + (rsrc->bottom - rsrc->top);
+    if (rdst.bottom > This->surface_desc.dwHeight) {
+	rsrc->bottom -= (This->surface_desc.dwHeight - rdst.bottom);
+	rdst.bottom = This->surface_desc.dwHeight;
+    }
+
+    width = rsrc->right - rsrc->left;
+    height = rsrc->bottom - rsrc->top;
+    
+    if (setup_rect_and_surface_for_blt(This, &buffer_type, (D3DRECT *) &rdst) != DD_OK) return DDERR_INVALIDPARAMS;
+
+    TRACE(" using BltFast memory to frame buffer overide.\n");
+    
+    ENTER_GL();
+    
+    opt_bitmap = d3ddevice_set_state_for_flush(This->d3ddevice, &rdst, (trans & DDBLTFAST_SRCCOLORKEY) != 0, &initial);
+    
+    if (upload_surface_to_tex_memory_init(src_impl, 0, &gl_d3d_dev->current_internal_format,
+					  initial, (trans & DDBLTFAST_SRCCOLORKEY) != 0,
+					  UNLOCK_TEX_SIZE, UNLOCK_TEX_SIZE) != DD_OK) {
+	ERR(" unsupported pixel format at memory to buffer Blt overide.\n");
+	LEAVE_GL();
+	return DDERR_INVALIDPARAMS;
+    }
+    
+    glGetIntegerv(GL_DRAW_BUFFER, &prev_draw);
+    if (buffer_type == WINE_GL_BUFFER_FRONT)
+	glDrawBuffer(GL_FRONT);
+    else
+	glDrawBuffer(GL_BACK);
+    
+    /* Now the serious stuff happens. This is basically the same code that for the memory
+       flush to frame buffer but with different rectangles for source and destination :-) */
+    for (y = 0; y < height; y += UNLOCK_TEX_SIZE) {
+	RECT flush_rect;
+	
+	flush_rect.top    = rsrc->top + y;
+	flush_rect.bottom = ((rsrc->top + y + UNLOCK_TEX_SIZE > rsrc->bottom) ?
+			     rsrc->bottom :
+			     (rsrc->top + y + UNLOCK_TEX_SIZE));
+	
+	for (x = 0; x < width; x += UNLOCK_TEX_SIZE) {
+	    flush_rect.left  = rsrc->left + x;
+	    flush_rect.right = ((rsrc->left + x + UNLOCK_TEX_SIZE > rsrc->right) ?
+				rsrc->right :
+				(rsrc->left + x + UNLOCK_TEX_SIZE));
+	    
+	    upload_surface_to_tex_memory(&flush_rect, 0, 0, &(gl_d3d_dev->surface_ptr));
+	    
+	    glBegin(GL_QUADS);
+	    glTexCoord2f(0.0, 0.0);
+	    glVertex3d(rdst.left + x,
+		       rdst.top + y,
+		       0.5);
+	    glTexCoord2f(1.0, 0.0);
+	    glVertex3d(rdst.left + (x + UNLOCK_TEX_SIZE),
+		       rdst.top + y,
+		       0.5);
+	    glTexCoord2f(1.0, 1.0);
+	    glVertex3d(rdst.left + (x + UNLOCK_TEX_SIZE),
+		       rdst.top + (y + UNLOCK_TEX_SIZE),
+		       0.5);
+	    glTexCoord2f(0.0, 1.0);
+	    glVertex3d(rdst.left + x,
+		       rdst.top + (y + UNLOCK_TEX_SIZE),
+		       0.5);
+	    glEnd();
+	}
+    }
+    
+    upload_surface_to_tex_memory_release();
+    d3ddevice_restore_state_after_flush(This->d3ddevice, opt_bitmap, (trans & DDBLTFAST_SRCCOLORKEY) != 0);
+    
+    if (((buffer_type == WINE_GL_BUFFER_FRONT) && (prev_draw == GL_BACK)) ||
+	((buffer_type == WINE_GL_BUFFER_BACK)  && (prev_draw == GL_FRONT)))
+	glDrawBuffer(prev_draw);
+    
+    LEAVE_GL();
+    
+    return DD_OK;
 }
 
 void
@@ -3325,7 +3552,7 @@
 	for (x = pRect->left; x < pRect->right; x += UNLOCK_TEX_SIZE) {
 	    /* First, upload the texture... */
 	    flush_rect.left = x;
-	    flush_rect.right  = (x + UNLOCK_TEX_SIZE > pRect->right)  ? pRect->right  : (x + UNLOCK_TEX_SIZE);
+	    flush_rect.right = (x + UNLOCK_TEX_SIZE > pRect->right)  ? pRect->right  : (x + UNLOCK_TEX_SIZE);
 
 	    upload_surface_to_tex_memory(&flush_rect, 0, 0, &(gl_d3d_dev->surface_ptr));
 
--- dlls/ddraw_CVS/dsurface/dib.c	Sun May 25 15:35:40 2003
+++ dlls/ddraw/dsurface/dib.c	Mon Jun 16 22:44:35 2003
@@ -819,16 +819,16 @@
 
 
     if (TRACE_ON(ddraw)) {
-	FIXME("(%p)->(%ld,%ld,%p,%p,%08lx)\n",
+	TRACE("(%p)->(%ld,%ld,%p,%p,%08lx)\n",
 		This,dstx,dsty,src,rsrc,trans
 	);
-	FIXME("\ttrans:");
+	TRACE("\ttrans:");
 	if (FIXME_ON(ddraw))
 	  DDRAW_dump_DDBLTFAST(trans);
 	if (rsrc)
-	  FIXME("\tsrcrect: %ldx%ld-%ldx%ld\n",rsrc->left,rsrc->top,rsrc->right,rsrc->bottom);
+	  TRACE("\tsrcrect: %ldx%ld-%ldx%ld\n",rsrc->left,rsrc->top,rsrc->right,rsrc->bottom);
 	else
-	  FIXME(" srcrect: NULL\n");
+	  TRACE(" srcrect: NULL\n");
     }
 
     /* First, check if the possible override function handles this case */
--- dlls/ddraw_CVS/mesa.c	Sun Jun 15 18:46:05 2003
+++ dlls/ddraw/mesa.c	Sat Jun 21 12:28:48 2003
@@ -617,6 +617,8 @@
 static GLuint current_level;
 static DWORD current_tex_width;
 static DWORD current_tex_height;
+static BOOLEAN need_alignement_restore;
+static int current_storage_width;
 
 HRESULT upload_surface_to_tex_memory_init(IDirectDrawSurfaceImpl *surf_ptr, GLuint level, GLenum *current_internal_format,
 					  BOOLEAN need_to_alloc, BOOLEAN need_alpha_ck, DWORD tex_width, DWORD tex_height)
@@ -627,6 +629,8 @@
     GLenum internal_format = GL_LUMINANCE; /* A bogus value to be sure to have a nice Mesa warning :-) */
     BYTE bpp = GET_BPP(surf_ptr->surface_desc);
     BOOL sub_texture = TRUE;
+
+    need_alignement_restore = FALSE;
     
     current_surface = surf_ptr;
     current_level = level;
@@ -868,16 +872,16 @@
     }
 
     if ((sub_texture == TRUE) && (convert_type == NO_CONVERSION)) {
-	glPixelStorei(GL_UNPACK_ROW_LENGTH, surf_ptr->surface_desc.u1.lPitch / bpp);
+	current_storage_width = surf_ptr->surface_desc.u1.lPitch / bpp;
     } else {
 	if (surf_ptr->surface_desc.u1.lPitch == (surf_ptr->surface_desc.dwWidth * bpp)) {
-	    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+	    current_storage_width = 0;
 	} else {
-	    glPixelStorei(GL_UNPACK_ROW_LENGTH, surf_ptr->surface_desc.u1.lPitch / bpp);
-	}
-	
+	    current_storage_width = surf_ptr->surface_desc.u1.lPitch / bpp;
+	}	
     }
-    
+    glPixelStorei(GL_UNPACK_ROW_LENGTH, current_storage_width);
+
     return DD_OK;
 }
 
@@ -1217,10 +1221,31 @@
     }
 
     if (convert_type != NO_CONVERSION) {
+	int storage_width;
+	
 	surf_buffer = *temp_buffer;
 	if (width != current_tex_width) {
 	    /* Overide the default PixelStore parameter if only using part of the actual texture */
-	    glPixelStorei(GL_UNPACK_ROW_LENGTH, width);
+	    storage_width = width;
+	    /* This is needed when locking with a rectangle with 'odd' width */
+	    if (need_alignement_restore == FALSE) {
+		glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
+		need_alignement_restore = TRUE;
+	    }
+	} else {
+	    if (current_surface->surface_desc.u1.lPitch == (current_surface->surface_desc.dwWidth * bpp)) {
+		storage_width = 0;
+	    } else {
+		storage_width = current_surface->surface_desc.u1.lPitch / bpp;
+	    }
+	    if (need_alignement_restore == TRUE) {
+		glPixelStorei(GL_UNPACK_ALIGNMENT, 0);
+		need_alignement_restore = FALSE;
+	    }
+	}
+	if (storage_width != current_storage_width) {
+	    glPixelStorei(GL_UNPACK_ROW_LENGTH, storage_width);
+	    current_storage_width = storage_width;
 	}
     }
     
@@ -1239,5 +1264,9 @@
 {
     current_surface = NULL;
 
+    if (need_alignement_restore == TRUE) {
+	glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
+    }
+    
     return DD_OK;
 }

[Index of Archives]     [Gimp for Windows]     [Red Hat]     [Samba]     [Yosemite Camping]     [Graphics Cards]     [Wine Home]

  Powered by Linux