[PATCH 2/7] drm/radeon/kms: compute GPU addresses correctly on evergreen

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



There are also two fixes:
- In DRAW_INDEX_2, we read idx_value, but should have read idx+1.
- When correcting SQ_VTX_CONSTANT_WORD1_0.SIZE, we should subtract
  the offset.

Signed-off-by: Marek Olšák <maraeo@xxxxxxxxx>
---
 drivers/gpu/drm/radeon/evergreen_cs.c |  130 ++++++++++++++++++++++++++-------
 1 files changed, 103 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 4674a68..b39a089 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -1811,6 +1811,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 	{
 		int pred_op;
 		int tmp;
+		uint64_t offset;
+
 		if (pkt->count != 1) {
 			DRM_ERROR("bad SET PREDICATION\n");
 			return -EINVAL;
@@ -1834,8 +1836,12 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 			return -EINVAL;
 		}
 
-		ib[idx + 0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff);
-		ib[idx + 1] = tmp + (upper_32_bits(reloc->lobj.gpu_offset) & 0xff);
+		offset = reloc->lobj.gpu_offset +
+		         (idx_value & 0xfffffff0) +
+		         ((u64)(tmp & 0xff) << 32);
+
+		ib[idx + 0] = offset;
+		ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff);
 	}
 	break;
 	case PACKET3_CONTEXT_CONTROL:
@@ -1863,6 +1869,9 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 		}
 		break;
 	case PACKET3_INDEX_BASE:
+	{
+		uint64_t offset;
+
 		if (pkt->count != 1) {
 			DRM_ERROR("bad INDEX_BASE\n");
 			return -EINVAL;
@@ -1872,15 +1881,24 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 			DRM_ERROR("bad INDEX_BASE\n");
 			return -EINVAL;
 		}
-		ib[idx+0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff);
-		ib[idx+1] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+
+		offset = reloc->lobj.gpu_offset +
+		         idx_value +
+		         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
+
+		ib[idx+0] = offset;
+		ib[idx+1] = upper_32_bits(offset) & 0xff;
+
 		r = evergreen_cs_track_check(p);
 		if (r) {
 			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
 			return r;
 		}
 		break;
+	}
 	case PACKET3_DRAW_INDEX:
+	{
+		uint64_t offset;
 		if (pkt->count != 3) {
 			DRM_ERROR("bad DRAW_INDEX\n");
 			return -EINVAL;
@@ -1890,15 +1908,25 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 			DRM_ERROR("bad DRAW_INDEX\n");
 			return -EINVAL;
 		}
-		ib[idx+0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff);
-		ib[idx+1] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+
+		offset = reloc->lobj.gpu_offset +
+		         idx_value +
+		         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
+
+		ib[idx+0] = offset;
+		ib[idx+1] = upper_32_bits(offset) & 0xff;
+
 		r = evergreen_cs_track_check(p);
 		if (r) {
 			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
 			return r;
 		}
 		break;
+	}
 	case PACKET3_DRAW_INDEX_2:
+	{
+		uint64_t offset;
+
 		if (pkt->count != 4) {
 			DRM_ERROR("bad DRAW_INDEX_2\n");
 			return -EINVAL;
@@ -1908,14 +1936,21 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 			DRM_ERROR("bad DRAW_INDEX_2\n");
 			return -EINVAL;
 		}
-		ib[idx+1] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff);
-		ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+
+		offset = reloc->lobj.gpu_offset +
+		         radeon_get_ib_value(p, idx+1) +
+		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+
+		ib[idx+1] = offset;
+		ib[idx+2] = upper_32_bits(offset) & 0xff;
+
 		r = evergreen_cs_track_check(p);
 		if (r) {
 			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
 			return r;
 		}
 		break;
+	}
 	case PACKET3_DRAW_INDEX_AUTO:
 		if (pkt->count != 1) {
 			DRM_ERROR("bad DRAW_INDEX_AUTO\n");
@@ -2006,13 +2041,20 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 		}
 		/* bit 4 is reg (0) or mem (1) */
 		if (idx_value & 0x10) {
+			uint64_t offset;
+
 			r = evergreen_cs_packet_next_reloc(p, &reloc);
 			if (r) {
 				DRM_ERROR("bad WAIT_REG_MEM\n");
 				return -EINVAL;
 			}
-			ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
-			ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+
+			offset = reloc->lobj.gpu_offset +
+			         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
+			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+
+			ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc);
+			ib[idx+2] = upper_32_bits(offset) & 0xff;
 		}
 		break;
 	case PACKET3_SURFACE_SYNC:
@@ -2037,16 +2079,25 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 			return -EINVAL;
 		}
 		if (pkt->count) {
+			uint64_t offset;
+
 			r = evergreen_cs_packet_next_reloc(p, &reloc);
 			if (r) {
 				DRM_ERROR("bad EVENT_WRITE\n");
 				return -EINVAL;
 			}
-			ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
-			ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+			offset = reloc->lobj.gpu_offset +
+			         (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
+			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+
+			ib[idx+1] = offset & 0xfffffff8;
+			ib[idx+2] = upper_32_bits(offset) & 0xff;
 		}
 		break;
 	case PACKET3_EVENT_WRITE_EOP:
+	{
+		uint64_t offset;
+
 		if (pkt->count != 4) {
 			DRM_ERROR("bad EVENT_WRITE_EOP\n");
 			return -EINVAL;
@@ -2056,10 +2107,19 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 			DRM_ERROR("bad EVENT_WRITE_EOP\n");
 			return -EINVAL;
 		}
-		ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
-		ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+
+		offset = reloc->lobj.gpu_offset +
+		         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
+		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+
+		ib[idx+1] = offset & 0xfffffffc;
+		ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
 		break;
+	}
 	case PACKET3_EVENT_WRITE_EOS:
+	{
+		uint64_t offset;
+
 		if (pkt->count != 3) {
 			DRM_ERROR("bad EVENT_WRITE_EOS\n");
 			return -EINVAL;
@@ -2069,9 +2129,15 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 			DRM_ERROR("bad EVENT_WRITE_EOS\n");
 			return -EINVAL;
 		}
-		ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
-		ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+
+		offset = reloc->lobj.gpu_offset +
+		         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
+		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+
+		ib[idx+1] = offset & 0xfffffffc;
+		ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
 		break;
+	}
 	case PACKET3_SET_CONFIG_REG:
 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
 		end_reg = 4 * pkt->count + start_reg - 4;
@@ -2164,6 +2230,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 				ib[idx+1+(i*8)+3] += moffset;
 				break;
 			case SQ_TEX_VTX_VALID_BUFFER:
+			{
+				uint64_t offset64;
 				/* vtx base */
 				r = evergreen_cs_packet_next_reloc(p, &reloc);
 				if (r) {
@@ -2175,11 +2243,15 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 				if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) {
 					/* force size to size of the buffer */
 					dev_warn(p->dev, "vbo resource seems too big for the bo\n");
-					ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj);
+					ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset;
 				}
-				ib[idx+1+(i*8)+0] += (u32)((reloc->lobj.gpu_offset) & 0xffffffff);
-				ib[idx+1+(i*8)+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+
+				offset64 = reloc->lobj.gpu_offset + offset;
+				ib[idx+1+(i*8)+0] = offset64;
+				ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) |
+						    (upper_32_bits(offset64) & 0xff);
 				break;
+			}
 			case SQ_TEX_VTX_INVALID_TEXTURE:
 			case SQ_TEX_VTX_INVALID_BUFFER:
 			default:
@@ -2255,8 +2327,9 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 					  offset + 4, radeon_bo_size(reloc->robj));
 				return -EINVAL;
 			}
-			ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
-			ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+			offset += reloc->lobj.gpu_offset;
+			ib[idx+1] = offset;
+			ib[idx+2] = upper_32_bits(offset) & 0xff;
 		}
 		/* Reading data from SRC_ADDRESS. */
 		if (((idx_value >> 1) & 0x3) == 2) {
@@ -2273,8 +2346,9 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 					  offset + 4, radeon_bo_size(reloc->robj));
 				return -EINVAL;
 			}
-			ib[idx+3] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
-			ib[idx+4] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+			offset += reloc->lobj.gpu_offset;
+			ib[idx+3] = offset;
+			ib[idx+4] = upper_32_bits(offset) & 0xff;
 		}
 		break;
 	case PACKET3_COPY_DW:
@@ -2297,8 +2371,9 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 					  offset + 4, radeon_bo_size(reloc->robj));
 				return -EINVAL;
 			}
-			ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
-			ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+			offset += reloc->lobj.gpu_offset;
+			ib[idx+1] = offset;
+			ib[idx+2] = upper_32_bits(offset) & 0xff;
 		} else {
 			/* SRC is a reg. */
 			reg = radeon_get_ib_value(p, idx+1) << 2;
@@ -2320,8 +2395,9 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 					  offset + 4, radeon_bo_size(reloc->robj));
 				return -EINVAL;
 			}
-			ib[idx+3] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
-			ib[idx+4] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+			offset += reloc->lobj.gpu_offset;
+			ib[idx+3] = offset;
+			ib[idx+4] = upper_32_bits(offset) & 0xff;
 		} else {
 			/* DST is a reg. */
 			reg = radeon_get_ib_value(p, idx+3) << 2;
-- 
1.7.5.4

_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/dri-devel



[Index of Archives]     [Linux DRI Users]     [Linux Intel Graphics]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux