On Fri, Feb 10, 2012 at 01:35:21PM -0500, j.glisse@xxxxxxxxx wrote: > From: Jerome Glisse <jglisse@xxxxxxxxxx> > > For 6xx+. Required for mesa to use htile support for HiZ/HiS. > Userspace will check radeon version 2.14 which is bumped either > by tiling patch or stream out patch. > > Signed-off-by: Pierre-Eric Pelloux-Prayer <pelloux@xxxxxxxxx> > Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> > Signed-off-by: Jerome Glisse <jglisse@xxxxxxxxxx> NAKing this: I got the size computation wrong. > --- > drivers/gpu/drm/radeon/evergreen_cs.c | 56 ++++++- > drivers/gpu/drm/radeon/evergreend.h | 7 + > drivers/gpu/drm/radeon/r600_cs.c | 292 ++++++++++++++++++----------- > drivers/gpu/drm/radeon/r600d.h | 7 + > drivers/gpu/drm/radeon/reg_srcs/cayman | 1 - > drivers/gpu/drm/radeon/reg_srcs/evergreen | 1 - > drivers/gpu/drm/radeon/reg_srcs/r600 | 1 - > 7 files changed, 249 insertions(+), 116 deletions(-) > > diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c > index 2ed17f7..6e269ff 100644 > --- a/drivers/gpu/drm/radeon/evergreen_cs.c > +++ b/drivers/gpu/drm/radeon/evergreen_cs.c > @@ -85,6 +85,9 @@ struct evergreen_cs_track { > u32 db_s_write_offset; > struct radeon_bo *db_s_read_bo; > struct radeon_bo *db_s_write_bo; > + u32 htile_offset; > + u32 htile_surface; > + struct radeon_bo *htile_bo; > }; > > static u32 evergreen_cs_get_aray_mode(u32 tiling_flags) > @@ -155,6 +158,9 @@ static void evergreen_cs_track_init(struct evergreen_cs_track *track) > track->db_s_write_offset = 0xFFFFFFFF; > track->db_s_read_bo = NULL; > track->db_s_write_bo = NULL; > + track->htile_bo = NULL; > + track->htile_offset = 0xFFFFFFFF; > + track->htile_surface = 0; > > for (i = 0; i < 4; i++) { > track->vgt_strmout_size[i] = 0; > @@ -627,6 +633,40 @@ static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p) > return -EINVAL; > } > > + /* hyperz */ > + if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) { > + unsigned long size; > + unsigned nbx, 
nby; > + > + if (track->htile_bo == NULL) { > + dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n", > + __func__, __LINE__, track->db_z_info); > + return -EINVAL; > + } > + > + nbx = surf.nbx; > + nby = surf.nby; > + if (G_028ABC_HTILE_WIDTH(track->htile_surface)) { > + nbx = (nbx + 7) / 8; > + } else { > + nbx = (nbx + 3) / 4; > + } > + if (G_028ABC_HTILE_HEIGHT(track->htile_surface)) { > + nby = (nby + 7) / 8; > + } else { > + nby = (nby + 3) / 4; > + } > + size = nbx * nby * 4; > + size += track->htile_offset; > + > + if (size > radeon_bo_size(track->htile_bo)) { > + dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n", > + __func__, __LINE__, radeon_bo_size(track->htile_bo), > + size, nbx, nby); > + return -EINVAL; > + } > + } > + > return 0; > } > > @@ -1611,6 +1651,21 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) > track->cb_color_base_last[tmp] = ib[idx]; > track->cb_color_bo[tmp] = reloc->robj; > break; > + case DB_HTILE_DATA_BASE: > + r = evergreen_cs_packet_next_reloc(p, &reloc); > + if (r) { > + dev_warn(p->dev, "bad SET_CONTEXT_REG " > + "0x%04X\n", reg); > + return -EINVAL; > + } > + track->htile_offset = radeon_get_ib_value(p, idx); > + ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); > + track->htile_bo = reloc->robj; > + break; > + case DB_HTILE_SURFACE: > + /* 8x8 only */ > + track->htile_surface = radeon_get_ib_value(p, idx); > + break; > case CB_IMMED0_BASE: > case CB_IMMED1_BASE: > case CB_IMMED2_BASE: > @@ -1623,7 +1678,6 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) > case CB_IMMED9_BASE: > case CB_IMMED10_BASE: > case CB_IMMED11_BASE: > - case DB_HTILE_DATA_BASE: > case SQ_PGM_START_FS: > case SQ_PGM_START_ES: > case SQ_PGM_START_VS: > diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h > index eb5708c..b4d1c42 100644 > --- a/drivers/gpu/drm/radeon/evergreend.h > +++ 
b/drivers/gpu/drm/radeon/evergreend.h > @@ -991,6 +991,13 @@ > #define G_028008_SLICE_MAX(x) (((x) >> 13) & 0x7FF) > #define C_028008_SLICE_MAX 0xFF001FFF > #define DB_HTILE_DATA_BASE 0x28014 > +#define DB_HTILE_SURFACE 0x28abc > +#define S_028ABC_HTILE_WIDTH(x) (((x) & 0x1) << 0) > +#define G_028ABC_HTILE_WIDTH(x) (((x) >> 0) & 0x1) > +#define C_028ABC_HTILE_WIDTH 0xFFFFFFFE > +#define S_028ABC_HTILE_HEIGHT(x) (((x) & 0x1) << 1) > +#define G_028ABC_HTILE_HEIGHT(x) (((x) >> 1) & 0x1) > +#define C_028ABC_HTILE_HEIGHT 0xFFFFFFFD > #define DB_Z_INFO 0x28040 > # define Z_ARRAY_MODE(x) ((x) << 4) > # define DB_TILE_SPLIT(x) (((x) & 0x7) << 8) > diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c > index 5cbe948..23f8aca 100644 > --- a/drivers/gpu/drm/radeon/r600_cs.c > +++ b/drivers/gpu/drm/radeon/r600_cs.c > @@ -74,6 +74,9 @@ struct r600_cs_track { > u32 db_offset; > struct radeon_bo *db_bo; > u64 db_bo_mc; > + struct radeon_bo *htile_bo; > + u64 htile_offset; > + u32 htile_surface; > }; > > #define FMT_8_BIT(fmt, vc) [fmt] = { 1, 1, 1, vc, CHIP_R600 } > @@ -315,6 +318,9 @@ static void r600_cs_track_init(struct r600_cs_track *track) > track->db_depth_size = 0xFFFFFFFF; > track->db_depth_size_idx = 0; > track->db_depth_control = 0xFFFFFFFF; > + track->htile_bo = NULL; > + track->htile_offset = 0xFFFFFFFF; > + track->htile_surface = 0; > > for (i = 0; i < 4; i++) { > track->vgt_strmout_size[i] = 0; > @@ -447,12 +453,167 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) > return 0; > } > > +static int r600_cs_track_validate_db(struct radeon_cs_parser *p) > +{ > + struct r600_cs_track *track = p->track; > + u32 nviews, bpe, ntiles, size, slice_tile_max, tmp; > + u32 height_align, pitch_align, depth_align; > + u32 pitch = 8192; > + u32 height = 8192; > + u64 base_offset, base_align; > + struct array_mode_checker array_check; > + int array_mode; > + volatile u32 *ib = p->ib->ptr; > + > + if (track->db_bo == NULL) { > + 
dev_warn(p->dev, "z/stencil with no depth buffer\n"); > + return -EINVAL; > + } > + switch (G_028010_FORMAT(track->db_depth_info)) { > + case V_028010_DEPTH_16: > + bpe = 2; > + break; > + case V_028010_DEPTH_X8_24: > + case V_028010_DEPTH_8_24: > + case V_028010_DEPTH_X8_24_FLOAT: > + case V_028010_DEPTH_8_24_FLOAT: > + case V_028010_DEPTH_32_FLOAT: > + bpe = 4; > + break; > + case V_028010_DEPTH_X24_8_32_FLOAT: > + bpe = 8; > + break; > + default: > + dev_warn(p->dev, "z/stencil with invalid format %d\n", G_028010_FORMAT(track->db_depth_info)); > + return -EINVAL; > + } > + if ((track->db_depth_size & 0xFFFFFC00) == 0xFFFFFC00) { > + if (!track->db_depth_size_idx) { > + dev_warn(p->dev, "z/stencil buffer size not set\n"); > + return -EINVAL; > + } > + tmp = radeon_bo_size(track->db_bo) - track->db_offset; > + tmp = (tmp / bpe) >> 6; > + if (!tmp) { > + dev_warn(p->dev, "z/stencil buffer too small (0x%08X %d %d %ld)\n", > + track->db_depth_size, bpe, track->db_offset, > + radeon_bo_size(track->db_bo)); > + return -EINVAL; > + } > + ib[track->db_depth_size_idx] = S_028000_SLICE_TILE_MAX(tmp - 1) | (track->db_depth_size & 0x3FF); > + } else { > + size = radeon_bo_size(track->db_bo); > + /* pitch in pixels */ > + pitch = (G_028000_PITCH_TILE_MAX(track->db_depth_size) + 1) * 8; > + slice_tile_max = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1; > + slice_tile_max *= 64; > + height = slice_tile_max / pitch; > + if (height > 8192) > + height = 8192; > + base_offset = track->db_bo_mc + track->db_offset; > + array_mode = G_028010_ARRAY_MODE(track->db_depth_info); > + array_check.array_mode = array_mode; > + array_check.group_size = track->group_size; > + array_check.nbanks = track->nbanks; > + array_check.npipes = track->npipes; > + array_check.nsamples = track->nsamples; > + array_check.blocksize = bpe; > + if (r600_get_array_mode_alignment(&array_check, > + &pitch_align, &height_align, &depth_align, &base_align)) { > + dev_warn(p->dev, "%s invalid tiling %d 
(0x%08X)\n", __func__, > + G_028010_ARRAY_MODE(track->db_depth_info), > + track->db_depth_info); > + return -EINVAL; > + } > + switch (array_mode) { > + case V_028010_ARRAY_1D_TILED_THIN1: > + /* don't break userspace */ > + height &= ~0x7; > + break; > + case V_028010_ARRAY_2D_TILED_THIN1: > + break; > + default: > + dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__, > + G_028010_ARRAY_MODE(track->db_depth_info), > + track->db_depth_info); > + return -EINVAL; > + } > + > + if (!IS_ALIGNED(pitch, pitch_align)) { > + dev_warn(p->dev, "%s:%d db pitch (%d, 0x%x, %d) invalid\n", > + __func__, __LINE__, pitch, pitch_align, array_mode); > + return -EINVAL; > + } > + if (!IS_ALIGNED(height, height_align)) { > + dev_warn(p->dev, "%s:%d db height (%d, 0x%x, %d) invalid\n", > + __func__, __LINE__, height, height_align, array_mode); > + return -EINVAL; > + } > + if (!IS_ALIGNED(base_offset, base_align)) { > + dev_warn(p->dev, "%s offset 0x%llx, 0x%llx, %d not aligned\n", __func__, > + base_offset, base_align, array_mode); > + return -EINVAL; > + } > + > + ntiles = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1; > + nviews = G_028004_SLICE_MAX(track->db_depth_view) + 1; > + tmp = ntiles * bpe * 64 * nviews; > + if ((tmp + track->db_offset) > radeon_bo_size(track->db_bo)) { > + dev_warn(p->dev, "z/stencil buffer (%d) too small (0x%08X %d %d %d -> %u have %lu)\n", > + array_mode, > + track->db_depth_size, ntiles, nviews, bpe, tmp + track->db_offset, > + radeon_bo_size(track->db_bo)); > + return -EINVAL; > + } > + } > + > + /* hyperz */ > + if (G_028010_TILE_SURFACE_ENABLE(track->db_depth_info)) { > + unsigned long size; > + unsigned nbx, nby; > + > + if (track->htile_bo == NULL) { > + dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n", > + __func__, __LINE__, track->db_depth_info); > + return -EINVAL; > + } > + if ((track->db_depth_size & 0xFFFFFC00) == 0xFFFFFC00) { > + dev_warn(p->dev, "%s:%d htile can't be enabled with bogus db_depth_size 
0x%08x\n", > + __func__, __LINE__, track->db_depth_size); > + return -EINVAL; > + } > + > + nbx = pitch; > + nby = height; > + if (G_028D24_HTILE_WIDTH(track->htile_surface)) { > + nbx = (nbx + 7) / 8; > + } else { > + nbx = (nbx + 3) / 4; > + } > + if (G_028D24_HTILE_HEIGHT(track->htile_surface)) { > + nby = (nby + 7) / 8; > + } else { > + nby = (nby + 3) / 4; > + } > + size = nbx * nby * 4; > + size += track->htile_offset; > + > + if (size > radeon_bo_size(track->htile_bo)) { > + dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n", > + __func__, __LINE__, radeon_bo_size(track->htile_bo), > + size, nbx, nby); > + return -EINVAL; > + } > + } > + > + return 0; > +} > + > static int r600_cs_track_check(struct radeon_cs_parser *p) > { > struct r600_cs_track *track = p->track; > u32 tmp; > int r, i; > - volatile u32 *ib = p->ib->ptr; > > /* on legacy kernel we don't perform advanced check */ > if (p->rdev == NULL) > @@ -497,121 +658,15 @@ static int r600_cs_track_check(struct radeon_cs_parser *p) > return r; > } > } > + > /* Check depth buffer */ > if (G_028800_STENCIL_ENABLE(track->db_depth_control) || > G_028800_Z_ENABLE(track->db_depth_control)) { > - u32 nviews, bpe, ntiles, size, slice_tile_max; > - u32 height, height_align, pitch, pitch_align, depth_align; > - u64 base_offset, base_align; > - struct array_mode_checker array_check; > - int array_mode; > - > - if (track->db_bo == NULL) { > - dev_warn(p->dev, "z/stencil with no depth buffer\n"); > - return -EINVAL; > - } > - if (G_028010_TILE_SURFACE_ENABLE(track->db_depth_info)) { > - dev_warn(p->dev, "this kernel doesn't support z/stencil htile\n"); > - return -EINVAL; > - } > - switch (G_028010_FORMAT(track->db_depth_info)) { > - case V_028010_DEPTH_16: > - bpe = 2; > - break; > - case V_028010_DEPTH_X8_24: > - case V_028010_DEPTH_8_24: > - case V_028010_DEPTH_X8_24_FLOAT: > - case V_028010_DEPTH_8_24_FLOAT: > - case V_028010_DEPTH_32_FLOAT: > - bpe = 4; > - break; > - case 
V_028010_DEPTH_X24_8_32_FLOAT: > - bpe = 8; > - break; > - default: > - dev_warn(p->dev, "z/stencil with invalid format %d\n", G_028010_FORMAT(track->db_depth_info)); > - return -EINVAL; > - } > - if ((track->db_depth_size & 0xFFFFFC00) == 0xFFFFFC00) { > - if (!track->db_depth_size_idx) { > - dev_warn(p->dev, "z/stencil buffer size not set\n"); > - return -EINVAL; > - } > - tmp = radeon_bo_size(track->db_bo) - track->db_offset; > - tmp = (tmp / bpe) >> 6; > - if (!tmp) { > - dev_warn(p->dev, "z/stencil buffer too small (0x%08X %d %d %ld)\n", > - track->db_depth_size, bpe, track->db_offset, > - radeon_bo_size(track->db_bo)); > - return -EINVAL; > - } > - ib[track->db_depth_size_idx] = S_028000_SLICE_TILE_MAX(tmp - 1) | (track->db_depth_size & 0x3FF); > - } else { > - size = radeon_bo_size(track->db_bo); > - /* pitch in pixels */ > - pitch = (G_028000_PITCH_TILE_MAX(track->db_depth_size) + 1) * 8; > - slice_tile_max = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1; > - slice_tile_max *= 64; > - height = slice_tile_max / pitch; > - if (height > 8192) > - height = 8192; > - base_offset = track->db_bo_mc + track->db_offset; > - array_mode = G_028010_ARRAY_MODE(track->db_depth_info); > - array_check.array_mode = array_mode; > - array_check.group_size = track->group_size; > - array_check.nbanks = track->nbanks; > - array_check.npipes = track->npipes; > - array_check.nsamples = track->nsamples; > - array_check.blocksize = bpe; > - if (r600_get_array_mode_alignment(&array_check, > - &pitch_align, &height_align, &depth_align, &base_align)) { > - dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__, > - G_028010_ARRAY_MODE(track->db_depth_info), > - track->db_depth_info); > - return -EINVAL; > - } > - switch (array_mode) { > - case V_028010_ARRAY_1D_TILED_THIN1: > - /* don't break userspace */ > - height &= ~0x7; > - break; > - case V_028010_ARRAY_2D_TILED_THIN1: > - break; > - default: > - dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__, > - 
G_028010_ARRAY_MODE(track->db_depth_info), > - track->db_depth_info); > - return -EINVAL; > - } > - > - if (!IS_ALIGNED(pitch, pitch_align)) { > - dev_warn(p->dev, "%s:%d db pitch (%d, 0x%x, %d) invalid\n", > - __func__, __LINE__, pitch, pitch_align, array_mode); > - return -EINVAL; > - } > - if (!IS_ALIGNED(height, height_align)) { > - dev_warn(p->dev, "%s:%d db height (%d, 0x%x, %d) invalid\n", > - __func__, __LINE__, height, height_align, array_mode); > - return -EINVAL; > - } > - if (!IS_ALIGNED(base_offset, base_align)) { > - dev_warn(p->dev, "%s offset[%d] 0x%llx, 0x%llx, %d not aligned\n", __func__, i, > - base_offset, base_align, array_mode); > - return -EINVAL; > - } > - > - ntiles = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1; > - nviews = G_028004_SLICE_MAX(track->db_depth_view) + 1; > - tmp = ntiles * bpe * 64 * nviews; > - if ((tmp + track->db_offset) > radeon_bo_size(track->db_bo)) { > - dev_warn(p->dev, "z/stencil buffer (%d) too small (0x%08X %d %d %d -> %u have %lu)\n", > - array_mode, > - track->db_depth_size, ntiles, nviews, bpe, tmp + track->db_offset, > - radeon_bo_size(track->db_bo)); > - return -EINVAL; > - } > - } > + r = r600_cs_track_validate_db(p); > + if (r) > + return r; > } > + > return 0; > } > > @@ -1209,6 +1264,19 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) > track->db_bo_mc = reloc->lobj.gpu_offset; > break; > case DB_HTILE_DATA_BASE: > + r = r600_cs_packet_next_reloc(p, &reloc); > + if (r) { > + dev_warn(p->dev, "bad SET_CONTEXT_REG " > + "0x%04X\n", reg); > + return -EINVAL; > + } > + track->htile_offset = radeon_get_ib_value(p, idx) << 8; > + ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); > + track->htile_bo = reloc->robj; > + break; > + case DB_HTILE_SURFACE: > + track->htile_surface = radeon_get_ib_value(p, idx); > + break; > case SQ_PGM_START_FS: > case SQ_PGM_START_ES: > case SQ_PGM_START_VS: > diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h 
> index 2ba460b..4bd003e 100644 > --- a/drivers/gpu/drm/radeon/r600d.h > +++ b/drivers/gpu/drm/radeon/r600d.h > @@ -195,6 +195,13 @@ > #define PREZ_MUST_WAIT_FOR_POSTZ_DONE (1 << 31) > #define DB_DEPTH_BASE 0x2800C > #define DB_HTILE_DATA_BASE 0x28014 > +#define DB_HTILE_SURFACE 0x28D24 > +#define S_028D24_HTILE_WIDTH(x) (((x) & 0x1) << 0) > +#define G_028D24_HTILE_WIDTH(x) (((x) >> 0) & 0x1) > +#define C_028D24_HTILE_WIDTH 0xFFFFFFFE > +#define S_028D24_HTILE_HEIGHT(x) (((x) & 0x1) << 1) > +#define G_028D24_HTILE_HEIGHT(x) (((x) >> 1) & 0x1) > +#define C_028D24_HTILE_HEIGHT 0xFFFFFFFD > #define DB_WATERMARKS 0x9838 > #define DEPTH_FREE(x) ((x) << 0) > #define DEPTH_FLUSH(x) ((x) << 5) > diff --git a/drivers/gpu/drm/radeon/reg_srcs/cayman b/drivers/gpu/drm/radeon/reg_srcs/cayman > index 7b526d3..0446d4e 100644 > --- a/drivers/gpu/drm/radeon/reg_srcs/cayman > +++ b/drivers/gpu/drm/radeon/reg_srcs/cayman > @@ -510,7 +510,6 @@ cayman 0x9400 > 0x00028AA8 IA_MULTI_VGT_PARAM > 0x00028AB4 VGT_REUSE_OFF > 0x00028AB8 VGT_VTX_CNT_EN > -0x00028ABC DB_HTILE_SURFACE > 0x00028AC0 DB_SRESULTS_COMPARE_STATE0 > 0x00028AC4 DB_SRESULTS_COMPARE_STATE1 > 0x00028AC8 DB_PRELOAD_CONTROL > diff --git a/drivers/gpu/drm/radeon/reg_srcs/evergreen b/drivers/gpu/drm/radeon/reg_srcs/evergreen > index 7f43394..d2eae01 100644 > --- a/drivers/gpu/drm/radeon/reg_srcs/evergreen > +++ b/drivers/gpu/drm/radeon/reg_srcs/evergreen > @@ -520,7 +520,6 @@ evergreen 0x9400 > 0x00028AA4 VGT_INSTANCE_STEP_RATE_1 > 0x00028AB4 VGT_REUSE_OFF > 0x00028AB8 VGT_VTX_CNT_EN > -0x00028ABC DB_HTILE_SURFACE > 0x00028AC0 DB_SRESULTS_COMPARE_STATE0 > 0x00028AC4 DB_SRESULTS_COMPARE_STATE1 > 0x00028AC8 DB_PRELOAD_CONTROL > diff --git a/drivers/gpu/drm/radeon/reg_srcs/r600 b/drivers/gpu/drm/radeon/reg_srcs/r600 > index 79d2455..0210665 100644 > --- a/drivers/gpu/drm/radeon/reg_srcs/r600 > +++ b/drivers/gpu/drm/radeon/reg_srcs/r600 > @@ -714,7 +714,6 @@ r600 0x9400 > 0x0000A710 TD_VS_SAMPLER17_BORDER_RED > 0x00009508 
TA_CNTL_AUX > 0x0002802C DB_DEPTH_CLEAR > -0x00028D24 DB_HTILE_SURFACE > 0x00028D34 DB_PREFETCH_LIMIT > 0x00028D30 DB_PRELOAD_CONTROL > 0x00028D0C DB_RENDER_CONTROL > -- > 1.7.7.1 > _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel