On Thu, Jun 28, 2012 at 5:50 PM, <alexdeucher@xxxxxxxxx> wrote: > From: Alex Deucher <alexander.deucher@xxxxxxx> > > Consolidate the CS functions to one section of the file. > Previously they were spread all around. > > Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> Reviewed-by: Jerome Glisse <jglisse@xxxxxxxxxx> > --- > drivers/gpu/drm/radeon/r100.c | 2983 ++++++++++++++++++++--------------------- > 1 files changed, 1491 insertions(+), 1492 deletions(-) > > diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c > index 35825bf..3fa82e1 100644 > --- a/drivers/gpu/drm/radeon/r100.c > +++ b/drivers/gpu/drm/radeon/r100.c > @@ -103,112 +103,6 @@ void r100_wait_for_vblank(struct radeon_device *rdev, int crtc) > * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280 > */ > > -int r100_reloc_pitch_offset(struct radeon_cs_parser *p, > - struct radeon_cs_packet *pkt, > - unsigned idx, > - unsigned reg) > -{ > - int r; > - u32 tile_flags = 0; > - u32 tmp; > - struct radeon_cs_reloc *reloc; > - u32 value; > - > - r = r100_cs_packet_next_reloc(p, &reloc); > - if (r) { > - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", > - idx, reg); > - r100_cs_dump_packet(p, pkt); > - return r; > - } > - > - value = radeon_get_ib_value(p, idx); > - tmp = value & 0x003fffff; > - tmp += (((u32)reloc->lobj.gpu_offset) >> 10); > - > - if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { > - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) > - tile_flags |= RADEON_DST_TILE_MACRO; > - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { > - if (reg == RADEON_SRC_PITCH_OFFSET) { > - DRM_ERROR("Cannot src blit from microtiled surface\n"); > - r100_cs_dump_packet(p, pkt); > - return -EINVAL; > - } > - tile_flags |= RADEON_DST_TILE_MICRO; > - } > - > - tmp |= tile_flags; > - p->ib.ptr[idx] = (value & 0x3fc00000) | tmp; > - } else > - p->ib.ptr[idx] = (value & 0xffc00000) | tmp; > - return 0; > -} > - > -int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, > - struct radeon_cs_packet *pkt, > - int idx) > -{ > - unsigned c, i; > - struct radeon_cs_reloc *reloc; > - struct r100_cs_track *track; > - int r = 0; > - volatile uint32_t *ib; > - u32 idx_value; > - > - ib = p->ib.ptr; > - track = (struct r100_cs_track *)p->track; > - c = radeon_get_ib_value(p, idx++) & 0x1F; > - if (c > 16) { > - DRM_ERROR("Only 16 vertex buffers are allowed %d\n", > - pkt->opcode); > - r100_cs_dump_packet(p, pkt); > - return -EINVAL; > - } > - track->num_arrays = c; > - for (i = 0; i < (c - 1); i+=2, idx+=3) { > - r = r100_cs_packet_next_reloc(p, &reloc); > - if (r) { > - DRM_ERROR("No reloc for packet3 %d\n", > - pkt->opcode); > - r100_cs_dump_packet(p, pkt); > - return r; > - } > - idx_value = radeon_get_ib_value(p, idx); > - ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); > - > - track->arrays[i + 0].esize = idx_value >> 8; > - track->arrays[i + 0].robj = reloc->robj; > - track->arrays[i + 0].esize &= 0x7F; > - r = r100_cs_packet_next_reloc(p, &reloc); > - if (r) { > - DRM_ERROR("No reloc for packet3 %d\n", > - pkt->opcode); > - r100_cs_dump_packet(p, pkt); > - return r; > - } > - ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset); > - track->arrays[i + 1].robj = reloc->robj; > - track->arrays[i + 1].esize = idx_value >> 24; > - track->arrays[i + 1].esize &= 0x7F; > - } > - if (c & 1) { > - r = r100_cs_packet_next_reloc(p, &reloc); > - if (r) { > - DRM_ERROR("No reloc for packet3 %d\n", > - pkt->opcode); > - r100_cs_dump_packet(p, pkt); > - return r; > - } > - idx_value = radeon_get_ib_value(p, idx); > - ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); > - track->arrays[i + 0].robj = reloc->robj; > - track->arrays[i + 0].esize = idx_value >> 8; > - track->arrays[i + 0].esize &= 0x7F; > - } > - return r; > -} > - > void r100_pre_page_flip(struct radeon_device *rdev, int crtc) > { > /* enable the pflip int */ > @@ -1206,6 +1100,112 @@ void r100_cp_disable(struct radeon_device *rdev) > /* > * CS functions > */ > +int r100_reloc_pitch_offset(struct radeon_cs_parser *p, > + struct radeon_cs_packet *pkt, > + unsigned idx, > + unsigned reg) > +{ > + int r; > + u32 tile_flags = 0; > + u32 tmp; > + struct radeon_cs_reloc *reloc; > + u32 value; > + > + r = r100_cs_packet_next_reloc(p, &reloc); > + if (r) { > + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", > + idx, reg); > + r100_cs_dump_packet(p, pkt); > + return r; > + } > + > + value = radeon_get_ib_value(p, idx); > + tmp = value & 0x003fffff; > + tmp += (((u32)reloc->lobj.gpu_offset) >> 10); > + > + if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { > + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) > + tile_flags |= RADEON_DST_TILE_MACRO; > + if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { > + if (reg == RADEON_SRC_PITCH_OFFSET) { > + DRM_ERROR("Cannot src blit from microtiled surface\n"); > + r100_cs_dump_packet(p, pkt); > + return -EINVAL; > + } > + tile_flags |= RADEON_DST_TILE_MICRO; > + } > + > + tmp |= tile_flags; > + p->ib.ptr[idx] = (value & 0x3fc00000) | tmp; > + } else > + p->ib.ptr[idx] = (value & 0xffc00000) | tmp; > + return 0; > +} > + > +int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, > + struct radeon_cs_packet *pkt, > + int idx) > +{ > + unsigned c, i; > + struct radeon_cs_reloc *reloc; > + struct r100_cs_track *track; > + int r = 0; > + volatile uint32_t *ib; > + u32 idx_value; > + > + ib = p->ib.ptr; > + track = (struct r100_cs_track *)p->track; > + c = radeon_get_ib_value(p, idx++) & 0x1F; > + if (c > 16) { > + DRM_ERROR("Only 16 vertex buffers are allowed %d\n", > + pkt->opcode); > + r100_cs_dump_packet(p, pkt); > + return -EINVAL; > + } > + track->num_arrays = c; > + for (i = 0; i < (c - 1); i+=2, idx+=3) { > + r = r100_cs_packet_next_reloc(p, &reloc); > + if (r) { > + DRM_ERROR("No reloc for packet3 %d\n", > + pkt->opcode); > + r100_cs_dump_packet(p, pkt); > + return r; > + } > + idx_value = radeon_get_ib_value(p, idx); > + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); > + > + track->arrays[i + 0].esize = idx_value >> 8; > + track->arrays[i + 0].robj = reloc->robj; > + track->arrays[i + 0].esize &= 0x7F; > + r = r100_cs_packet_next_reloc(p, &reloc); > + if (r) { > + DRM_ERROR("No reloc for packet3 %d\n", > + pkt->opcode); > + r100_cs_dump_packet(p, pkt); > + return r; > + } > + ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset); > + track->arrays[i + 1].robj = reloc->robj; > + track->arrays[i + 1].esize = idx_value >> 24; > + track->arrays[i + 1].esize &= 0x7F; > + } > + if (c & 1) { > + r = r100_cs_packet_next_reloc(p, &reloc); > + if (r) { > + DRM_ERROR("No reloc for packet3 %d\n", > + pkt->opcode); > + r100_cs_dump_packet(p, pkt); > + return r; > + } > + idx_value = radeon_get_ib_value(p, idx); > + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); > + track->arrays[i + 0].robj = reloc->robj; > + track->arrays[i + 0].esize = idx_value >> 8; > + track->arrays[i + 0].esize &= 0x7F; > + } > + return r; > +} > + > int r100_cs_parse_packet0(struct radeon_cs_parser *p, > struct radeon_cs_packet *pkt, > const unsigned *auth, unsigned n, > @@ -2031,1590 +2031,1589 @@ int r100_cs_parse(struct radeon_cs_parser *p) > return 0; > } > > - > -/* > - * Global GPU functions > - */ > -void r100_errata(struct radeon_device *rdev) > +static void r100_cs_track_texture_print(struct r100_cs_track_texture *t) > { > - rdev->pll_errata = 0; > - > - if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) { > - rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS; > - } > - > - if (rdev->family == CHIP_RV100 || > - rdev->family == CHIP_RS100 || > - rdev->family == CHIP_RS200) { > - rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY; > - } > + DRM_ERROR("pitch %d\n", t->pitch); > + DRM_ERROR("use_pitch %d\n", t->use_pitch); > + DRM_ERROR("width %d\n", t->width); > + DRM_ERROR("width_11 %d\n", t->width_11); > + DRM_ERROR("height %d\n", t->height); > + DRM_ERROR("height_11 %d\n", t->height_11); > + DRM_ERROR("num levels %d\n", t->num_levels); > + DRM_ERROR("depth %d\n", t->txdepth); > + DRM_ERROR("bpp %d\n", t->cpp); > + DRM_ERROR("coordinate type %d\n", t->tex_coord_type); > + DRM_ERROR("width round to power of 2 %d\n", t->roundup_w); > + DRM_ERROR("height round to power of 2 %d\n", t->roundup_h); > + DRM_ERROR("compress format %d\n", t->compress_format); > } > > -/* Wait for vertical sync on primary CRTC */ > -void r100_gpu_wait_for_vsync(struct radeon_device *rdev) > +static int r100_track_compress_size(int compress_format, int w, int h) > { > - uint32_t crtc_gen_cntl, tmp; > - int i; > + int block_width, block_height, block_bytes; > + int wblocks, hblocks; > + int min_wblocks; > + int sz; > > - crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL); > - if ((crtc_gen_cntl & RADEON_CRTC_DISP_REQ_EN_B) || > - !(crtc_gen_cntl & RADEON_CRTC_EN)) { > - return; > - } > - /* Clear the CRTC_VBLANK_SAVE bit */ > - WREG32(RADEON_CRTC_STATUS, RADEON_CRTC_VBLANK_SAVE_CLEAR); > - for (i = 0; i < rdev->usec_timeout; i++) { > - tmp = RREG32(RADEON_CRTC_STATUS); > - if (tmp & RADEON_CRTC_VBLANK_SAVE) { > - return; > - } > - DRM_UDELAY(1); > + block_width = 4; > + block_height = 4; > + > + switch (compress_format) { > + case R100_TRACK_COMP_DXT1: > + block_bytes = 8; > + min_wblocks = 4; > + break; > + default: > + case R100_TRACK_COMP_DXT35: > + block_bytes = 16; > + min_wblocks = 2; > + break; > } > + > + hblocks = (h + block_height - 1) / block_height; > + wblocks = (w + block_width - 1) / block_width; > + if (wblocks < min_wblocks) > + wblocks = min_wblocks; > + sz = wblocks * hblocks * block_bytes; > + return sz; > } > > -/* Wait for vertical sync on secondary CRTC */ > -void r100_gpu_wait_for_vsync2(struct radeon_device *rdev) > +static int r100_cs_track_cube(struct radeon_device *rdev, > + struct r100_cs_track *track, unsigned idx) > { > - uint32_t crtc2_gen_cntl, tmp; > - int i; > + unsigned face, w, h; > + struct radeon_bo *cube_robj; > + unsigned long size; > + unsigned compress_format = track->textures[idx].compress_format; > > - crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL); > - if ((crtc2_gen_cntl & RADEON_CRTC2_DISP_REQ_EN_B) || > - !(crtc2_gen_cntl & RADEON_CRTC2_EN)) > - return; > + for (face = 0; face < 5; face++) { > + cube_robj = track->textures[idx].cube_info[face].robj; > + w = track->textures[idx].cube_info[face].width; > + h = track->textures[idx].cube_info[face].height; > > - /* Clear the CRTC_VBLANK_SAVE bit */ > - WREG32(RADEON_CRTC2_STATUS, RADEON_CRTC2_VBLANK_SAVE_CLEAR); > - for (i = 0; i < rdev->usec_timeout; i++) { > - tmp = RREG32(RADEON_CRTC2_STATUS); > - if (tmp & RADEON_CRTC2_VBLANK_SAVE) { > - return; > + if (compress_format) { > + size = r100_track_compress_size(compress_format, w, h); > + } else > + size = w * h; > + size *= track->textures[idx].cpp; > + > + size += track->textures[idx].cube_info[face].offset; > + > + if (size > radeon_bo_size(cube_robj)) { > + DRM_ERROR("Cube texture offset greater than object size %lu %lu\n", > + size, radeon_bo_size(cube_robj)); > + r100_cs_track_texture_print(&track->textures[idx]); > + return -1; > } > - DRM_UDELAY(1); > } > + return 0; > } > > -int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n) > +static int r100_cs_track_texture_check(struct radeon_device *rdev, > + struct r100_cs_track *track) > { > - unsigned i; > - uint32_t tmp; > + struct radeon_bo *robj; > + unsigned long size; > + unsigned u, i, w, h, d; > + int ret; > > - for (i = 0; i < rdev->usec_timeout; i++) { > - tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK; > - if (tmp >= n) { > - return 0; > + for (u = 0; u < track->num_texture; u++) { > + if (!track->textures[u].enabled) > + continue; > + if (track->textures[u].lookup_disable) > + continue; > + robj = track->textures[u].robj; > + if (robj == NULL) { > + DRM_ERROR("No texture bound to unit %u\n", u); > + return -EINVAL; > } > - DRM_UDELAY(1); > - } > - return -1; > -} > + size = 0; > + for (i = 0; i <= track->textures[u].num_levels; i++) { > + if (track->textures[u].use_pitch) { > + if (rdev->family < CHIP_R300) > + w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i); > + else > + w = track->textures[u].pitch / (1 << i); > + } else { > + w = track->textures[u].width; > + if (rdev->family >= CHIP_RV515) > + w |= track->textures[u].width_11; > + w = w / (1 << i); > + if (track->textures[u].roundup_w) > + w = roundup_pow_of_two(w); > + } > + h = track->textures[u].height; > + if (rdev->family >= CHIP_RV515) > + h |= track->textures[u].height_11; > + h = h / (1 << i); > + if (track->textures[u].roundup_h) > + h = roundup_pow_of_two(h); > + if (track->textures[u].tex_coord_type == 1) { > + d = (1 << track->textures[u].txdepth) / (1 << i); > + if (!d) > + d = 1; > + } else { > + d = 1; > + } > + if (track->textures[u].compress_format) { > > -int r100_gui_wait_for_idle(struct radeon_device *rdev) > -{ > - unsigned i; > - uint32_t tmp; > + size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d; > + /* compressed textures are block based */ > + } else > + size += w * h * d; > + } > + size *= track->textures[u].cpp; > > - if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) { > - printk(KERN_WARNING "radeon: wait for empty RBBM fifo failed !" > - " Bad things might happen.\n"); > - } > - for (i = 0; i < rdev->usec_timeout; i++) { > - tmp = RREG32(RADEON_RBBM_STATUS); > - if (!(tmp & RADEON_RBBM_ACTIVE)) { > - return 0; > + switch (track->textures[u].tex_coord_type) { > + case 0: > + case 1: > + break; > + case 2: > + if (track->separate_cube) { > + ret = r100_cs_track_cube(rdev, track, u); > + if (ret) > + return ret; > + } else > + size *= 6; > + break; > + default: > + DRM_ERROR("Invalid texture coordinate type %u for unit " > + "%u\n", track->textures[u].tex_coord_type, u); > + return -EINVAL; > + } > + if (size > radeon_bo_size(robj)) { > + DRM_ERROR("Texture of unit %u needs %lu bytes but is " > + "%lu\n", u, size, radeon_bo_size(robj)); > + r100_cs_track_texture_print(&track->textures[u]); > + return -EINVAL; > } > - DRM_UDELAY(1); > } > - return -1; > + return 0; > } > > -int r100_mc_wait_for_idle(struct radeon_device *rdev) > +int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) > { > unsigned i; > - uint32_t tmp; > + unsigned long size; > + unsigned prim_walk; > + unsigned nverts; > + unsigned num_cb = track->cb_dirty ? track->num_cb : 0; > > - for (i = 0; i < rdev->usec_timeout; i++) { > - /* read MC_STATUS */ > - tmp = RREG32(RADEON_MC_STATUS); > - if (tmp & RADEON_MC_IDLE) { > - return 0; > + if (num_cb && !track->zb_cb_clear && !track->color_channel_mask && > + !track->blend_read_enable) > + num_cb = 0; > + > + for (i = 0; i < num_cb; i++) { > + if (track->cb[i].robj == NULL) { > + DRM_ERROR("[drm] No buffer for color buffer %d !\n", i); > + return -EINVAL; > + } > + size = track->cb[i].pitch * track->cb[i].cpp * track->maxy; > + size += track->cb[i].offset; > + if (size > radeon_bo_size(track->cb[i].robj)) { > + DRM_ERROR("[drm] Buffer too small for color buffer %d " > + "(need %lu have %lu) !\n", i, size, > + radeon_bo_size(track->cb[i].robj)); > + DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n", > + i, track->cb[i].pitch, track->cb[i].cpp, > + track->cb[i].offset, track->maxy); > + return -EINVAL; > } > - DRM_UDELAY(1); > } > - return -1; > -} > - > -bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) > -{ > - u32 rbbm_status; > + track->cb_dirty = false; > > - rbbm_status = RREG32(R_000E40_RBBM_STATUS); > - if (!G_000E40_GUI_ACTIVE(rbbm_status)) { > - radeon_ring_lockup_update(ring); > - return false; > + if (track->zb_dirty && track->z_enabled) { > + if (track->zb.robj == NULL) { > + DRM_ERROR("[drm] No buffer for z buffer !\n"); > + return -EINVAL; > + } > + size = track->zb.pitch * track->zb.cpp * track->maxy; > + size += track->zb.offset; > + if (size > radeon_bo_size(track->zb.robj)) { > + DRM_ERROR("[drm] Buffer too small for z buffer " > + "(need %lu have %lu) !\n", size, > + radeon_bo_size(track->zb.robj)); > + DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n", > + track->zb.pitch, track->zb.cpp, > + track->zb.offset, track->maxy); > + return -EINVAL; > + } > } > - /* force CP activities */ > - radeon_ring_force_activity(rdev, ring); > - return radeon_ring_test_lockup(rdev, ring); > -} > + track->zb_dirty = false; > > -void r100_bm_disable(struct radeon_device *rdev) > -{ > - u32 tmp; > + if (track->aa_dirty && track->aaresolve) { > + if (track->aa.robj == NULL) { > + DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i); > + return -EINVAL; > + } > + /* I believe the format comes from colorbuffer0. */ > + size = track->aa.pitch * track->cb[0].cpp * track->maxy; > + size += track->aa.offset; > + if (size > radeon_bo_size(track->aa.robj)) { > + DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d " > + "(need %lu have %lu) !\n", i, size, > + radeon_bo_size(track->aa.robj)); > + DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n", > + i, track->aa.pitch, track->cb[0].cpp, > + track->aa.offset, track->maxy); > + return -EINVAL; > + } > + } > + track->aa_dirty = false; > > - /* disable bus mastering */ > - tmp = RREG32(R_000030_BUS_CNTL); > - WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000044); > - mdelay(1); > - WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000042); > - mdelay(1); > - WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000040); > - tmp = RREG32(RADEON_BUS_CNTL); > - mdelay(1); > - pci_clear_master(rdev->pdev); > - mdelay(1); > -} > - > -int r100_asic_reset(struct radeon_device *rdev) > -{ > - struct r100_mc_save save; > - u32 status, tmp; > - int ret = 0; > - > - status = RREG32(R_000E40_RBBM_STATUS); > - if (!G_000E40_GUI_ACTIVE(status)) { > - return 0; > - } > - r100_mc_stop(rdev, &save); > - status = RREG32(R_000E40_RBBM_STATUS); > - dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status); > - /* stop CP */ > - WREG32(RADEON_CP_CSQ_CNTL, 0); > - tmp = RREG32(RADEON_CP_RB_CNTL); > - WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA); > - WREG32(RADEON_CP_RB_RPTR_WR, 0); > - WREG32(RADEON_CP_RB_WPTR, 0); > - WREG32(RADEON_CP_RB_CNTL, tmp); > - /* save PCI state */ > - pci_save_state(rdev->pdev); > - /* disable bus mastering */ > - r100_bm_disable(rdev); > - WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_SE(1) | > - S_0000F0_SOFT_RESET_RE(1) | > - S_0000F0_SOFT_RESET_PP(1) | > - S_0000F0_SOFT_RESET_RB(1)); > - RREG32(R_0000F0_RBBM_SOFT_RESET); > - mdelay(500); > - WREG32(R_0000F0_RBBM_SOFT_RESET, 0); > - mdelay(1); > - status = RREG32(R_000E40_RBBM_STATUS); > - dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status); > - /* reset CP */ > - WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_CP(1)); > - RREG32(R_0000F0_RBBM_SOFT_RESET); > - mdelay(500); > - WREG32(R_0000F0_RBBM_SOFT_RESET, 0); > - mdelay(1); > - status = RREG32(R_000E40_RBBM_STATUS); > - dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status); > - /* restore PCI & busmastering */ > - pci_restore_state(rdev->pdev); > - r100_enable_bm(rdev); > - /* Check if GPU is idle */ > - if (G_000E40_SE_BUSY(status) || G_000E40_RE_BUSY(status) || > - G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) { > - dev_err(rdev->dev, "failed to reset GPU\n"); > - ret = -1; > - } else > - dev_info(rdev->dev, "GPU reset succeed\n"); > - r100_mc_resume(rdev, &save); > - return ret; > -} > - > -void r100_set_common_regs(struct radeon_device *rdev) > -{ > - struct drm_device *dev = rdev->ddev; > - bool force_dac2 = false; > - u32 tmp; > - > - /* set these so they don't interfere with anything */ > - WREG32(RADEON_OV0_SCALE_CNTL, 0); > - WREG32(RADEON_SUBPIC_CNTL, 0); > - WREG32(RADEON_VIPH_CONTROL, 0); > - WREG32(RADEON_I2C_CNTL_1, 0); > - WREG32(RADEON_DVI_I2C_CNTL_1, 0); > - WREG32(RADEON_CAP0_TRIG_CNTL, 0); > - WREG32(RADEON_CAP1_TRIG_CNTL, 0); > - > - /* always set up dac2 on rn50 and some rv100 as lots > - * of servers seem to wire it up to a VGA port but > - * don't report it in the bios connector > - * table. > - */ > - switch (dev->pdev->device) { > - /* RN50 */ > - case 0x515e: > - case 0x5969: > - force_dac2 = true; > - break; > - /* RV100*/ > - case 0x5159: > - case 0x515a: > - /* DELL triple head servers */ > - if ((dev->pdev->subsystem_vendor == 0x1028 /* DELL */) && > - ((dev->pdev->subsystem_device == 0x016c) || > - (dev->pdev->subsystem_device == 0x016d) || > - (dev->pdev->subsystem_device == 0x016e) || > - (dev->pdev->subsystem_device == 0x016f) || > - (dev->pdev->subsystem_device == 0x0170) || > - (dev->pdev->subsystem_device == 0x017d) || > - (dev->pdev->subsystem_device == 0x017e) || > - (dev->pdev->subsystem_device == 0x0183) || > - (dev->pdev->subsystem_device == 0x018a) || > - (dev->pdev->subsystem_device == 0x019a))) > - force_dac2 = true; > - break; > - } > - > - if (force_dac2) { > - u32 disp_hw_debug = RREG32(RADEON_DISP_HW_DEBUG); > - u32 tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL); > - u32 dac2_cntl = RREG32(RADEON_DAC_CNTL2); > - > - /* For CRT on DAC2, don't turn it on if BIOS didn't > - enable it, even it's detected. > - */ > - > - /* force it to crtc0 */ > - dac2_cntl &= ~RADEON_DAC2_DAC_CLK_SEL; > - dac2_cntl |= RADEON_DAC2_DAC2_CLK_SEL; > - disp_hw_debug |= RADEON_CRT2_DISP1_SEL; > - > - /* set up the TV DAC */ > - tv_dac_cntl &= ~(RADEON_TV_DAC_PEDESTAL | > - RADEON_TV_DAC_STD_MASK | > - RADEON_TV_DAC_RDACPD | > - RADEON_TV_DAC_GDACPD | > - RADEON_TV_DAC_BDACPD | > - RADEON_TV_DAC_BGADJ_MASK | > - RADEON_TV_DAC_DACADJ_MASK); > - tv_dac_cntl |= (RADEON_TV_DAC_NBLANK | > - RADEON_TV_DAC_NHOLD | > - RADEON_TV_DAC_STD_PS2 | > - (0x58 << 16)); > - > - WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl); > - WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug); > - WREG32(RADEON_DAC_CNTL2, dac2_cntl); > + prim_walk = (track->vap_vf_cntl >> 4) & 0x3; > + if (track->vap_vf_cntl & (1 << 14)) { > + nverts = track->vap_alt_nverts; > + } else { > + nverts = (track->vap_vf_cntl >> 16) & 0xFFFF; > } > - > - /* switch PM block to ACPI mode */ > - tmp = RREG32_PLL(RADEON_PLL_PWRMGT_CNTL); > - tmp &= ~RADEON_PM_MODE_SEL; > - WREG32_PLL(RADEON_PLL_PWRMGT_CNTL, tmp); > - > -} > - > -/* > - * VRAM info > - */ > -static void r100_vram_get_type(struct radeon_device *rdev) > -{ > - uint32_t tmp; > - > - rdev->mc.vram_is_ddr = false; > - if (rdev->flags & RADEON_IS_IGP) > - rdev->mc.vram_is_ddr = true; > - else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR) > - rdev->mc.vram_is_ddr = true; > - if ((rdev->family == CHIP_RV100) || > - (rdev->family == CHIP_RS100) || > - (rdev->family == CHIP_RS200)) { > - tmp = RREG32(RADEON_MEM_CNTL); > - if (tmp & RV100_HALF_MODE) { > - rdev->mc.vram_width = 32; > - } else { > - rdev->mc.vram_width = 64; > + switch (prim_walk) { > + case 1: > + for (i = 0; i < track->num_arrays; i++) { > + size = track->arrays[i].esize * track->max_indx * 4; > + if (track->arrays[i].robj == NULL) { > + DRM_ERROR("(PW %u) Vertex array %u no buffer " > + "bound\n", prim_walk, i); > + return -EINVAL; > + } > + if (size > radeon_bo_size(track->arrays[i].robj)) { > + dev_err(rdev->dev, "(PW %u) Vertex array %u " > + "need %lu dwords have %lu dwords\n", > + prim_walk, i, size >> 2, > + radeon_bo_size(track->arrays[i].robj) > + >> 2); > + DRM_ERROR("Max indices %u\n", track->max_indx); > + return -EINVAL; > + } > } > - if (rdev->flags & RADEON_SINGLE_CRTC) { > - rdev->mc.vram_width /= 4; > - rdev->mc.vram_is_ddr = true; > + break; > + case 2: > + for (i = 0; i < track->num_arrays; i++) { > + size = track->arrays[i].esize * (nverts - 1) * 4; > + if (track->arrays[i].robj == NULL) { > + DRM_ERROR("(PW %u) Vertex array %u no buffer " > + "bound\n", prim_walk, i); > + return -EINVAL; > + } > + if (size > radeon_bo_size(track->arrays[i].robj)) { > + dev_err(rdev->dev, "(PW %u) Vertex array %u " > + "need %lu dwords have %lu dwords\n", > + prim_walk, i, size >> 2, > + radeon_bo_size(track->arrays[i].robj) > + >> 2); > + return -EINVAL; > + } > } > - } else if (rdev->family <= CHIP_RV280) { > - tmp = RREG32(RADEON_MEM_CNTL); > - if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) { > - rdev->mc.vram_width = 128; > - } else { > - rdev->mc.vram_width = 64; > + break; > + case 3: > + size = track->vtx_size * nverts; > + if (size != track->immd_dwords) { > + DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n", > + track->immd_dwords, size); > + DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n", > + nverts, track->vtx_size); > + return -EINVAL; > } > - } else { > - /* newer IGPs */ > - rdev->mc.vram_width = 128; > - } > -} > - > -static u32 r100_get_accessible_vram(struct radeon_device *rdev) > -{ > - u32 aper_size; > - u8 byte; > - > - aper_size = RREG32(RADEON_CONFIG_APER_SIZE); > - > - /* Set HDP_APER_CNTL only on cards that are known not to be broken, > - * that is has the 2nd generation multifunction PCI interface > - */ > - if (rdev->family == CHIP_RV280 || > - rdev->family >= CHIP_RV350) { > - WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL, > - ~RADEON_HDP_APER_CNTL); > - DRM_INFO("Generation 2 PCI interface, using max accessible memory\n"); > - return aper_size * 2; > + break; > + default: > + DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n", > + prim_walk); > + return -EINVAL; > } > > - /* Older cards have all sorts of funny issues to deal with. First > - * check if it's a multifunction card by reading the PCI config > - * header type... Limit those to one aperture size > - */ > - pci_read_config_byte(rdev->pdev, 0xe, &byte); > - if (byte & 0x80) { > - DRM_INFO("Generation 1 PCI interface in multifunction mode\n"); > - DRM_INFO("Limiting VRAM to one aperture\n"); > - return aper_size; > + if (track->tex_dirty) { > + track->tex_dirty = false; > + return r100_cs_track_texture_check(rdev, track); > } > - > - /* Single function older card. We read HDP_APER_CNTL to see how the BIOS > - * have set it up. We don't write this as it's broken on some ASICs but > - * we expect the BIOS to have done the right thing (might be too optimistic...) > - */ > - if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL) > - return aper_size * 2; > - return aper_size; > + return 0; > } > > -void r100_vram_init_sizes(struct radeon_device *rdev) > +void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track) > { > - u64 config_aper_size; > + unsigned i, face; > > - /* work out accessible VRAM */ > - rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0); > - rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0); > - rdev->mc.visible_vram_size = r100_get_accessible_vram(rdev); > - /* FIXME we don't use the second aperture yet when we could use it */ > - if (rdev->mc.visible_vram_size > rdev->mc.aper_size) > - rdev->mc.visible_vram_size = rdev->mc.aper_size; > - config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE); > - if (rdev->flags & RADEON_IS_IGP) { > - uint32_t tom; > - /* read NB_TOM to get the amount of ram stolen for the GPU */ > - tom = RREG32(RADEON_NB_TOM); > - rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16); > - WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size); > - rdev->mc.mc_vram_size = rdev->mc.real_vram_size; > - } else { > - rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE); > - /* Some production boards of m6 will report 0 > - * if it's 8 MB > - */ > - if (rdev->mc.real_vram_size == 0) { > - rdev->mc.real_vram_size = 8192 * 1024; > - WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size); > - } > - /* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM - > - * Novell bug 204882 + along with lots of ubuntu ones > - */ > - if (rdev->mc.aper_size > config_aper_size) > - config_aper_size = rdev->mc.aper_size; > + track->cb_dirty = true; > + track->zb_dirty = true; > + track->tex_dirty = true; > + track->aa_dirty = true; > > - if (config_aper_size > rdev->mc.real_vram_size) > - rdev->mc.mc_vram_size = config_aper_size; > + if (rdev->family < CHIP_R300) { > + track->num_cb = 1; > + if (rdev->family <= CHIP_RS200) > + track->num_texture = 3; > else > - rdev->mc.mc_vram_size = rdev->mc.real_vram_size; > - } > -} > - > -void r100_vga_set_state(struct radeon_device *rdev, bool state) > -{ > - uint32_t temp; > - > - temp = RREG32(RADEON_CONFIG_CNTL); > - if (state == false) { > - temp &= ~RADEON_CFG_VGA_RAM_EN; > - temp |= RADEON_CFG_VGA_IO_DIS; > + track->num_texture = 6; > + track->maxy = 2048; > + track->separate_cube = 1; > } else { > - temp &= ~RADEON_CFG_VGA_IO_DIS; > + track->num_cb = 4; > + track->num_texture = 16; > + track->maxy = 4096; > + track->separate_cube = 0; > + track->aaresolve = false; > + track->aa.robj = NULL; > } > - WREG32(RADEON_CONFIG_CNTL, temp); > -} > - > -void r100_mc_init(struct radeon_device *rdev) > -{ > - u64 base; > > - r100_vram_get_type(rdev); > - r100_vram_init_sizes(rdev); > - base = rdev->mc.aper_base; > - if (rdev->flags & RADEON_IS_IGP) > - base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16; > - radeon_vram_location(rdev, &rdev->mc, base); > - rdev->mc.gtt_base_align = 0; > - if (!(rdev->flags & RADEON_IS_AGP)) > - radeon_gtt_location(rdev, &rdev->mc); > - radeon_update_bandwidth_info(rdev); > -} > - > - > -/* > - * Indirect registers accessor > - */ > -void r100_pll_errata_after_index(struct radeon_device *rdev) > -{ > - if (rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS) { > - (void)RREG32(RADEON_CLOCK_CNTL_DATA); > - (void)RREG32(RADEON_CRTC_GEN_CNTL); > + for (i = 0; i < track->num_cb; i++) { > + track->cb[i].robj = NULL; > + track->cb[i].pitch = 8192; > + track->cb[i].cpp = 16; > + track->cb[i].offset = 0; > } > -} > - > -static void r100_pll_errata_after_data(struct radeon_device *rdev) > -{ > - /* This workarounds is necessary on RV100, RS100 and RS200 chips > - * or the chip could hang on a subsequent access > - */ > - if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) { > - mdelay(5); > + track->z_enabled = true; > + track->zb.robj = NULL; > + track->zb.pitch = 8192; > + track->zb.cpp = 4; > + track->zb.offset = 0; > + track->vtx_size = 0x7F; > + track->immd_dwords = 0xFFFFFFFFUL; > + track->num_arrays = 11; > + track->max_indx = 0x00FFFFFFUL; > + for (i = 0; i < track->num_arrays; i++) { > + track->arrays[i].robj = NULL; > + track->arrays[i].esize = 0x7F; > } > - > - /* This function is required to workaround a hardware bug in some (all?) > - * revisions of the R300. This workaround should be called after every > - * CLOCK_CNTL_INDEX register access. If not, register reads afterward > - * may not be correct. > - */ > - if (rdev->pll_errata & CHIP_ERRATA_R300_CG) { > - uint32_t save, tmp; > - > - save = RREG32(RADEON_CLOCK_CNTL_INDEX); > - tmp = save & ~(0x3f | RADEON_PLL_WR_EN); > - WREG32(RADEON_CLOCK_CNTL_INDEX, tmp); > - tmp = RREG32(RADEON_CLOCK_CNTL_DATA); > - WREG32(RADEON_CLOCK_CNTL_INDEX, save); > + for (i = 0; i < track->num_texture; i++) { > + track->textures[i].compress_format = R100_TRACK_COMP_NONE; > + track->textures[i].pitch = 16536; > + track->textures[i].width = 16536; > + track->textures[i].height = 16536; > + track->textures[i].width_11 = 1 << 11; > + track->textures[i].height_11 = 1 << 11; > + track->textures[i].num_levels = 12; > + if (rdev->family <= CHIP_RS200) { > + track->textures[i].tex_coord_type = 0; > + track->textures[i].txdepth = 0; > + } else { > + track->textures[i].txdepth = 16; > + track->textures[i].tex_coord_type = 1; > + } > + track->textures[i].cpp = 64; > + track->textures[i].robj = NULL; > + /* CS IB emission code makes sure texture unit are disabled */ > + track->textures[i].enabled = false; > + track->textures[i].lookup_disable = false; > + track->textures[i].roundup_w = true; > + track->textures[i].roundup_h = true; > + if (track->separate_cube) > + for (face = 0; face < 5; face++) { > + track->textures[i].cube_info[face].robj = NULL; > + track->textures[i].cube_info[face].width = 16536; > + track->textures[i].cube_info[face].height = 16536; > + track->textures[i].cube_info[face].offset = 0; > + } > } > } > > -uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg) > +/* > + * Global GPU functions > + */ > +void r100_errata(struct radeon_device *rdev) > { > - uint32_t data; > + rdev->pll_errata = 0; > > - WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f); > - r100_pll_errata_after_index(rdev); > - data = RREG32(RADEON_CLOCK_CNTL_DATA); > - r100_pll_errata_after_data(rdev); > - return data; > -} > + if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) { > + rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS; > + } > > -void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) > -{ > - WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN)); > - r100_pll_errata_after_index(rdev); > - WREG32(RADEON_CLOCK_CNTL_DATA, v); > - r100_pll_errata_after_data(rdev); > + if (rdev->family == CHIP_RV100 || > + rdev->family == CHIP_RS100 || > + rdev->family == CHIP_RS200) { > + rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY; > + } > } > > -void r100_set_safe_registers(struct radeon_device *rdev) > +/* Wait for vertical sync on primary CRTC */ > +void r100_gpu_wait_for_vsync(struct radeon_device *rdev) > { > - if (ASIC_IS_RN50(rdev)) { > - rdev->config.r100.reg_safe_bm = rn50_reg_safe_bm; > - rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(rn50_reg_safe_bm); > - } else if (rdev->family < CHIP_R200) { > - rdev->config.r100.reg_safe_bm = r100_reg_safe_bm; > - rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r100_reg_safe_bm); > - } else { > - r200_set_safe_registers(rdev); > + uint32_t crtc_gen_cntl, tmp; > + int i; > + > + crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL); > + if ((crtc_gen_cntl & RADEON_CRTC_DISP_REQ_EN_B) || > + !(crtc_gen_cntl & RADEON_CRTC_EN)) { > + return; > + } > + /* Clear the CRTC_VBLANK_SAVE bit */ > + WREG32(RADEON_CRTC_STATUS, RADEON_CRTC_VBLANK_SAVE_CLEAR); > + for (i = 0; i < rdev->usec_timeout; i++) { > + tmp = RREG32(RADEON_CRTC_STATUS); > + if (tmp & RADEON_CRTC_VBLANK_SAVE) { > + return; > + } > + DRM_UDELAY(1); > } > } > > -/* > - * Debugfs info > - */ > -#if defined(CONFIG_DEBUG_FS) > -static int r100_debugfs_rbbm_info(struct seq_file *m, void *data) > +/* Wait for vertical sync on secondary CRTC */ > +void r100_gpu_wait_for_vsync2(struct radeon_device *rdev) > { > - struct drm_info_node *node = (struct drm_info_node *) m->private; > - struct drm_device *dev = node->minor->dev; > - struct radeon_device *rdev = dev->dev_private; > - uint32_t reg, value; > - unsigned i; > + uint32_t crtc2_gen_cntl, tmp; > + int i; > > - seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS)); > - seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C)); > - seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); > - for (i = 0; i < 64; i++) { > - WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100); > - reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2; > - WREG32(RADEON_RBBM_CMDFIFO_ADDR, i); > - value = RREG32(RADEON_RBBM_CMDFIFO_DATA); > - seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value); > + crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL); > + if ((crtc2_gen_cntl & RADEON_CRTC2_DISP_REQ_EN_B) || > + !(crtc2_gen_cntl & RADEON_CRTC2_EN)) > + return; > + > + /* Clear the CRTC_VBLANK_SAVE bit */ > + WREG32(RADEON_CRTC2_STATUS, RADEON_CRTC2_VBLANK_SAVE_CLEAR); > + for (i = 0; i < rdev->usec_timeout; i++) { > + tmp = RREG32(RADEON_CRTC2_STATUS); > + if (tmp & RADEON_CRTC2_VBLANK_SAVE) { > + return; > + } > + DRM_UDELAY(1); > } > - return 0; > } > > -static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data) > +int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n) > { > - struct drm_info_node *node = (struct drm_info_node *) m->private; > - struct drm_device *dev = node->minor->dev; > - struct radeon_device *rdev = dev->dev_private; > - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; > - uint32_t rdp, wdp; > - unsigned count, i, j; > + unsigned i; > + uint32_t tmp; > > - radeon_ring_free_size(rdev, ring); > - rdp = RREG32(RADEON_CP_RB_RPTR); > - wdp = RREG32(RADEON_CP_RB_WPTR); > - count = (rdp + ring->ring_size - wdp) & ring->ptr_mask; > - seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); > - seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp); > - seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp); > - seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw); > - seq_printf(m, "%u dwords in ring\n", count); > - for (j = 0; j <= count; j++) { > - i = (rdp + j) & ring->ptr_mask; > - seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]); > + for (i = 0; i < rdev->usec_timeout; i++) { > + tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK; > + if (tmp >= n) { > + return 0; > + } > + DRM_UDELAY(1); > } > - return 0; > + return -1; > } > > - > -static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data) > +int r100_gui_wait_for_idle(struct radeon_device *rdev) > { > - struct drm_info_node *node = (struct drm_info_node *) m->private; > - struct drm_device *dev = node->minor->dev; > - struct radeon_device *rdev = dev->dev_private; > - uint32_t csq_stat, csq2_stat, tmp; > - unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr; > unsigned i; > + uint32_t tmp; > > - seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); > - seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE)); > - csq_stat = RREG32(RADEON_CP_CSQ_STAT); > - csq2_stat = RREG32(RADEON_CP_CSQ2_STAT); > - r_rptr = (csq_stat >> 0) & 0x3ff; > - r_wptr = (csq_stat >> 10) & 0x3ff; > - ib1_rptr = (csq_stat >> 20) & 0x3ff; > - ib1_wptr = (csq2_stat >> 0) & 0x3ff; > - ib2_rptr = (csq2_stat >> 10) & 0x3ff; > - ib2_wptr = (csq2_stat >> 20) & 0x3ff; > - seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat); > - seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat); > - seq_printf(m, "Ring rptr %u\n", r_rptr); > - seq_printf(m, "Ring wptr %u\n", r_wptr); > - seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr); > - seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr); > - seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr); > - seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr); > - /* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms > - * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */ > - seq_printf(m, "Ring fifo:\n"); > - for (i = 0; i < 256; i++) { > - WREG32(RADEON_CP_CSQ_ADDR, i << 2); > - tmp = RREG32(RADEON_CP_CSQ_DATA); > - seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp); > - } > - seq_printf(m, "Indirect1 fifo:\n"); > - for (i = 256; i <= 512; i++) { > - WREG32(RADEON_CP_CSQ_ADDR, i << 2); > - tmp = RREG32(RADEON_CP_CSQ_DATA); > - seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp); > + if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) { > + printk(KERN_WARNING "radeon: wait for empty RBBM fifo failed !" > + " Bad things might happen.\n"); > } > - seq_printf(m, "Indirect2 fifo:\n"); > - for (i = 640; i < ib1_wptr; i++) { > - WREG32(RADEON_CP_CSQ_ADDR, i << 2); > - tmp = RREG32(RADEON_CP_CSQ_DATA); > - seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp); > + for (i = 0; i < rdev->usec_timeout; i++) { > + tmp = RREG32(RADEON_RBBM_STATUS); > + if (!(tmp & RADEON_RBBM_ACTIVE)) { > + return 0; > + } > + DRM_UDELAY(1); > } > - return 0; > + return -1; > } > > -static int r100_debugfs_mc_info(struct seq_file *m, void *data) > +int r100_mc_wait_for_idle(struct radeon_device *rdev) > { > - struct drm_info_node *node = (struct drm_info_node *) m->private; > - struct drm_device *dev = node->minor->dev; > - struct radeon_device *rdev = dev->dev_private; > + unsigned i; > uint32_t tmp; > > - tmp = RREG32(RADEON_CONFIG_MEMSIZE); > - seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp); > - tmp = RREG32(RADEON_MC_FB_LOCATION); > - seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp); > - tmp = RREG32(RADEON_BUS_CNTL); > - seq_printf(m, "BUS_CNTL 0x%08x\n", tmp); > - tmp = RREG32(RADEON_MC_AGP_LOCATION); > - seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp); > - tmp = RREG32(RADEON_AGP_BASE); > - seq_printf(m, "AGP_BASE 0x%08x\n", tmp); > - tmp = RREG32(RADEON_HOST_PATH_CNTL); > - seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp); > - tmp = RREG32(0x01D0); > - seq_printf(m, "AIC_CTRL 0x%08x\n", tmp); > - tmp = RREG32(RADEON_AIC_LO_ADDR); > - seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp); > - tmp = RREG32(RADEON_AIC_HI_ADDR); > - seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp); > - tmp = RREG32(0x01E4); > - seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp); > - return 0; > + for (i = 0; i < rdev->usec_timeout; i++) { > + /* read MC_STATUS */ > + tmp = RREG32(RADEON_MC_STATUS); > + if (tmp & RADEON_MC_IDLE) { > + return 0; > + } > + DRM_UDELAY(1); > + } > + return -1; > } > > -static struct drm_info_list r100_debugfs_rbbm_list[] = { > - {"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL}, > -}; > - > -static struct drm_info_list r100_debugfs_cp_list[] = { > - {"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL}, > - {"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL}, > -}; > - > -static struct drm_info_list r100_debugfs_mc_info_list[] = { > - {"r100_mc_info", r100_debugfs_mc_info, 0, NULL}, > -}; > -#endif > - > -int r100_debugfs_rbbm_init(struct radeon_device *rdev) > +bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) > { > -#if defined(CONFIG_DEBUG_FS) > - return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list, 1); > -#else > - return 0; > -#endif > -} > + u32 rbbm_status; > > -int r100_debugfs_cp_init(struct radeon_device *rdev) > -{ > -#if defined(CONFIG_DEBUG_FS) > - return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list, 2); > -#else > - return 0; > -#endif > + rbbm_status = RREG32(R_000E40_RBBM_STATUS); > + if (!G_000E40_GUI_ACTIVE(rbbm_status)) { > + radeon_ring_lockup_update(ring); > + return false; > + } > + /* force CP activities */ > + radeon_ring_force_activity(rdev, ring); > + return radeon_ring_test_lockup(rdev, ring); > } > > -int r100_debugfs_mc_info_init(struct radeon_device *rdev) > +void r100_bm_disable(struct radeon_device *rdev) > { > -#if defined(CONFIG_DEBUG_FS) > - return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list, 1); > -#else > - return 0; > -#endif > + u32 tmp; > + > + /* disable bus mastering */ > + tmp = RREG32(R_000030_BUS_CNTL); > + WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000044); > + mdelay(1); > + WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000042); > + mdelay(1); > + WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000040); > + tmp = RREG32(RADEON_BUS_CNTL); > + mdelay(1); > + pci_clear_master(rdev->pdev); > + mdelay(1); > } > > -int r100_set_surface_reg(struct radeon_device *rdev, int reg, > - uint32_t tiling_flags, uint32_t pitch, > - uint32_t offset, uint32_t obj_size) > +int r100_asic_reset(struct radeon_device *rdev) > { > - int surf_index = reg * 16; > - int flags = 0; > + struct r100_mc_save save; > + u32 status, tmp; > + int ret = 0; > > - if (rdev->family <= CHIP_RS200) { > - if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO)) > - == (RADEON_TILING_MACRO|RADEON_TILING_MICRO)) > - flags |= RADEON_SURF_TILE_COLOR_BOTH; > - if (tiling_flags & RADEON_TILING_MACRO) > - flags |= RADEON_SURF_TILE_COLOR_MACRO; > - } else if (rdev->family <= CHIP_RV280) { > - if (tiling_flags & (RADEON_TILING_MACRO)) > - flags |= R200_SURF_TILE_COLOR_MACRO; > - if (tiling_flags & RADEON_TILING_MICRO) > - flags |= R200_SURF_TILE_COLOR_MICRO; > - } else { > - if (tiling_flags & RADEON_TILING_MACRO) > - flags |= R300_SURF_TILE_MACRO; > - if (tiling_flags & RADEON_TILING_MICRO) > - flags |= R300_SURF_TILE_MICRO; > + status = RREG32(R_000E40_RBBM_STATUS); > + if (!G_000E40_GUI_ACTIVE(status)) { > + return 0; > } > + r100_mc_stop(rdev, &save); > + status = RREG32(R_000E40_RBBM_STATUS); > + dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status); > + /* stop CP */ > + WREG32(RADEON_CP_CSQ_CNTL, 0); > + tmp = RREG32(RADEON_CP_RB_CNTL); > + WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA); > + WREG32(RADEON_CP_RB_RPTR_WR, 0); > + WREG32(RADEON_CP_RB_WPTR, 0); > + WREG32(RADEON_CP_RB_CNTL, tmp); > + /* save PCI state */ > + pci_save_state(rdev->pdev); > + /* disable bus mastering */ > + r100_bm_disable(rdev); > + WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_SE(1) | > + S_0000F0_SOFT_RESET_RE(1) | > + S_0000F0_SOFT_RESET_PP(1) | > + S_0000F0_SOFT_RESET_RB(1)); > + RREG32(R_0000F0_RBBM_SOFT_RESET); > + mdelay(500); > + WREG32(R_0000F0_RBBM_SOFT_RESET, 0); > + mdelay(1); > + status = RREG32(R_000E40_RBBM_STATUS); > + dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status); > + /* reset CP */ > + WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_CP(1)); > + RREG32(R_0000F0_RBBM_SOFT_RESET); > + mdelay(500); > + WREG32(R_0000F0_RBBM_SOFT_RESET, 0); > + mdelay(1); > + status = RREG32(R_000E40_RBBM_STATUS); > + dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status); > + /* restore PCI & busmastering */ > + pci_restore_state(rdev->pdev); > + r100_enable_bm(rdev); > + /* Check if GPU is idle */ > + if (G_000E40_SE_BUSY(status) || G_000E40_RE_BUSY(status) || > + G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) { > + dev_err(rdev->dev, "failed to reset GPU\n"); > + ret = -1; > + } else > + dev_info(rdev->dev, "GPU reset succeed\n"); > + r100_mc_resume(rdev, &save); > + return ret; > +} > > - if (tiling_flags & RADEON_TILING_SWAP_16BIT) > - flags |= RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP; > - if (tiling_flags & RADEON_TILING_SWAP_32BIT) > - flags |= RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP; > - > - /* when we aren't tiling the pitch seems to needs to be furtherdivided down. - tested on power5 + rn50 server */ > - if (tiling_flags & (RADEON_TILING_SWAP_16BIT | RADEON_TILING_SWAP_32BIT)) { > - if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) > - if (ASIC_IS_RN50(rdev)) > - pitch /= 16; > - } > +void r100_set_common_regs(struct radeon_device *rdev) > +{ > + struct drm_device *dev = rdev->ddev; > + bool force_dac2 = false; > + u32 tmp; > > - /* r100/r200 divide by 16 */ > - if (rdev->family < CHIP_R300) > - flags |= pitch / 16; > - else > - flags |= pitch / 8; > + /* set these so they don't interfere with anything */ > + WREG32(RADEON_OV0_SCALE_CNTL, 0); > + WREG32(RADEON_SUBPIC_CNTL, 0); > + WREG32(RADEON_VIPH_CONTROL, 0); > + WREG32(RADEON_I2C_CNTL_1, 0); > + WREG32(RADEON_DVI_I2C_CNTL_1, 0); > + WREG32(RADEON_CAP0_TRIG_CNTL, 0); > + WREG32(RADEON_CAP1_TRIG_CNTL, 0); > > + /* always set up dac2 on rn50 and some rv100 as lots > + * of servers seem to wire it up to a VGA port but > + * don't report it in the bios connector > + * table. > + */ > + switch (dev->pdev->device) { > + /* RN50 */ > + case 0x515e: > + case 0x5969: > + force_dac2 = true; > + break; > + /* RV100*/ > + case 0x5159: > + case 0x515a: > + /* DELL triple head servers */ > + if ((dev->pdev->subsystem_vendor == 0x1028 /* DELL */) && > + ((dev->pdev->subsystem_device == 0x016c) || > + (dev->pdev->subsystem_device == 0x016d) || > + (dev->pdev->subsystem_device == 0x016e) || > + (dev->pdev->subsystem_device == 0x016f) || > + (dev->pdev->subsystem_device == 0x0170) || > + (dev->pdev->subsystem_device == 0x017d) || > + (dev->pdev->subsystem_device == 0x017e) || > + (dev->pdev->subsystem_device == 0x0183) || > + (dev->pdev->subsystem_device == 0x018a) || > + (dev->pdev->subsystem_device == 0x019a))) > + force_dac2 = true; > + break; > + } > > - DRM_DEBUG_KMS("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1); > - WREG32(RADEON_SURFACE0_INFO + surf_index, flags); > - WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset); > - WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1); > - return 0; > -} > + if (force_dac2) { > + u32 disp_hw_debug = RREG32(RADEON_DISP_HW_DEBUG); > + u32 tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL); > + u32 dac2_cntl = RREG32(RADEON_DAC_CNTL2); > > -void r100_clear_surface_reg(struct radeon_device *rdev, int reg) > -{ > - int surf_index = reg * 16; > - WREG32(RADEON_SURFACE0_INFO + surf_index, 0); > -} > + /* For CRT on DAC2, don't turn it on if BIOS didn't > + enable it, even it's detected. > + */ > > -void r100_bandwidth_update(struct radeon_device *rdev) > -{ > - fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff; > - fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff; > - fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff, crit_point_ff; > - uint32_t temp, data, mem_trcd, mem_trp, mem_tras; > - fixed20_12 memtcas_ff[8] = { > - dfixed_init(1), > - dfixed_init(2), > - dfixed_init(3), > - dfixed_init(0), > - dfixed_init_half(1), > - dfixed_init_half(2), > - dfixed_init(0), > - }; > - fixed20_12 memtcas_rs480_ff[8] = { > - dfixed_init(0), > - dfixed_init(1), > - dfixed_init(2), > - dfixed_init(3), > - dfixed_init(0), > - dfixed_init_half(1), > - dfixed_init_half(2), > - dfixed_init_half(3), > - }; > - fixed20_12 memtcas2_ff[8] = { > - dfixed_init(0), > - dfixed_init(1), > - dfixed_init(2), > - dfixed_init(3), > - dfixed_init(4), > - dfixed_init(5), > - dfixed_init(6), > - dfixed_init(7), > - }; > - fixed20_12 memtrbs[8] = { > - dfixed_init(1), > - dfixed_init_half(1), > - dfixed_init(2), > - dfixed_init_half(2), > - dfixed_init(3), > - dfixed_init_half(3), > - dfixed_init(4), > - dfixed_init_half(4) > - }; > - fixed20_12 memtrbs_r4xx[8] = { > - dfixed_init(4), > - dfixed_init(5), > - dfixed_init(6), > - dfixed_init(7), > - dfixed_init(8), > - dfixed_init(9), > - dfixed_init(10), > - dfixed_init(11) > - }; > - fixed20_12 min_mem_eff; > - fixed20_12 mc_latency_sclk, mc_latency_mclk, k1; > - fixed20_12 cur_latency_mclk, cur_latency_sclk; > - fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate, > - disp_drain_rate2, read_return_rate; > - fixed20_12 time_disp1_drop_priority; > - int c; > - int cur_size = 16; /* in octawords */ > - int critical_point = 0, critical_point2; > -/* uint32_t read_return_rate, time_disp1_drop_priority; */ > - int stop_req, max_stop_req; > - struct drm_display_mode *mode1 = NULL; > - struct drm_display_mode *mode2 = NULL; > - uint32_t pixel_bytes1 = 0; > - uint32_t pixel_bytes2 = 0; > + /* force it to crtc0 */ > + dac2_cntl &= ~RADEON_DAC2_DAC_CLK_SEL; > + dac2_cntl |= RADEON_DAC2_DAC2_CLK_SEL; > + disp_hw_debug |= RADEON_CRT2_DISP1_SEL; > > - radeon_update_display_priority(rdev); > + /* set up the TV DAC */ > + tv_dac_cntl &= ~(RADEON_TV_DAC_PEDESTAL | > + RADEON_TV_DAC_STD_MASK | > + RADEON_TV_DAC_RDACPD | > + RADEON_TV_DAC_GDACPD | > + RADEON_TV_DAC_BDACPD | > + RADEON_TV_DAC_BGADJ_MASK | > + RADEON_TV_DAC_DACADJ_MASK); > + tv_dac_cntl |= (RADEON_TV_DAC_NBLANK | > + RADEON_TV_DAC_NHOLD | > + RADEON_TV_DAC_STD_PS2 | > + (0x58 << 16)); > > - if (rdev->mode_info.crtcs[0]->base.enabled) { > - mode1 = &rdev->mode_info.crtcs[0]->base.mode; > - pixel_bytes1 = rdev->mode_info.crtcs[0]->base.fb->bits_per_pixel / 8; > + WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl); > + WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug); > + WREG32(RADEON_DAC_CNTL2, dac2_cntl); > } > - if (!(rdev->flags & RADEON_SINGLE_CRTC)) { > - if (rdev->mode_info.crtcs[1]->base.enabled) { > - mode2 = &rdev->mode_info.crtcs[1]->base.mode; > - pixel_bytes2 = rdev->mode_info.crtcs[1]->base.fb->bits_per_pixel / 8; > + > + /* switch PM block to ACPI mode */ > + tmp = RREG32_PLL(RADEON_PLL_PWRMGT_CNTL); > + tmp &= ~RADEON_PM_MODE_SEL; > + WREG32_PLL(RADEON_PLL_PWRMGT_CNTL, tmp); > + > +} > + > +/* > + * VRAM info > + */ > +static void r100_vram_get_type(struct radeon_device *rdev) > +{ > + uint32_t tmp; > + > + rdev->mc.vram_is_ddr = false; > + if (rdev->flags & RADEON_IS_IGP) > + rdev->mc.vram_is_ddr = true; > + else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR) > + rdev->mc.vram_is_ddr = true; > + if ((rdev->family == CHIP_RV100) || > + (rdev->family == CHIP_RS100) || > + (rdev->family == CHIP_RS200)) { > + tmp = RREG32(RADEON_MEM_CNTL); > + if (tmp & RV100_HALF_MODE) { > + rdev->mc.vram_width = 32; > + } else { > + rdev->mc.vram_width = 64; > + } > + if (rdev->flags & RADEON_SINGLE_CRTC) { > + rdev->mc.vram_width /= 4; > + rdev->mc.vram_is_ddr = true; > } > + } else if (rdev->family <= CHIP_RV280) { > + tmp = RREG32(RADEON_MEM_CNTL); > + if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) { > + rdev->mc.vram_width = 128; > + } else { > + rdev->mc.vram_width = 64; > + } > + } else { > + /* newer IGPs */ > + rdev->mc.vram_width = 128; > } > +} > > - min_mem_eff.full = dfixed_const_8(0); > - /* get modes */ > - if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) { > - uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER); > - mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT); > - mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT); > - /* check crtc enables */ > - if (mode2) > - mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT); > - if (mode1) > - mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT); > - WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer); > +static u32 r100_get_accessible_vram(struct radeon_device *rdev) > +{ > + u32 aper_size; > + u8 byte; > + > + aper_size = RREG32(RADEON_CONFIG_APER_SIZE); > + > + /* Set HDP_APER_CNTL only on cards that are known not to be broken, > + * that is has the 2nd generation multifunction PCI interface > + */ > + if (rdev->family == CHIP_RV280 || > + rdev->family >= CHIP_RV350) { > + WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL, > + ~RADEON_HDP_APER_CNTL); > + DRM_INFO("Generation 2 PCI interface, using max accessible memory\n"); > + return aper_size * 2; > } > > - /* > - * determine is there is enough bw for current mode > + /* Older cards have all sorts of funny issues to deal with. First > + * check if it's a multifunction card by reading the PCI config > + * header type... Limit those to one aperture size > */ > - sclk_ff = rdev->pm.sclk; > - mclk_ff = rdev->pm.mclk; > + pci_read_config_byte(rdev->pdev, 0xe, &byte); > + if (byte & 0x80) { > + DRM_INFO("Generation 1 PCI interface in multifunction mode\n"); > + DRM_INFO("Limiting VRAM to one aperture\n"); > + return aper_size; > + } > > - temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1); > - temp_ff.full = dfixed_const(temp); > - mem_bw.full = dfixed_mul(mclk_ff, temp_ff); > + /* Single function older card. We read HDP_APER_CNTL to see how the BIOS > + * have set it up. We don't write this as it's broken on some ASICs but > + * we expect the BIOS to have done the right thing (might be too optimistic...) > + */ > + if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL) > + return aper_size * 2; > + return aper_size; > +} > > - pix_clk.full = 0; > - pix_clk2.full = 0; > - peak_disp_bw.full = 0; > - if (mode1) { > - temp_ff.full = dfixed_const(1000); > - pix_clk.full = dfixed_const(mode1->clock); /* convert to fixed point */ > - pix_clk.full = dfixed_div(pix_clk, temp_ff); > - temp_ff.full = dfixed_const(pixel_bytes1); > - peak_disp_bw.full += dfixed_mul(pix_clk, temp_ff); > - } > - if (mode2) { > - temp_ff.full = dfixed_const(1000); > - pix_clk2.full = dfixed_const(mode2->clock); /* convert to fixed point */ > - pix_clk2.full = dfixed_div(pix_clk2, temp_ff); > - temp_ff.full = dfixed_const(pixel_bytes2); > - peak_disp_bw.full += dfixed_mul(pix_clk2, temp_ff); > - } > +void r100_vram_init_sizes(struct radeon_device *rdev) > +{ > + u64 config_aper_size; > > - mem_bw.full = dfixed_mul(mem_bw, min_mem_eff); > - if (peak_disp_bw.full >= mem_bw.full) { > - DRM_ERROR("You may not have enough display bandwidth for current mode\n" > - "If you have flickering problem, try to lower resolution, refresh rate, or color depth\n"); > + /* work out accessible VRAM */ > + rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0); > + rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0); > + rdev->mc.visible_vram_size = r100_get_accessible_vram(rdev); > + /* FIXME we don't use the second aperture yet when we could use it */ > + if (rdev->mc.visible_vram_size > rdev->mc.aper_size) > + rdev->mc.visible_vram_size = rdev->mc.aper_size; > + config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE); > + if (rdev->flags & RADEON_IS_IGP) { > + uint32_t tom; > + /* read NB_TOM to get the amount of ram stolen for the GPU */ > + tom = RREG32(RADEON_NB_TOM); > + rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16); > + WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size); > + rdev->mc.mc_vram_size = rdev->mc.real_vram_size; > + } else { > + rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE); > + /* Some production boards of m6 will report 0 > + * if it's 8 MB > + */ > + if (rdev->mc.real_vram_size == 0) { > + rdev->mc.real_vram_size = 8192 * 1024; > + WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size); > + } > + /* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM - > + * Novell bug 204882 + along with lots of ubuntu ones > + */ > + if (rdev->mc.aper_size > config_aper_size) > + config_aper_size = rdev->mc.aper_size; > + > + if (config_aper_size > rdev->mc.real_vram_size) > + rdev->mc.mc_vram_size = config_aper_size; > + else > + rdev->mc.mc_vram_size = rdev->mc.real_vram_size; > } > +} > > - /* Get values from the EXT_MEM_CNTL register...converting its contents. */ > - temp = RREG32(RADEON_MEM_TIMING_CNTL); > - if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */ > - mem_trcd = ((temp >> 2) & 0x3) + 1; > - mem_trp = ((temp & 0x3)) + 1; > - mem_tras = ((temp & 0x70) >> 4) + 1; > - } else if (rdev->family == CHIP_R300 || > - rdev->family == CHIP_R350) { /* r300, r350 */ > - mem_trcd = (temp & 0x7) + 1; > - mem_trp = ((temp >> 8) & 0x7) + 1; > - mem_tras = ((temp >> 11) & 0xf) + 4; > - } else if (rdev->family == CHIP_RV350 || > - rdev->family <= CHIP_RV380) { > - /* rv3x0 */ > - mem_trcd = (temp & 0x7) + 3; > - mem_trp = ((temp >> 8) & 0x7) + 3; > - mem_tras = ((temp >> 11) & 0xf) + 6; > - } else if (rdev->family == CHIP_R420 || > - rdev->family == CHIP_R423 || > - rdev->family == CHIP_RV410) { > - /* r4xx */ > - mem_trcd = (temp & 0xf) + 3; > - if (mem_trcd > 15) > - mem_trcd = 15; > - mem_trp = ((temp >> 8) & 0xf) + 3; > - if (mem_trp > 15) > - mem_trp = 15; > - mem_tras = ((temp >> 12) & 0x1f) + 6; > - if (mem_tras > 31) > - mem_tras = 31; > - } else { /* RV200, R200 */ > - mem_trcd = (temp & 0x7) + 1; > - mem_trp = ((temp >> 8) & 0x7) + 1; > - mem_tras = ((temp >> 12) & 0xf) + 4; > +void r100_vga_set_state(struct radeon_device *rdev, bool state) > +{ > + uint32_t temp; > + > + temp = RREG32(RADEON_CONFIG_CNTL); > + if (state == false) { > + temp &= ~RADEON_CFG_VGA_RAM_EN; > + temp |= RADEON_CFG_VGA_IO_DIS; > + } else { > + temp &= ~RADEON_CFG_VGA_IO_DIS; > } > - /* convert to FF */ > - trcd_ff.full = dfixed_const(mem_trcd); > - trp_ff.full = dfixed_const(mem_trp); > - tras_ff.full = dfixed_const(mem_tras); > + WREG32(RADEON_CONFIG_CNTL, temp); > +} > > - /* Get values from the MEM_SDRAM_MODE_REG register...converting its */ > - temp = RREG32(RADEON_MEM_SDRAM_MODE_REG); > - data = (temp & (7 << 20)) >> 20; > - if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) { > - if (rdev->family == CHIP_RS480) /* don't think rs400 */ > - tcas_ff = memtcas_rs480_ff[data]; > - else > - tcas_ff = memtcas_ff[data]; > - } else > - tcas_ff = memtcas2_ff[data]; > +void r100_mc_init(struct radeon_device *rdev) > +{ > + u64 base; > > - if (rdev->family == CHIP_RS400 || > - rdev->family == CHIP_RS480) { > - /* extra cas latency stored in bits 23-25 0-4 clocks */ > - data = (temp >> 23) & 0x7; > - if (data < 5) > - tcas_ff.full += dfixed_const(data); > + r100_vram_get_type(rdev); > + r100_vram_init_sizes(rdev); > + base = rdev->mc.aper_base; > + if (rdev->flags & RADEON_IS_IGP) > + base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16; > + radeon_vram_location(rdev, &rdev->mc, base); > + rdev->mc.gtt_base_align = 0; > + if (!(rdev->flags & RADEON_IS_AGP)) > + radeon_gtt_location(rdev, &rdev->mc); > + radeon_update_bandwidth_info(rdev); > +} > + > + > +/* > + * Indirect registers accessor > + */ > +void r100_pll_errata_after_index(struct radeon_device *rdev) > +{ > + if (rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS) { > + (void)RREG32(RADEON_CLOCK_CNTL_DATA); > + (void)RREG32(RADEON_CRTC_GEN_CNTL); > } > +} > > - if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) { > - /* on the R300, Tcas is included in Trbs. > - */ > - temp = RREG32(RADEON_MEM_CNTL); > - data = (R300_MEM_NUM_CHANNELS_MASK & temp); > - if (data == 1) { > - if (R300_MEM_USE_CD_CH_ONLY & temp) { > - temp = RREG32(R300_MC_IND_INDEX); > - temp &= ~R300_MC_IND_ADDR_MASK; > - temp |= R300_MC_READ_CNTL_CD_mcind; > - WREG32(R300_MC_IND_INDEX, temp); > - temp = RREG32(R300_MC_IND_DATA); > - data = (R300_MEM_RBS_POSITION_C_MASK & temp); > - } else { > - temp = RREG32(R300_MC_READ_CNTL_AB); > - data = (R300_MEM_RBS_POSITION_A_MASK & temp); > - } > - } else { > - temp = RREG32(R300_MC_READ_CNTL_AB); > - data = (R300_MEM_RBS_POSITION_A_MASK & temp); > - } > - if (rdev->family == CHIP_RV410 || > - rdev->family == CHIP_R420 || > - rdev->family == CHIP_R423) > - trbs_ff = memtrbs_r4xx[data]; > - else > - trbs_ff = memtrbs[data]; > - tcas_ff.full += trbs_ff.full; > +static void r100_pll_errata_after_data(struct radeon_device *rdev) > +{ > + /* This workarounds is necessary on RV100, RS100 and RS200 chips > + * or the chip could hang on a subsequent access > + */ > + if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) { > + mdelay(5); > } > > - sclk_eff_ff.full = sclk_ff.full; > + /* This function is required to workaround a hardware bug in some (all?) > + * revisions of the R300. This workaround should be called after every > + * CLOCK_CNTL_INDEX register access. If not, register reads afterward > + * may not be correct. > + */ > + if (rdev->pll_errata & CHIP_ERRATA_R300_CG) { > + uint32_t save, tmp; > > - if (rdev->flags & RADEON_IS_AGP) { > - fixed20_12 agpmode_ff; > - agpmode_ff.full = dfixed_const(radeon_agpmode); > - temp_ff.full = dfixed_const_666(16); > - sclk_eff_ff.full -= dfixed_mul(agpmode_ff, temp_ff); > + save = RREG32(RADEON_CLOCK_CNTL_INDEX); > + tmp = save & ~(0x3f | RADEON_PLL_WR_EN); > + WREG32(RADEON_CLOCK_CNTL_INDEX, tmp); > + tmp = RREG32(RADEON_CLOCK_CNTL_DATA); > + WREG32(RADEON_CLOCK_CNTL_INDEX, save); > } > - /* TODO PCIE lanes may affect this - agpmode == 16?? */ > +} > > - if (ASIC_IS_R300(rdev)) { > - sclk_delay_ff.full = dfixed_const(250); > - } else { > - if ((rdev->family == CHIP_RV100) || > - rdev->flags & RADEON_IS_IGP) { > - if (rdev->mc.vram_is_ddr) > - sclk_delay_ff.full = dfixed_const(41); > - else > - sclk_delay_ff.full = dfixed_const(33); > - } else { > - if (rdev->mc.vram_width == 128) > - sclk_delay_ff.full = dfixed_const(57); > - else > - sclk_delay_ff.full = dfixed_const(41); > - } > - } > +uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg) > +{ > + uint32_t data; > > - mc_latency_sclk.full = dfixed_div(sclk_delay_ff, sclk_eff_ff); > + WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f); > + r100_pll_errata_after_index(rdev); > + data = RREG32(RADEON_CLOCK_CNTL_DATA); > + r100_pll_errata_after_data(rdev); > + return data; > +} > > - if (rdev->mc.vram_is_ddr) { > - if (rdev->mc.vram_width == 32) { > - k1.full = dfixed_const(40); > - c = 3; > - } else { > - k1.full = dfixed_const(20); > - c = 1; > - } > +void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) > +{ > + WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN)); > + r100_pll_errata_after_index(rdev); > + WREG32(RADEON_CLOCK_CNTL_DATA, v); > + r100_pll_errata_after_data(rdev); > +} > + > +void r100_set_safe_registers(struct radeon_device *rdev) > +{ > + if (ASIC_IS_RN50(rdev)) { > + rdev->config.r100.reg_safe_bm = rn50_reg_safe_bm; > + rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(rn50_reg_safe_bm); > + } else if (rdev->family < CHIP_R200) { > + rdev->config.r100.reg_safe_bm = r100_reg_safe_bm; > + rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r100_reg_safe_bm); > } else { > - k1.full = dfixed_const(40); > - c = 3; > + r200_set_safe_registers(rdev); > } > +} > > - temp_ff.full = dfixed_const(2); > - mc_latency_mclk.full = dfixed_mul(trcd_ff, temp_ff); > - temp_ff.full = dfixed_const(c); > - mc_latency_mclk.full += dfixed_mul(tcas_ff, temp_ff); > - temp_ff.full = dfixed_const(4); > - mc_latency_mclk.full += dfixed_mul(tras_ff, temp_ff); > - mc_latency_mclk.full += dfixed_mul(trp_ff, temp_ff); > - mc_latency_mclk.full += k1.full; > +/* > + * Debugfs info > + */ > +#if defined(CONFIG_DEBUG_FS) > +static int r100_debugfs_rbbm_info(struct seq_file *m, void *data) > +{ > + struct drm_info_node *node = (struct drm_info_node *) m->private; > + struct drm_device *dev = node->minor->dev; > + struct radeon_device *rdev = dev->dev_private; > + uint32_t reg, value; > + unsigned i; > > - mc_latency_mclk.full = dfixed_div(mc_latency_mclk, mclk_ff); > - mc_latency_mclk.full += dfixed_div(temp_ff, sclk_eff_ff); > + seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS)); > + seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C)); > + seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); > + for (i = 0; i < 64; i++) { > + WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100); > + reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2; > + WREG32(RADEON_RBBM_CMDFIFO_ADDR, i); > + value = RREG32(RADEON_RBBM_CMDFIFO_DATA); > + seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value); > + } > + return 0; > +} > > - /* > - HW cursor time assuming worst case of full size colour cursor. > - */ > - temp_ff.full = dfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1)))); > - temp_ff.full += trcd_ff.full; > - if (temp_ff.full < tras_ff.full) > - temp_ff.full = tras_ff.full; > - cur_latency_mclk.full = dfixed_div(temp_ff, mclk_ff); > +static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data) > +{ > + struct drm_info_node *node = (struct drm_info_node *) m->private; > + struct drm_device *dev = node->minor->dev; > + struct radeon_device *rdev = dev->dev_private; > + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; > + uint32_t rdp, wdp; > + unsigned count, i, j; > > - temp_ff.full = dfixed_const(cur_size); > - cur_latency_sclk.full = dfixed_div(temp_ff, sclk_eff_ff); > - /* > - Find the total latency for the display data. > - */ > - disp_latency_overhead.full = dfixed_const(8); > - disp_latency_overhead.full = dfixed_div(disp_latency_overhead, sclk_ff); > - mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full; > - mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full; > + radeon_ring_free_size(rdev, ring); > + rdp = RREG32(RADEON_CP_RB_RPTR); > + wdp = RREG32(RADEON_CP_RB_WPTR); > + count = (rdp + ring->ring_size - wdp) & ring->ptr_mask; > + seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); > + seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp); > + seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp); > + seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw); > + seq_printf(m, "%u dwords in ring\n", count); > + for (j = 0; j <= count; j++) { > + i = (rdp + j) & ring->ptr_mask; > + seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]); > + } > + return 0; > +} > > - if (mc_latency_mclk.full > mc_latency_sclk.full) > - disp_latency.full = mc_latency_mclk.full; > - else > - disp_latency.full = mc_latency_sclk.full; > > - /* setup Max GRPH_STOP_REQ default value */ > - if (ASIC_IS_RV100(rdev)) > - max_stop_req = 0x5c; > - else > - max_stop_req = 0x7c; > +static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data) > +{ > + struct drm_info_node *node = (struct drm_info_node *) m->private; > + struct drm_device *dev = node->minor->dev; > + struct radeon_device *rdev = dev->dev_private; > + uint32_t csq_stat, csq2_stat, tmp; > + unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr; > + unsigned i; > > - if (mode1) { > - /* CRTC1 > - Set GRPH_BUFFER_CNTL register using h/w defined optimal values. > - GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ] > - */ > - stop_req = mode1->hdisplay * pixel_bytes1 / 16; > + seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT)); > + seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE)); > + csq_stat = RREG32(RADEON_CP_CSQ_STAT); > + csq2_stat = RREG32(RADEON_CP_CSQ2_STAT); > + r_rptr = (csq_stat >> 0) & 0x3ff; > + r_wptr = (csq_stat >> 10) & 0x3ff; > + ib1_rptr = (csq_stat >> 20) & 0x3ff; > + ib1_wptr = (csq2_stat >> 0) & 0x3ff; > + ib2_rptr = (csq2_stat >> 10) & 0x3ff; > + ib2_wptr = (csq2_stat >> 20) & 0x3ff; > + seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat); > + seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat); > + seq_printf(m, "Ring rptr %u\n", r_rptr); > + seq_printf(m, "Ring wptr %u\n", r_wptr); > + seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr); > + seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr); > + seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr); > + seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr); > + /* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms > + * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */ > + seq_printf(m, "Ring fifo:\n"); > + for (i = 0; i < 256; i++) { > + WREG32(RADEON_CP_CSQ_ADDR, i << 2); > + tmp = RREG32(RADEON_CP_CSQ_DATA); > + seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp); > + } > + seq_printf(m, "Indirect1 fifo:\n"); > + for (i = 256; i <= 512; i++) { > + WREG32(RADEON_CP_CSQ_ADDR, i << 2); > + tmp = RREG32(RADEON_CP_CSQ_DATA); > + seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp); > + } > + seq_printf(m, "Indirect2 fifo:\n"); > + for (i = 640; i < ib1_wptr; i++) { > + WREG32(RADEON_CP_CSQ_ADDR, i << 2); > + tmp = RREG32(RADEON_CP_CSQ_DATA); > + seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp); > + } > + return 0; > +} > + > +static int r100_debugfs_mc_info(struct seq_file *m, void *data) > +{ > + struct drm_info_node *node = (struct drm_info_node *) m->private; > + struct drm_device *dev = node->minor->dev; > + struct radeon_device *rdev = dev->dev_private; > + uint32_t tmp; > > - if (stop_req > max_stop_req) > - stop_req = max_stop_req; > + tmp = RREG32(RADEON_CONFIG_MEMSIZE); > + seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp); > + tmp = RREG32(RADEON_MC_FB_LOCATION); > + seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp); > + tmp = RREG32(RADEON_BUS_CNTL); > + seq_printf(m, "BUS_CNTL 0x%08x\n", tmp); > + tmp = RREG32(RADEON_MC_AGP_LOCATION); > + seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp); > + tmp = RREG32(RADEON_AGP_BASE); > + seq_printf(m, "AGP_BASE 0x%08x\n", tmp); > + tmp = RREG32(RADEON_HOST_PATH_CNTL); > + seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp); > + tmp = RREG32(0x01D0); > + seq_printf(m, "AIC_CTRL 0x%08x\n", tmp); > + tmp = RREG32(RADEON_AIC_LO_ADDR); > + seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp); > + tmp = RREG32(RADEON_AIC_HI_ADDR); > + seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp); > + tmp = RREG32(0x01E4); > + seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp); > + return 0; > +} > > - /* > - Find the drain rate of the display buffer. > - */ > - temp_ff.full = dfixed_const((16/pixel_bytes1)); > - disp_drain_rate.full = dfixed_div(pix_clk, temp_ff); > +static struct drm_info_list r100_debugfs_rbbm_list[] = { > + {"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL}, > +}; > > - /* > - Find the critical point of the display buffer. > - */ > - crit_point_ff.full = dfixed_mul(disp_drain_rate, disp_latency); > - crit_point_ff.full += dfixed_const_half(0); > +static struct drm_info_list r100_debugfs_cp_list[] = { > + {"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL}, > + {"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL}, > +}; > > - critical_point = dfixed_trunc(crit_point_ff); > +static struct drm_info_list r100_debugfs_mc_info_list[] = { > + {"r100_mc_info", r100_debugfs_mc_info, 0, NULL}, > +}; > +#endif > > - if (rdev->disp_priority == 2) { > - critical_point = 0; > - } > +int r100_debugfs_rbbm_init(struct radeon_device *rdev) > +{ > +#if defined(CONFIG_DEBUG_FS) > + return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list, 1); > +#else > + return 0; > +#endif > +} > > - /* > - The critical point should never be above max_stop_req-4. Setting > - GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time. > - */ > - if (max_stop_req - critical_point < 4) > - critical_point = 0; > +int r100_debugfs_cp_init(struct radeon_device *rdev) > +{ > +#if defined(CONFIG_DEBUG_FS) > + return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list, 2); > +#else > + return 0; > +#endif > +} > > - if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) { > - /* some R300 cards have problem with this set to 0, when CRTC2 is enabled.*/ > - critical_point = 0x10; > - } > +int r100_debugfs_mc_info_init(struct radeon_device *rdev) > +{ > +#if defined(CONFIG_DEBUG_FS) > + return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list, 1); > +#else > + return 0; > +#endif > +} > > - temp = RREG32(RADEON_GRPH_BUFFER_CNTL); > - temp &= ~(RADEON_GRPH_STOP_REQ_MASK); > - temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT); > - temp &= ~(RADEON_GRPH_START_REQ_MASK); > - if ((rdev->family == CHIP_R350) && > - (stop_req > 0x15)) { > - stop_req -= 0x10; > - } > - temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT); > - temp |= RADEON_GRPH_BUFFER_SIZE; > - temp &= ~(RADEON_GRPH_CRITICAL_CNTL | > - RADEON_GRPH_CRITICAL_AT_SOF | > - RADEON_GRPH_STOP_CNTL); > - /* > - Write the result into the register. > - */ > - WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) | > - (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT))); > +int r100_set_surface_reg(struct radeon_device *rdev, int reg, > + uint32_t tiling_flags, uint32_t pitch, > + uint32_t offset, uint32_t obj_size) > +{ > + int surf_index = reg * 16; > + int flags = 0; > > -#if 0 > - if ((rdev->family == CHIP_RS400) || > - (rdev->family == CHIP_RS480)) { > - /* attempt to program RS400 disp regs correctly ??? */ > - temp = RREG32(RS400_DISP1_REG_CNTL); > - temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK | > - RS400_DISP1_STOP_REQ_LEVEL_MASK); > - WREG32(RS400_DISP1_REQ_CNTL1, (temp | > - (critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) | > - (critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT))); > - temp = RREG32(RS400_DMIF_MEM_CNTL1); > - temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK | > - RS400_DISP1_CRITICAL_POINT_STOP_MASK); > - WREG32(RS400_DMIF_MEM_CNTL1, (temp | > - (critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) | > - (critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT))); > - } > -#endif > + if (rdev->family <= CHIP_RS200) { > + if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO)) > + == (RADEON_TILING_MACRO|RADEON_TILING_MICRO)) > + flags |= RADEON_SURF_TILE_COLOR_BOTH; > + if (tiling_flags & RADEON_TILING_MACRO) > + flags |= RADEON_SURF_TILE_COLOR_MACRO; > + } else if (rdev->family <= CHIP_RV280) { > + if (tiling_flags & (RADEON_TILING_MACRO)) > + flags |= R200_SURF_TILE_COLOR_MACRO; > + if (tiling_flags & RADEON_TILING_MICRO) > + flags |= R200_SURF_TILE_COLOR_MICRO; > + } else { > + if (tiling_flags & RADEON_TILING_MACRO) > + flags |= R300_SURF_TILE_MACRO; > + if (tiling_flags & RADEON_TILING_MICRO) > + flags |= R300_SURF_TILE_MICRO; > + } > > - DRM_DEBUG_KMS("GRPH_BUFFER_CNTL from to %x\n", > - /* (unsigned int)info->SavedReg->grph_buffer_cntl, */ > - (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL)); > + if (tiling_flags & RADEON_TILING_SWAP_16BIT) > + flags |= RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP; > + if (tiling_flags & RADEON_TILING_SWAP_32BIT) > + flags |= RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP; > + > + /* when we aren't tiling the pitch seems to needs to be furtherdivided down. - tested on power5 + rn50 server */ > + if (tiling_flags & (RADEON_TILING_SWAP_16BIT | RADEON_TILING_SWAP_32BIT)) { > + if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) > + if (ASIC_IS_RN50(rdev)) > + pitch /= 16; > } > > - if (mode2) { > - u32 grph2_cntl; > - stop_req = mode2->hdisplay * pixel_bytes2 / 16; > + /* r100/r200 divide by 16 */ > + if (rdev->family < CHIP_R300) > + flags |= pitch / 16; > + else > + flags |= pitch / 8; > > - if (stop_req > max_stop_req) > - stop_req = max_stop_req; > > - /* > - Find the drain rate of the display buffer. > - */ > - temp_ff.full = dfixed_const((16/pixel_bytes2)); > - disp_drain_rate2.full = dfixed_div(pix_clk2, temp_ff); > + DRM_DEBUG_KMS("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1); > + WREG32(RADEON_SURFACE0_INFO + surf_index, flags); > + WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset); > + WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1); > + return 0; > +} > > - grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL); > - grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK); > - grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT); > - grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK); > - if ((rdev->family == CHIP_R350) && > - (stop_req > 0x15)) { > - stop_req -= 0x10; > - } > - grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT); > - grph2_cntl |= RADEON_GRPH_BUFFER_SIZE; > - grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL | > - RADEON_GRPH_CRITICAL_AT_SOF | > - RADEON_GRPH_STOP_CNTL); > +void r100_clear_surface_reg(struct radeon_device *rdev, int reg) > +{ > + int surf_index = reg * 16; > + WREG32(RADEON_SURFACE0_INFO + surf_index, 0); > +} > > - if ((rdev->family == CHIP_RS100) || > - (rdev->family == CHIP_RS200)) > - critical_point2 = 0; > - else { > - temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128; > - temp_ff.full = dfixed_const(temp); > - temp_ff.full = dfixed_mul(mclk_ff, temp_ff); > - if (sclk_ff.full < temp_ff.full) > - temp_ff.full = sclk_ff.full; > +void r100_bandwidth_update(struct radeon_device *rdev) > +{ > + fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff; > + fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff; > + fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff, crit_point_ff; > + uint32_t temp, data, mem_trcd, mem_trp, mem_tras; > + fixed20_12 memtcas_ff[8] = { > + dfixed_init(1), > + dfixed_init(2), > + dfixed_init(3), > + dfixed_init(0), > + dfixed_init_half(1), > + dfixed_init_half(2), > + dfixed_init(0), > + }; > + fixed20_12 memtcas_rs480_ff[8] = { > + dfixed_init(0), > + dfixed_init(1), > + dfixed_init(2), > + dfixed_init(3), > + dfixed_init(0), > + dfixed_init_half(1), > + dfixed_init_half(2), > + dfixed_init_half(3), > + }; > + fixed20_12 memtcas2_ff[8] = { > + dfixed_init(0), > + dfixed_init(1), > + dfixed_init(2), > + dfixed_init(3), > + dfixed_init(4), > + dfixed_init(5), > + dfixed_init(6), > + dfixed_init(7), > + }; > + fixed20_12 memtrbs[8] = { > + dfixed_init(1), > + dfixed_init_half(1), > + dfixed_init(2), > + dfixed_init_half(2), > + dfixed_init(3), > + dfixed_init_half(3), > + dfixed_init(4), > + dfixed_init_half(4) > + }; > + fixed20_12 memtrbs_r4xx[8] = { > + dfixed_init(4), > + dfixed_init(5), > + dfixed_init(6), > + dfixed_init(7), > + dfixed_init(8), > + dfixed_init(9), > + dfixed_init(10), > + dfixed_init(11) > + }; > + fixed20_12 min_mem_eff; > + fixed20_12 mc_latency_sclk, mc_latency_mclk, k1; > + fixed20_12 cur_latency_mclk, cur_latency_sclk; > + fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate, > + disp_drain_rate2, read_return_rate; > + fixed20_12 time_disp1_drop_priority; > + int c; > + int cur_size = 16; /* in octawords */ > + int critical_point = 0, critical_point2; > +/* uint32_t read_return_rate, time_disp1_drop_priority; */ > + int stop_req, max_stop_req; > + struct drm_display_mode *mode1 = NULL; > + struct drm_display_mode *mode2 = NULL; > + uint32_t pixel_bytes1 = 0; > + uint32_t pixel_bytes2 = 0; > > - read_return_rate.full = temp_ff.full; > + radeon_update_display_priority(rdev); > > - if (mode1) { > - temp_ff.full = read_return_rate.full - disp_drain_rate.full; > - time_disp1_drop_priority.full = dfixed_div(crit_point_ff, temp_ff); > - } else { > - time_disp1_drop_priority.full = 0; > - } > - crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full; > - crit_point_ff.full = dfixed_mul(crit_point_ff, disp_drain_rate2); > - crit_point_ff.full += dfixed_const_half(0); > + if (rdev->mode_info.crtcs[0]->base.enabled) { > + mode1 = &rdev->mode_info.crtcs[0]->base.mode; > + pixel_bytes1 = rdev->mode_info.crtcs[0]->base.fb->bits_per_pixel / 8; > + } > + if (!(rdev->flags & RADEON_SINGLE_CRTC)) { > + if (rdev->mode_info.crtcs[1]->base.enabled) { > + mode2 = &rdev->mode_info.crtcs[1]->base.mode; > + pixel_bytes2 = rdev->mode_info.crtcs[1]->base.fb->bits_per_pixel / 8; > + } > + } > > - critical_point2 = dfixed_trunc(crit_point_ff); > + min_mem_eff.full = dfixed_const_8(0); > + /* get modes */ > + if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) { > + uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER); > + mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT); > + mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT); > + /* check crtc enables */ > + if (mode2) > + mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT); > + if (mode1) > + mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT); > + WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer); > + } > > - if (rdev->disp_priority == 2) { > - critical_point2 = 0; > - } > + /* > + * determine is there is enough bw for current mode > + */ > + sclk_ff = rdev->pm.sclk; > + mclk_ff = rdev->pm.mclk; > > - if (max_stop_req - critical_point2 < 4) > - critical_point2 = 0; > + temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1); > + temp_ff.full = dfixed_const(temp); > + mem_bw.full = dfixed_mul(mclk_ff, temp_ff); > > - } > + pix_clk.full = 0; > + pix_clk2.full = 0; > + peak_disp_bw.full = 0; > + if (mode1) { > + temp_ff.full = dfixed_const(1000); > + pix_clk.full = dfixed_const(mode1->clock); /* convert to fixed point */ > + pix_clk.full = dfixed_div(pix_clk, temp_ff); > + temp_ff.full = dfixed_const(pixel_bytes1); > + peak_disp_bw.full += dfixed_mul(pix_clk, temp_ff); > + } > + if (mode2) { > + temp_ff.full = dfixed_const(1000); > + pix_clk2.full = dfixed_const(mode2->clock); /* convert to fixed point */ > + pix_clk2.full = dfixed_div(pix_clk2, temp_ff); > + temp_ff.full = dfixed_const(pixel_bytes2); > + peak_disp_bw.full += dfixed_mul(pix_clk2, temp_ff); > + } > > - if (critical_point2 == 0 && rdev->family == CHIP_R300) { > - /* some R300 cards have problem with this set to 0 */ > - critical_point2 = 0x10; > - } > + mem_bw.full = dfixed_mul(mem_bw, min_mem_eff); > + if (peak_disp_bw.full >= mem_bw.full) { > + DRM_ERROR("You may not have enough display bandwidth for current mode\n" > + "If you have flickering problem, try to lower resolution, refresh rate, or color depth\n"); > + } > > - WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) | > - (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT))); > + /* Get values from the EXT_MEM_CNTL register...converting its contents. */ > + temp = RREG32(RADEON_MEM_TIMING_CNTL); > + if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */ > + mem_trcd = ((temp >> 2) & 0x3) + 1; > + mem_trp = ((temp & 0x3)) + 1; > + mem_tras = ((temp & 0x70) >> 4) + 1; > + } else if (rdev->family == CHIP_R300 || > + rdev->family == CHIP_R350) { /* r300, r350 */ > + mem_trcd = (temp & 0x7) + 1; > + mem_trp = ((temp >> 8) & 0x7) + 1; > + mem_tras = ((temp >> 11) & 0xf) + 4; > + } else if (rdev->family == CHIP_RV350 || > + rdev->family <= CHIP_RV380) { > + /* rv3x0 */ > + mem_trcd = (temp & 0x7) + 3; > + mem_trp = ((temp >> 8) & 0x7) + 3; > + mem_tras = ((temp >> 11) & 0xf) + 6; > + } else if (rdev->family == CHIP_R420 || > + rdev->family == CHIP_R423 || > + rdev->family == CHIP_RV410) { > + /* r4xx */ > + mem_trcd = (temp & 0xf) + 3; > + if (mem_trcd > 15) > + mem_trcd = 15; > + mem_trp = ((temp >> 8) & 0xf) + 3; > + if (mem_trp > 15) > + mem_trp = 15; > + mem_tras = ((temp >> 12) & 0x1f) + 6; > + if (mem_tras > 31) > + mem_tras = 31; > + } else { /* RV200, R200 */ > + mem_trcd = (temp & 0x7) + 1; > + mem_trp = ((temp >> 8) & 0x7) + 1; > + mem_tras = ((temp >> 12) & 0xf) + 4; > + } > + /* convert to FF */ > + trcd_ff.full = dfixed_const(mem_trcd); > + trp_ff.full = dfixed_const(mem_trp); > + tras_ff.full = dfixed_const(mem_tras); > > - if ((rdev->family == CHIP_RS400) || > - (rdev->family == CHIP_RS480)) { > -#if 0 > - /* attempt to program RS400 disp2 regs correctly ??? */ > - temp = RREG32(RS400_DISP2_REQ_CNTL1); > - temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK | > - RS400_DISP2_STOP_REQ_LEVEL_MASK); > - WREG32(RS400_DISP2_REQ_CNTL1, (temp | > - (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) | > - (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT))); > - temp = RREG32(RS400_DISP2_REQ_CNTL2); > - temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK | > - RS400_DISP2_CRITICAL_POINT_STOP_MASK); > - WREG32(RS400_DISP2_REQ_CNTL2, (temp | > - (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) | > - (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT))); > -#endif > - WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC); > - WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000); > - WREG32(RS400_DMIF_MEM_CNTL1, 0x29CA71DC); > - WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC); > - } > + /* Get values from the MEM_SDRAM_MODE_REG register...converting its */ > + temp = RREG32(RADEON_MEM_SDRAM_MODE_REG); > + data = (temp & (7 << 20)) >> 20; > + if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) { > + if (rdev->family == CHIP_RS480) /* don't think rs400 */ > + tcas_ff = memtcas_rs480_ff[data]; > + else > + tcas_ff = memtcas_ff[data]; > + } else > + tcas_ff = memtcas2_ff[data]; > > - DRM_DEBUG_KMS("GRPH2_BUFFER_CNTL from to %x\n", > - (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL)); > + if (rdev->family == CHIP_RS400 || > + rdev->family == CHIP_RS480) { > + /* extra cas latency stored in bits 23-25 0-4 clocks */ > + data = (temp >> 23) & 0x7; > + if (data < 5) > + tcas_ff.full += dfixed_const(data); > } > -} > > -static void r100_cs_track_texture_print(struct r100_cs_track_texture *t) > -{ > - DRM_ERROR("pitch %d\n", t->pitch); > - DRM_ERROR("use_pitch %d\n", t->use_pitch); > - DRM_ERROR("width %d\n", t->width); > - DRM_ERROR("width_11 %d\n", t->width_11); > - DRM_ERROR("height %d\n", t->height); > - DRM_ERROR("height_11 %d\n", t->height_11); > - DRM_ERROR("num levels %d\n", t->num_levels); > - DRM_ERROR("depth %d\n", t->txdepth); > - DRM_ERROR("bpp %d\n", t->cpp); > - DRM_ERROR("coordinate type %d\n", t->tex_coord_type); > - DRM_ERROR("width round to power of 2 %d\n", t->roundup_w); > - DRM_ERROR("height round to power of 2 %d\n", t->roundup_h); > - DRM_ERROR("compress format %d\n", t->compress_format); > -} > + if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) { > + /* on the R300, Tcas is included in Trbs. > + */ > + temp = RREG32(RADEON_MEM_CNTL); > + data = (R300_MEM_NUM_CHANNELS_MASK & temp); > + if (data == 1) { > + if (R300_MEM_USE_CD_CH_ONLY & temp) { > + temp = RREG32(R300_MC_IND_INDEX); > + temp &= ~R300_MC_IND_ADDR_MASK; > + temp |= R300_MC_READ_CNTL_CD_mcind; > + WREG32(R300_MC_IND_INDEX, temp); > + temp = RREG32(R300_MC_IND_DATA); > + data = (R300_MEM_RBS_POSITION_C_MASK & temp); > + } else { > + temp = RREG32(R300_MC_READ_CNTL_AB); > + data = (R300_MEM_RBS_POSITION_A_MASK & temp); > + } > + } else { > + temp = RREG32(R300_MC_READ_CNTL_AB); > + data = (R300_MEM_RBS_POSITION_A_MASK & temp); > + } > + if (rdev->family == CHIP_RV410 || > + rdev->family == CHIP_R420 || > + rdev->family == CHIP_R423) > + trbs_ff = memtrbs_r4xx[data]; > + else > + trbs_ff = memtrbs[data]; > + tcas_ff.full += trbs_ff.full; > + } > > -static int r100_track_compress_size(int compress_format, int w, int h) > -{ > - int block_width, block_height, block_bytes; > - int wblocks, hblocks; > - int min_wblocks; > - int sz; > + sclk_eff_ff.full = sclk_ff.full; > > - block_width = 4; > - block_height = 4; > + if (rdev->flags & RADEON_IS_AGP) { > + fixed20_12 agpmode_ff; > + agpmode_ff.full = dfixed_const(radeon_agpmode); > + temp_ff.full = dfixed_const_666(16); > + sclk_eff_ff.full -= dfixed_mul(agpmode_ff, temp_ff); > + } > + /* TODO PCIE lanes may affect this - agpmode == 16?? */ > > - switch (compress_format) { > - case R100_TRACK_COMP_DXT1: > - block_bytes = 8; > - min_wblocks = 4; > - break; > - default: > - case R100_TRACK_COMP_DXT35: > - block_bytes = 16; > - min_wblocks = 2; > - break; > + if (ASIC_IS_R300(rdev)) { > + sclk_delay_ff.full = dfixed_const(250); > + } else { > + if ((rdev->family == CHIP_RV100) || > + rdev->flags & RADEON_IS_IGP) { > + if (rdev->mc.vram_is_ddr) > + sclk_delay_ff.full = dfixed_const(41); > + else > + sclk_delay_ff.full = dfixed_const(33); > + } else { > + if (rdev->mc.vram_width == 128) > + sclk_delay_ff.full = dfixed_const(57); > + else > + sclk_delay_ff.full = dfixed_const(41); > + } > } > > - hblocks = (h + block_height - 1) / block_height; > - wblocks = (w + block_width - 1) / block_width; > - if (wblocks < min_wblocks) > - wblocks = min_wblocks; > - sz = wblocks * hblocks * block_bytes; > - return sz; > -} > + mc_latency_sclk.full = dfixed_div(sclk_delay_ff, sclk_eff_ff); > > -static int r100_cs_track_cube(struct radeon_device *rdev, > - struct r100_cs_track *track, unsigned idx) > -{ > - unsigned face, w, h; > - struct radeon_bo *cube_robj; > - unsigned long size; > - unsigned compress_format = track->textures[idx].compress_format; > + if (rdev->mc.vram_is_ddr) { > + if (rdev->mc.vram_width == 32) { > + k1.full = dfixed_const(40); > + c = 3; > + } else { > + k1.full = dfixed_const(20); > + c = 1; > + } > + } else { > + k1.full = dfixed_const(40); > + c = 3; > + } > > - for (face = 0; face < 5; face++) { > - cube_robj = track->textures[idx].cube_info[face].robj; > - w = track->textures[idx].cube_info[face].width; > - h = track->textures[idx].cube_info[face].height; > + temp_ff.full = dfixed_const(2); > + mc_latency_mclk.full = dfixed_mul(trcd_ff, temp_ff); > + temp_ff.full = dfixed_const(c); > + mc_latency_mclk.full += dfixed_mul(tcas_ff, temp_ff); > + temp_ff.full = dfixed_const(4); > + mc_latency_mclk.full += dfixed_mul(tras_ff, temp_ff); > + mc_latency_mclk.full += dfixed_mul(trp_ff, temp_ff); > + mc_latency_mclk.full += k1.full; > > - if (compress_format) { > - size = r100_track_compress_size(compress_format, w, h); > - } else > - size = w * h; > - size *= track->textures[idx].cpp; > + mc_latency_mclk.full = dfixed_div(mc_latency_mclk, mclk_ff); > + mc_latency_mclk.full += dfixed_div(temp_ff, sclk_eff_ff); > > - size += track->textures[idx].cube_info[face].offset; > + /* > + HW cursor time assuming worst case of full size colour cursor. > + */ > + temp_ff.full = dfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1)))); > + temp_ff.full += trcd_ff.full; > + if (temp_ff.full < tras_ff.full) > + temp_ff.full = tras_ff.full; > + cur_latency_mclk.full = dfixed_div(temp_ff, mclk_ff); > > - if (size > radeon_bo_size(cube_robj)) { > - DRM_ERROR("Cube texture offset greater than object size %lu %lu\n", > - size, radeon_bo_size(cube_robj)); > - r100_cs_track_texture_print(&track->textures[idx]); > - return -1; > - } > - } > - return 0; > -} > + temp_ff.full = dfixed_const(cur_size); > + cur_latency_sclk.full = dfixed_div(temp_ff, sclk_eff_ff); > + /* > + Find the total latency for the display data. > + */ > + disp_latency_overhead.full = dfixed_const(8); > + disp_latency_overhead.full = dfixed_div(disp_latency_overhead, sclk_ff); > + mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full; > + mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full; > > -static int r100_cs_track_texture_check(struct radeon_device *rdev, > - struct r100_cs_track *track) > -{ > - struct radeon_bo *robj; > - unsigned long size; > - unsigned u, i, w, h, d; > - int ret; > + if (mc_latency_mclk.full > mc_latency_sclk.full) > + disp_latency.full = mc_latency_mclk.full; > + else > + disp_latency.full = mc_latency_sclk.full; > > - for (u = 0; u < track->num_texture; u++) { > - if (!track->textures[u].enabled) > - continue; > - if (track->textures[u].lookup_disable) > - continue; > - robj = track->textures[u].robj; > - if (robj == NULL) { > - DRM_ERROR("No texture bound to unit %u\n", u); > - return -EINVAL; > - } > - size = 0; > - for (i = 0; i <= track->textures[u].num_levels; i++) { > - if (track->textures[u].use_pitch) { > - if (rdev->family < CHIP_R300) > - w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i); > - else > - w = track->textures[u].pitch / (1 << i); > - } else { > - w = track->textures[u].width; > - if (rdev->family >= CHIP_RV515) > - w |= track->textures[u].width_11; > - w = w / (1 << i); > - if (track->textures[u].roundup_w) > - w = roundup_pow_of_two(w); > - } > - h = track->textures[u].height; > - if (rdev->family >= CHIP_RV515) > - h |= track->textures[u].height_11; > - h = h / (1 << i); > - if (track->textures[u].roundup_h) > - h = roundup_pow_of_two(h); > - if (track->textures[u].tex_coord_type == 1) { > - d = (1 << track->textures[u].txdepth) / (1 << i); > - if (!d) > - d = 1; > - } else { > - d = 1; > - } > - if (track->textures[u].compress_format) { > + /* setup Max GRPH_STOP_REQ default value */ > + if (ASIC_IS_RV100(rdev)) > + max_stop_req = 0x5c; > + else > + max_stop_req = 0x7c; > > - size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d; > - /* compressed textures are block based */ > - } else > - size += w * h * d; > - } > - size *= track->textures[u].cpp; > + if (mode1) { > + /* CRTC1 > + Set GRPH_BUFFER_CNTL register using h/w defined optimal values. > + GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ] > + */ > + stop_req = mode1->hdisplay * pixel_bytes1 / 16; > > - switch (track->textures[u].tex_coord_type) { > - case 0: > - case 1: > - break; > - case 2: > - if (track->separate_cube) { > - ret = r100_cs_track_cube(rdev, track, u); > - if (ret) > - return ret; > - } else > - size *= 6; > - break; > - default: > - DRM_ERROR("Invalid texture coordinate type %u for unit " > - "%u\n", track->textures[u].tex_coord_type, u); > - return -EINVAL; > - } > - if (size > radeon_bo_size(robj)) { > - DRM_ERROR("Texture of unit %u needs %lu bytes but is " > - "%lu\n", u, size, radeon_bo_size(robj)); > - r100_cs_track_texture_print(&track->textures[u]); > - return -EINVAL; > - } > - } > - return 0; > -} > + if (stop_req > max_stop_req) > + stop_req = max_stop_req; > > -int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) > -{ > - unsigned i; > - unsigned long size; > - unsigned prim_walk; > - unsigned nverts; > - unsigned num_cb = track->cb_dirty ? track->num_cb : 0; > + /* > + Find the drain rate of the display buffer. > + */ > + temp_ff.full = dfixed_const((16/pixel_bytes1)); > + disp_drain_rate.full = dfixed_div(pix_clk, temp_ff); > > - if (num_cb && !track->zb_cb_clear && !track->color_channel_mask && > - !track->blend_read_enable) > - num_cb = 0; > + /* > + Find the critical point of the display buffer. > + */ > + crit_point_ff.full = dfixed_mul(disp_drain_rate, disp_latency); > + crit_point_ff.full += dfixed_const_half(0); > > - for (i = 0; i < num_cb; i++) { > - if (track->cb[i].robj == NULL) { > - DRM_ERROR("[drm] No buffer for color buffer %d !\n", i); > - return -EINVAL; > - } > - size = track->cb[i].pitch * track->cb[i].cpp * track->maxy; > - size += track->cb[i].offset; > - if (size > radeon_bo_size(track->cb[i].robj)) { > - DRM_ERROR("[drm] Buffer too small for color buffer %d " > - "(need %lu have %lu) !\n", i, size, > - radeon_bo_size(track->cb[i].robj)); > - DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n", > - i, track->cb[i].pitch, track->cb[i].cpp, > - track->cb[i].offset, track->maxy); > - return -EINVAL; > + critical_point = dfixed_trunc(crit_point_ff); > + > + if (rdev->disp_priority == 2) { > + critical_point = 0; > } > - } > - track->cb_dirty = false; > > - if (track->zb_dirty && track->z_enabled) { > - if (track->zb.robj == NULL) { > - DRM_ERROR("[drm] No buffer for z buffer !\n"); > - return -EINVAL; > + /* > + The critical point should never be above max_stop_req-4. Setting > + GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time. > + */ > + if (max_stop_req - critical_point < 4) > + critical_point = 0; > + > + if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) { > + /* some R300 cards have problem with this set to 0, when CRTC2 is enabled.*/ > + critical_point = 0x10; > } > - size = track->zb.pitch * track->zb.cpp * track->maxy; > - size += track->zb.offset; > - if (size > radeon_bo_size(track->zb.robj)) { > - DRM_ERROR("[drm] Buffer too small for z buffer " > - "(need %lu have %lu) !\n", size, > - radeon_bo_size(track->zb.robj)); > - DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n", > - track->zb.pitch, track->zb.cpp, > - track->zb.offset, track->maxy); > - return -EINVAL; > + > + temp = RREG32(RADEON_GRPH_BUFFER_CNTL); > + temp &= ~(RADEON_GRPH_STOP_REQ_MASK); > + temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT); > + temp &= ~(RADEON_GRPH_START_REQ_MASK); > + if ((rdev->family == CHIP_R350) && > + (stop_req > 0x15)) { > + stop_req -= 0x10; > } > - } > - track->zb_dirty = false; > + temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT); > + temp |= RADEON_GRPH_BUFFER_SIZE; > + temp &= ~(RADEON_GRPH_CRITICAL_CNTL | > + RADEON_GRPH_CRITICAL_AT_SOF | > + RADEON_GRPH_STOP_CNTL); > + /* > + Write the result into the register. > + */ > + WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) | > + (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT))); > > - if (track->aa_dirty && track->aaresolve) { > - if (track->aa.robj == NULL) { > - DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i); > - return -EINVAL; > - } > - /* I believe the format comes from colorbuffer0. */ > - size = track->aa.pitch * track->cb[0].cpp * track->maxy; > - size += track->aa.offset; > - if (size > radeon_bo_size(track->aa.robj)) { > - DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d " > - "(need %lu have %lu) !\n", i, size, > - radeon_bo_size(track->aa.robj)); > - DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n", > - i, track->aa.pitch, track->cb[0].cpp, > - track->aa.offset, track->maxy); > - return -EINVAL; > +#if 0 > + if ((rdev->family == CHIP_RS400) || > + (rdev->family == CHIP_RS480)) { > + /* attempt to program RS400 disp regs correctly ??? */ > + temp = RREG32(RS400_DISP1_REG_CNTL); > + temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK | > + RS400_DISP1_STOP_REQ_LEVEL_MASK); > + WREG32(RS400_DISP1_REQ_CNTL1, (temp | > + (critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) | > + (critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT))); > + temp = RREG32(RS400_DMIF_MEM_CNTL1); > + temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK | > + RS400_DISP1_CRITICAL_POINT_STOP_MASK); > + WREG32(RS400_DMIF_MEM_CNTL1, (temp | > + (critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) | > + (critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT))); > } > - } > - track->aa_dirty = false; > +#endif > > - prim_walk = (track->vap_vf_cntl >> 4) & 0x3; > - if (track->vap_vf_cntl & (1 << 14)) { > - nverts = track->vap_alt_nverts; > - } else { > - nverts = (track->vap_vf_cntl >> 16) & 0xFFFF; > + DRM_DEBUG_KMS("GRPH_BUFFER_CNTL from to %x\n", > + /* (unsigned int)info->SavedReg->grph_buffer_cntl, */ > + (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL)); > } > - switch (prim_walk) { > - case 1: > - for (i = 0; i < track->num_arrays; i++) { > - size = track->arrays[i].esize * track->max_indx * 4; > - if (track->arrays[i].robj == NULL) { > - DRM_ERROR("(PW %u) Vertex array %u no buffer " > - "bound\n", prim_walk, i); > - return -EINVAL; > - } > - if (size > radeon_bo_size(track->arrays[i].robj)) { > - dev_err(rdev->dev, "(PW %u) Vertex array %u " > - "need %lu dwords have %lu dwords\n", > - prim_walk, i, size >> 2, > - radeon_bo_size(track->arrays[i].robj) > - >> 2); > - DRM_ERROR("Max indices %u\n", track->max_indx); > - return -EINVAL; > - } > + > + if (mode2) { > + u32 grph2_cntl; > + stop_req = mode2->hdisplay * pixel_bytes2 / 16; > + > + if (stop_req > max_stop_req) > + stop_req = max_stop_req; > + > + /* > + Find the drain rate of the display buffer. > + */ > + temp_ff.full = dfixed_const((16/pixel_bytes2)); > + disp_drain_rate2.full = dfixed_div(pix_clk2, temp_ff); > + > + grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL); > + grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK); > + grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT); > + grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK); > + if ((rdev->family == CHIP_R350) && > + (stop_req > 0x15)) { > + stop_req -= 0x10; > } > - break; > - case 2: > - for (i = 0; i < track->num_arrays; i++) { > - size = track->arrays[i].esize * (nverts - 1) * 4; > - if (track->arrays[i].robj == NULL) { > - DRM_ERROR("(PW %u) Vertex array %u no buffer " > - "bound\n", prim_walk, i); > - return -EINVAL; > + grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT); > + grph2_cntl |= RADEON_GRPH_BUFFER_SIZE; > + grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL | > + RADEON_GRPH_CRITICAL_AT_SOF | > + RADEON_GRPH_STOP_CNTL); > + > + if ((rdev->family == CHIP_RS100) || > + (rdev->family == CHIP_RS200)) > + critical_point2 = 0; > + else { > + temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128; > + temp_ff.full = dfixed_const(temp); > + temp_ff.full = dfixed_mul(mclk_ff, temp_ff); > + if (sclk_ff.full < temp_ff.full) > + temp_ff.full = sclk_ff.full; > + > + read_return_rate.full = temp_ff.full; > + > + if (mode1) { > + temp_ff.full = read_return_rate.full - disp_drain_rate.full; > + time_disp1_drop_priority.full = dfixed_div(crit_point_ff, temp_ff); > + } else { > + time_disp1_drop_priority.full = 0; > } > - if (size > radeon_bo_size(track->arrays[i].robj)) { > - dev_err(rdev->dev, "(PW %u) Vertex array %u " > - "need %lu dwords have %lu dwords\n", > - prim_walk, i, size >> 2, > - radeon_bo_size(track->arrays[i].robj) > - >> 2); > - return -EINVAL; > + crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full; > + crit_point_ff.full = dfixed_mul(crit_point_ff, disp_drain_rate2); > + crit_point_ff.full += dfixed_const_half(0); > + > + critical_point2 = dfixed_trunc(crit_point_ff); > + > + if (rdev->disp_priority == 2) { > + critical_point2 = 0; > } > - } > - break; > - case 3: > - size = track->vtx_size * nverts; > - if (size != track->immd_dwords) { > - DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n", > - track->immd_dwords, size); > - DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n", > - nverts, track->vtx_size); > - return -EINVAL; > - } > - break; > - default: > - DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n", > - prim_walk); > - return -EINVAL; > - } > > - if (track->tex_dirty) { > - track->tex_dirty = false; > - return r100_cs_track_texture_check(rdev, track); > - } > - return 0; > -} > + if (max_stop_req - critical_point2 < 4) > + critical_point2 = 0; > > -void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track) > -{ > - unsigned i, face; > + } > > - track->cb_dirty = true; > - track->zb_dirty = true; > - track->tex_dirty = true; > - track->aa_dirty = true; > + if (critical_point2 == 0 && rdev->family == CHIP_R300) { > + /* some R300 cards have problem with this set to 0 */ > + critical_point2 = 0x10; > + } > > - if (rdev->family < CHIP_R300) { > - track->num_cb = 1; > - if (rdev->family <= CHIP_RS200) > - track->num_texture = 3; > - else > - track->num_texture = 6; > - track->maxy = 2048; > - track->separate_cube = 1; > - } else { > - track->num_cb = 4; > - track->num_texture = 16; > - track->maxy = 4096; > - track->separate_cube = 0; > - track->aaresolve = false; > - track->aa.robj = NULL; > - } > + WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) | > + (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT))); > > - for (i = 0; i < track->num_cb; i++) { > - track->cb[i].robj = NULL; > - track->cb[i].pitch = 8192; > - track->cb[i].cpp = 16; > - track->cb[i].offset = 0; > - } > - track->z_enabled = true; > - track->zb.robj = NULL; > - track->zb.pitch = 8192; > - track->zb.cpp = 4; > - track->zb.offset = 0; > - track->vtx_size = 0x7F; > - track->immd_dwords = 0xFFFFFFFFUL; > - track->num_arrays = 11; > - track->max_indx = 0x00FFFFFFUL; > - for (i = 0; i < track->num_arrays; i++) { > - track->arrays[i].robj = NULL; > - track->arrays[i].esize = 0x7F; > - } > - for (i = 0; i < track->num_texture; i++) { > - track->textures[i].compress_format = R100_TRACK_COMP_NONE; > - track->textures[i].pitch = 16536; > - track->textures[i].width = 16536; > - track->textures[i].height = 16536; > - track->textures[i].width_11 = 1 << 11; > - track->textures[i].height_11 = 1 << 11; > - track->textures[i].num_levels = 12; > - if (rdev->family <= CHIP_RS200) { > - track->textures[i].tex_coord_type = 0; > - track->textures[i].txdepth = 0; > - } else { > - track->textures[i].txdepth = 16; > - track->textures[i].tex_coord_type = 1; > + if ((rdev->family == CHIP_RS400) || > + (rdev->family == CHIP_RS480)) { > +#if 0 > + /* attempt to program RS400 disp2 regs correctly ??? */ > + temp = RREG32(RS400_DISP2_REQ_CNTL1); > + temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK | > + RS400_DISP2_STOP_REQ_LEVEL_MASK); > + WREG32(RS400_DISP2_REQ_CNTL1, (temp | > + (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) | > + (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT))); > + temp = RREG32(RS400_DISP2_REQ_CNTL2); > + temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK | > + RS400_DISP2_CRITICAL_POINT_STOP_MASK); > + WREG32(RS400_DISP2_REQ_CNTL2, (temp | > + (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) | > + (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT))); > +#endif > + WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC); > + WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000); > + WREG32(RS400_DMIF_MEM_CNTL1, 0x29CA71DC); > + WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC); > } > - track->textures[i].cpp = 64; > - track->textures[i].robj = NULL; > - /* CS IB emission code makes sure texture unit are disabled */ > - track->textures[i].enabled = false; > - track->textures[i].lookup_disable = false; > - track->textures[i].roundup_w = true; > - track->textures[i].roundup_h = true; > - if (track->separate_cube) > - for (face = 0; face < 5; face++) { > - track->textures[i].cube_info[face].robj = NULL; > - track->textures[i].cube_info[face].width = 16536; > - track->textures[i].cube_info[face].height = 16536; > - track->textures[i].cube_info[face].offset = 0; > - } > + > + DRM_DEBUG_KMS("GRPH2_BUFFER_CNTL from to %x\n", > + (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL)); > } > } > > -- > 1.7.7.5 > > _______________________________________________ > dri-devel mailing list > dri-devel@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/dri-devel _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel