Add logic for starting, stopping, suspending and resuming uvd block Signed-off-by: Piotr Redlewski <predlewski at gmail.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 7 - drivers/gpu/drm/amd/amdgpu/si.c | 250 ++++++++++++++++- drivers/gpu/drm/amd/amdgpu/sid.h | 22 +- drivers/gpu/drm/amd/amdgpu/uvd_v4_0.c | 488 ++++++++++++++++++++++++++++++++++ 4 files changed, 736 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 9430d4809b53..0744117ee7d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1695,13 +1695,6 @@ static void gfx_v6_0_gpu_init(struct amdgpu_device *adev) WREG32(mmDMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config); WREG32(mmDMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config); -#if 0 - if (adev->has_uvd) { - WREG32(mmUVD_UDEC_ADDR_CONFIG, gb_addr_config); - WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, gb_addr_config); - WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config); - } -#endif gfx_v6_0_tiling_mode_table_init(adev); gfx_v6_0_setup_rb(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 2ac1c2be8ca4..bc6fd1ff4f86 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -971,6 +971,28 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v) spin_unlock_irqrestore(&adev->smc_idx_lock, flags); } +static u32 si_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags; + u32 r; + + spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags); + WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff)); + r = RREG32(mmUVD_CTX_DATA); + spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags); + return r; +} + +static void si_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v) +{ + unsigned long flags; + + spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags); + WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff)); + WREG32(mmUVD_CTX_DATA, (v)); + spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags); +} + static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = { {GRBM_STATUS}, {GB_ADDR_CONFIG}, @@ -1219,9 +1241,231 @@ static u32 si_get_xclk(struct amdgpu_device *adev) return reference_clock; } -//xxx:not implemented + +static unsigned si_uvd_calc_upll_post_div(unsigned vco_freq, + unsigned target_freq, + unsigned pd_min, + unsigned pd_even) +{ + unsigned post_div = vco_freq / target_freq; + + /* adjust to post divider minimum value */ + if (post_div < pd_min) + post_div = pd_min; + + /* we alway need a frequency less than or equal the target */ + if ((vco_freq / post_div) > target_freq) + post_div += 1; + + /* post dividers above a certain value must be even */ + if (post_div > pd_even && post_div % 2) + post_div += 1; + + return post_div; +} + +/** + * si_uvd_calc_upll_dividers - calc UPLL clock dividers + * + * @adev: amdgpu_device pointer + * @vclk: wanted VCLK + * @dclk: wanted DCLK + * @vco_min: minimum VCO frequency + * @vco_max: maximum VCO frequency + * @fb_factor: factor to multiply vco freq with + * @fb_mask: limit and bitmask for feedback divider + * @pd_min: post divider minimum + * @pd_max: post divider maximum + * @pd_even: post divider must be even above this value + * @optimal_fb_div: resulting feedback divider + * @optimal_vclk_div: resulting vclk post divider + * @optimal_dclk_div: resulting dclk post divider + * + * Calculate dividers for UVDs UPLL (R6xx-SI, except APUs). + * Returns zero on success -EINVAL on error. + */ +int si_uvd_calc_upll_dividers(struct amdgpu_device *adev, + unsigned vclk, unsigned dclk, + unsigned vco_min, unsigned vco_max, + unsigned fb_factor, unsigned fb_mask, + unsigned pd_min, unsigned pd_max, + unsigned pd_even, + unsigned *optimal_fb_div, + unsigned *optimal_vclk_div, + unsigned *optimal_dclk_div) +{ + unsigned vco_freq, ref_freq = adev->clock.spll.reference_freq; + + /* start off with something large */ + unsigned optimal_score = ~0; + + /* loop through vco from low to high */ + vco_min = max(max(vco_min, vclk), dclk); + for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) { + + uint64_t fb_div = (uint64_t)vco_freq * fb_factor; + unsigned vclk_div, dclk_div, score; + + do_div(fb_div, ref_freq); + + /* fb div out of range ? */ + if (fb_div > fb_mask) + break; /* it can oly get worse */ + + fb_div &= fb_mask; + + /* calc vclk divider with current vco freq */ + vclk_div = si_uvd_calc_upll_post_div(vco_freq, vclk, + pd_min, pd_even); + if (vclk_div > pd_max) + break; /* vco is too big, it has to stop */ + + /* calc dclk divider with current vco freq */ + dclk_div = si_uvd_calc_upll_post_div(vco_freq, dclk, + pd_min, pd_even); + if (vclk_div > pd_max) + break; /* vco is too big, it has to stop */ + + /* calc score with current vco freq */ + score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div); + + /* determine if this vco setting is better than current optimal settings */ + if (score < optimal_score) { + *optimal_fb_div = fb_div; + *optimal_vclk_div = vclk_div; + *optimal_dclk_div = dclk_div; + optimal_score = score; + if (optimal_score == 0) + break; /* it can't get better than this */ + } + } + + /* did we found a valid setup ? */ + if (optimal_score == ~0) + return -EINVAL; + + return 0; +} + +int si_uvd_send_upll_ctlreq(struct amdgpu_device *adev, + unsigned cg_upll_func_cntl) +{ + unsigned i; + + /* make sure UPLL_CTLREQ is deasserted */ + WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK); + + mdelay(10); + + /* assert UPLL_CTLREQ */ + WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK); + + /* wait for CTLACK and CTLACK2 to get asserted */ + for (i = 0; i < 100; ++i) { + uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK; + if ((RREG32(cg_upll_func_cntl) & mask) == mask) + break; + mdelay(10); + } + + /* deassert UPLL_CTLREQ */ + WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK); + + if (i == 100) { + DRM_ERROR("Timeout setting UVD clocks!\n"); + return -ETIMEDOUT; + } + + return 0; +} + static int si_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk) { + unsigned fb_div = 0, vclk_div = 0, dclk_div = 0; + int r; + + /* bypass vclk and dclk with bclk */ + WREG32_P(CG_UPLL_FUNC_CNTL_2, + VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1), + ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK)); + + /* put PLL in bypass mode */ + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK); + + if (!vclk || !dclk) { + /* keep the Bypass mode */ + return 0; + } + + r = si_uvd_calc_upll_dividers(adev, vclk, dclk, 125000, 250000, + 16384, 0x03FFFFFF, 0, 128, 5, + &fb_div, &vclk_div, &dclk_div); + + if (r) + return r; + + /* set RESET_ANTI_MUX to 0 */ + WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK); + + /* set VCO_MODE to 1 */ + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK); + + /* disable sleep mode */ + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK); + + /* deassert UPLL_RESET */ + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK); + + mdelay(1); + + r = si_uvd_send_upll_ctlreq(adev, CG_UPLL_FUNC_CNTL); + if (r) + return r; + + /* assert UPLL_RESET again */ + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK); + + /* disable spread spectrum. */ + WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK); + + /* set feedback divider */ + WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK); + + /* set ref divider to 0 */ + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK); + + if (fb_div < 307200) + WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9); + else + WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9); + + /* set PDIV_A and PDIV_B */ + WREG32_P(CG_UPLL_FUNC_CNTL_2, + UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div), + ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK)); + + /* give the PLL some time to settle */ + mdelay(15); + + /* deassert PLL_RESET */ + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK); + + mdelay(15); + + /* switch from bypass mode to normal mode */ + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK); + + r = si_uvd_send_upll_ctlreq(adev, CG_UPLL_FUNC_CNTL); + if (r) + return r; + + /* switch VCLK and DCLK selection */ + WREG32_P(CG_UPLL_FUNC_CNTL_2, + VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2), + ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK)); + + mdelay(100); + return 0; } @@ -1260,8 +1504,8 @@ static int si_common_early_init(void *handle) adev->pcie_wreg = &si_pcie_wreg; adev->pciep_rreg = &si_pciep_rreg; adev->pciep_wreg = &si_pciep_wreg; - adev->uvd_ctx_rreg = NULL; - adev->uvd_ctx_wreg = NULL; + adev->uvd_ctx_rreg = &si_uvd_ctx_rreg; + adev->uvd_ctx_wreg = &si_uvd_ctx_wreg; adev->didt_rreg = NULL; adev->didt_wreg = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h index 42556e2fafd4..1fbe62e95c94 100644 --- a/drivers/gpu/drm/amd/amdgpu/sid.h +++ b/drivers/gpu/drm/amd/amdgpu/sid.h @@ -1621,27 +1621,7 @@ /* * UVD */ -#define UVD_UDEC_ADDR_CONFIG 0x3bd3 -#define UVD_UDEC_DB_ADDR_CONFIG 0x3bd4 -#define UVD_UDEC_DBW_ADDR_CONFIG 0x3bd5 -#define UVD_RBC_RB_RPTR 0x3da4 -#define UVD_RBC_RB_WPTR 0x3da5 -#define UVD_STATUS 0x3daf - -#define UVD_CGC_CTRL 0x3dc2 -# define DCM (1 << 0) -# define CG_DT(x) ((x) << 2) -# define CG_DT_MASK (0xf << 2) -# define CLK_OD(x) ((x) << 6) -# define CLK_OD_MASK (0x1f << 6) - - /* UVD CTX indirect */ -#define UVD_CGC_MEM_CTRL 0xC0 -#define UVD_CGC_CTRL2 0xC1 -# define DYN_OR_EN (1 << 0) -# define DYN_RR_EN (1 << 1) -# define G_DIV_ID(x) ((x) << 2) -# define G_DIV_ID_MASK (0x7 << 2) +#define UVD_VCPU_CHIP_ID 0x3d35 /* * PM4 diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_0.c index cfa6959db43d..e5f5e19657fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_0.c @@ -36,8 +36,12 @@ #include "bif/bif_3_0_d.h" +static void uvd_v4_0_mc_resume(struct amdgpu_device *adev); static void uvd_v4_0_set_ring_funcs(struct amdgpu_device *adev); static void uvd_v4_0_set_irq_funcs(struct amdgpu_device *adev); +static int uvd_v4_0_start(struct amdgpu_device *adev); +static void uvd_v4_0_stop(struct amdgpu_device *adev); +static void uvd_v4_0_set_dcm(struct amdgpu_device *adev, bool sw_mode); /** * uvd_v4_0_ring_get_rptr - get read pointer @@ -114,6 +118,8 @@ static int uvd_v4_0_sw_init(void *handle) sprintf(ring->name, "uvd"); r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); + /* TODO: move mc programming to hw init */ + uvd_v4_0_mc_resume(adev); return r; } @@ -129,6 +135,333 @@ static int uvd_v4_0_sw_fini(void *handle) return amdgpu_uvd_sw_fini(adev); } +static void uvd_v4_0_enable_mgcg(struct amdgpu_device *adev, + bool enable) +{ + u32 orig, data; + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG)) { + data = RREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL); + data |= 0x3fff; + WREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL, data); + + orig = data = RREG32(mmUVD_CGC_CTRL); + data |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; + if (orig != data) + WREG32(mmUVD_CGC_CTRL, data); + + WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0); + WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0); + } else { + data = RREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL); + data &= ~0x3fff; + WREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL, data); + + orig = data = RREG32(mmUVD_CGC_CTRL); + data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; + if (orig != data) + WREG32(mmUVD_CGC_CTRL, data); + + WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff); + WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff); + } +} + +/** + * uvd_v4_0_hw_init - start and test UVD block + * + * @adev: amdgpu_device pointer + * + * Initialize the hardware, boot up the VCPU and do some testing + */ +static int uvd_v4_0_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring = &adev->uvd.ring; + uint32_t tmp; + int r; + + uvd_v4_0_enable_mgcg(adev, true); + amdgpu_asic_set_uvd_clocks(adev, 53300, 40000); + uvd_v4_0_start(adev); + + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->ready = false; + goto done; + } + + r = amdgpu_ring_alloc(ring, 10); + if (r) { + DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); + goto done; + } + + tmp = PACKET0(mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0); + amdgpu_ring_write(ring, tmp); + amdgpu_ring_write(ring, 0xFFFFF); + + tmp = PACKET0(mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0); + amdgpu_ring_write(ring, tmp); + amdgpu_ring_write(ring, 0xFFFFF); + + tmp = PACKET0(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0); + amdgpu_ring_write(ring, tmp); + amdgpu_ring_write(ring, 0xFFFFF); + + /* Clear timeout status bits */ + amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_TIMEOUT_STATUS, 0)); + amdgpu_ring_write(ring, 0x8); + + amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0)); + amdgpu_ring_write(ring, 3); + + amdgpu_ring_commit(ring); + +done: + if (!r) + DRM_INFO("UVD initialized successfully.\n"); + + return r; +} + +/** + * uvd_v4_0_hw_fini - stop the hardware block + * + * @adev: amdgpu_device pointer + * + * Stop the UVD block, mark ring as not ready any more + */ +static int uvd_v4_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring = &adev->uvd.ring; + + if (RREG32(mmUVD_STATUS) != 0) + uvd_v4_0_stop(adev); + + ring->ready = false; + + return 0; +} + +static int uvd_v4_0_suspend(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = uvd_v4_0_hw_fini(adev); + if (r) + return r; + + return amdgpu_uvd_suspend(adev); +} + +static int uvd_v4_0_resume(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_uvd_resume(adev); + if (r) + return r; + + return uvd_v4_0_hw_init(adev); +} + +/** + * uvd_v4_0_start - start UVD block + * + * @adev: amdgpu_device pointer + * + * Setup and start the UVD block + */ +static int uvd_v4_0_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->uvd.ring; + uint32_t rb_bufsz; + int i, j, r; + u32 tmp; + /* disable byte swapping */ + u32 lmi_swap_cntl = 0; + u32 mp_swap_cntl = 0; + + /* set uvd busy */ + WREG32_P(mmUVD_STATUS, 1<<2, ~(1<<2)); + + uvd_v4_0_set_dcm(adev, true); + WREG32(mmUVD_CGC_GATE, 0); + + /* take UVD block out of reset */ + WREG32_P(mmSRBM_SOFT_RESET, 0, ~SRBM_SOFT_RESET__SOFT_RESET_UVD_MASK); + mdelay(5); + + /* enable VCPU clock */ + WREG32(mmUVD_VCPU_CNTL, 1 << 9); + + /* disable interupt */ + WREG32_P(mmUVD_MASTINT_EN, 0, ~(1 << 1)); + +#ifdef __BIG_ENDIAN + /* swap (8 in 32) RB and IB */ + lmi_swap_cntl = 0xa; + mp_swap_cntl = 0; +#endif + WREG32(mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl); + WREG32(mmUVD_MP_SWAP_CNTL, mp_swap_cntl); + /* initialize UVD memory controller */ + WREG32(mmUVD_LMI_CTRL, 0x203108); + + tmp = RREG32(mmUVD_MPC_CNTL); + WREG32(mmUVD_MPC_CNTL, tmp | 0x10); + + WREG32(mmUVD_MPC_SET_MUXA0, 0x40c2040); + WREG32(mmUVD_MPC_SET_MUXA1, 0x0); + WREG32(mmUVD_MPC_SET_MUXB0, 0x40c2040); + WREG32(mmUVD_MPC_SET_MUXB1, 0x0); + WREG32(mmUVD_MPC_SET_ALU, 0); + WREG32(mmUVD_MPC_SET_MUX, 0x88); + + uvd_v4_0_mc_resume(adev); + + tmp = RREG32_UVD_CTX(ixUVD_LMI_CACHE_CTRL); + WREG32_UVD_CTX(ixUVD_LMI_CACHE_CTRL, tmp & (~0x10)); + + /* enable UMC */ + WREG32_P(mmUVD_LMI_CTRL2, 0, ~(1 << 8)); + + WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK); + + WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK); + + WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + + mdelay(10); + + for (i = 0; i < 10; ++i) { + uint32_t status; + for (j = 0; j < 100; ++j) { + status = RREG32(mmUVD_STATUS); + if (status & 2) + break; + mdelay(10); + } + r = 0; + if (status & 2) + break; + + DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n"); + WREG32_P(mmUVD_SOFT_RESET, UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK, + ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + mdelay(10); + WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + mdelay(10); + r = -1; + } + + if (r) { + DRM_ERROR("UVD not responding, giving up!!!\n"); + return r; + } + + /* enable interupt */ + WREG32_P(mmUVD_MASTINT_EN, 3<<1, ~(3 << 1)); + + WREG32_P(mmUVD_STATUS, 0, ~(1<<2)); + + /* force RBC into idle state */ + WREG32(mmUVD_RBC_RB_CNTL, 0x11010101); + + /* Set the write pointer delay */ + WREG32(mmUVD_RBC_RB_WPTR_CNTL, 0); + + /* programm the 4GB memory segment for rptr and ring buffer */ + WREG32(mmUVD_LMI_EXT40_ADDR, upper_32_bits(ring->gpu_addr) | + (0x7 << 16) | (0x1 << 31)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(mmUVD_RBC_RB_RPTR, 0x0); + + ring->wptr = RREG32(mmUVD_RBC_RB_RPTR); + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + + /* set the ring address */ + WREG32(mmUVD_RBC_RB_BASE, ring->gpu_addr); + + /* Set ring buffer size */ + rb_bufsz = order_base_2(ring->ring_size); + rb_bufsz = (0x1 << 8) | rb_bufsz; + WREG32_P(mmUVD_RBC_RB_CNTL, rb_bufsz, ~0x11f1f); + + return 0; +} + + +/** + * uvd_v4_0_stop - stop UVD block + * + * @adev: amdgpu_device pointer + * + * stop the UVD block + */ +static void uvd_v4_0_stop(struct amdgpu_device *adev) +{ + uint32_t i, j; + uint32_t status; + + WREG32(mmUVD_RBC_RB_CNTL, 0x11010101); + + for (i = 0; i < 10; ++i) { + for (j = 0; j < 100; ++j) { + status = RREG32(mmUVD_STATUS); + if (status & 2) + break; + mdelay(1); + } + if (status & 2) + break; + } + + for (i = 0; i < 10; ++i) { + for (j = 0; j < 100; ++j) { + status = RREG32(mmUVD_LMI_STATUS); + if (status & 0xf) + break; + mdelay(1); + } + if (status & 0xf) + break; + } + + /* Stall UMC and register bus before resetting VCPU */ + WREG32_P(mmUVD_LMI_CTRL2, 1 << 8, ~(1 << 8)); + + for (i = 0; i < 10; ++i) { + for (j = 0; j < 100; ++j) { + status = RREG32(mmUVD_LMI_STATUS); + if (status & 0x240) + break; + mdelay(1); + } + if (status & 0x240) + break; + } + + WREG32_P(0x3D49, 0, ~(1 << 2)); + + WREG32_P(mmUVD_VCPU_CNTL, 0, ~(1 << 9)); + + /* put LMI, VCPU, RBC etc... into reset */ + WREG32(mmUVD_SOFT_RESET, UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | + UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK); + + WREG32(mmUVD_STATUS, 0); + + uvd_v4_0_set_dcm(adev, false); +} + /** * uvd_v4_0_ring_emit_fence - emit an fence & trap command * @@ -245,6 +578,126 @@ static void uvd_v4_0_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, ib->length_dw); } +/** + * uvd_v4_0_mc_resume - memory controller programming + * + * @adev: amdgpu_device pointer + * + * Let the UVD memory controller know it's offsets + */ +static void uvd_v4_0_mc_resume(struct amdgpu_device *adev) +{ + uint64_t addr; + uint32_t size, chip_id; + const struct common_firmware_header *hdr; + + hdr = (const struct common_firmware_header *)adev->uvd.fw->data; + /* programm the VCPU memory controller bits 0-27 */ + addr = (adev->uvd.gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3; + size = (AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8) - + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3; + WREG32(mmUVD_VCPU_CACHE_OFFSET0, addr); + WREG32(mmUVD_VCPU_CACHE_SIZE0, size); + + addr += size; + size = AMDGPU_UVD_HEAP_SIZE >> 3; + WREG32(mmUVD_VCPU_CACHE_OFFSET1, addr); + WREG32(mmUVD_VCPU_CACHE_SIZE1, size); + + addr += size; + size = (AMDGPU_UVD_STACK_SIZE + + (AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles)) >> 3; + WREG32(mmUVD_VCPU_CACHE_OFFSET2, addr); + WREG32(mmUVD_VCPU_CACHE_SIZE2, size); + + /* bits 28-31 */ + addr = (adev->uvd.gpu_addr >> 28) & 0xF; + WREG32(mmUVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0)); + + /* bits 32-39 */ + addr = (adev->uvd.gpu_addr >> 32) & 0xFF; + WREG32(mmUVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); + + WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config); + WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); + WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config); + + /* tell firmware which hardware it is running on */ + switch (adev->asic_type) { + case CHIP_TAHITI: + chip_id = 0x01000014; + break; + case CHIP_VERDE: + chip_id = 0x01000015; + break; + case CHIP_PITCAIRN: + case CHIP_OLAND: + chip_id = 0x01000016; + break; + default: + return; + } + WREG32(UVD_VCPU_CHIP_ID, chip_id); +} + +static void uvd_v4_0_set_dcm(struct amdgpu_device *adev, + bool sw_mode) +{ + u32 tmp, tmp2; + + WREG32_FIELD(UVD_CGC_GATE, REGS, 0); + + tmp = RREG32(mmUVD_CGC_CTRL); + tmp &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK | UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK); + tmp |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK | + (1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT) | + (4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT); + + if (sw_mode) { + tmp &= ~0x7ffff800; + tmp2 = UVD_CGC_CTRL2__DYN_OCLK_RAMP_EN_MASK | + UVD_CGC_CTRL2__DYN_RCLK_RAMP_EN_MASK | + (7 << UVD_CGC_CTRL2__GATER_DIV_ID__SHIFT); + } else { + tmp |= 0x7ffff800; + tmp2 = 0; + } + + WREG32(mmUVD_CGC_CTRL, tmp); + WREG32_UVD_CTX(ixUVD_CGC_CTRL2, tmp2); +} + +static bool uvd_v4_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK); +} + +static int uvd_v4_0_wait_for_idle(void *handle) +{ + unsigned i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK)) + return 0; + } + return -ETIMEDOUT; +} + +static int uvd_v4_0_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + uvd_v4_0_stop(adev); + + WREG32_P(mmSRBM_SOFT_RESET, SRBM_SOFT_RESET__SOFT_RESET_UVD_MASK, ~SRBM_SOFT_RESET__SOFT_RESET_UVD_MASK); + mdelay(5); + + return uvd_v4_0_start(adev); +} + static int uvd_v4_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned int type, @@ -263,12 +716,47 @@ static int uvd_v4_0_process_interrupt(struct amdgpu_device *adev, return 0; } +static int uvd_v4_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + return 0; +} + +static int uvd_v4_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + /* This doesn't actually powergate the UVD block. + * That's done in the dpm code via the SMC. This + * just re-inits the block as necessary. The actual + * gating still happens in the dpm code. We should + * revisit this when there is a cleaner line between + * the smc and the hw blocks + */ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (state == AMD_PG_STATE_GATE) { + uvd_v4_0_stop(adev); + return 0; + } else { + return uvd_v4_0_start(adev); + } +} + static const struct amd_ip_funcs uvd_v4_0_ip_funcs = { .name = "uvd_v4_0", .early_init = uvd_v4_0_early_init, .late_init = NULL, .sw_init = uvd_v4_0_sw_init, .sw_fini = uvd_v4_0_sw_fini, + .hw_init = uvd_v4_0_hw_init, + .hw_fini = uvd_v4_0_hw_fini, + .suspend = uvd_v4_0_suspend, + .resume = uvd_v4_0_resume, + .is_idle = uvd_v4_0_is_idle, + .wait_for_idle = uvd_v4_0_wait_for_idle, + .soft_reset = uvd_v4_0_soft_reset, + .set_clockgating_state = uvd_v4_0_set_clockgating_state, + .set_powergating_state = uvd_v4_0_set_powergating_state, }; static const struct amdgpu_ring_funcs uvd_v4_0_ring_funcs = { -- 2.15.0