On Wed, Apr 03, 2013 at 01:18:31AM +0200, Christian König wrote: > Just everything needed to decode videos using UVD. > > v6: just all the bugfixes and support for R7xx-SI merged in one patch > v7: UVD_CGC_GATE is a write only register, lockup detection fix > > Signed-off-by: Christian König <deathsimple@xxxxxxxxxxx> > --- > drivers/gpu/drm/radeon/Makefile | 2 +- > drivers/gpu/drm/radeon/evergreen.c | 40 ++- > drivers/gpu/drm/radeon/evergreend.h | 7 + > drivers/gpu/drm/radeon/ni.c | 49 +++ > drivers/gpu/drm/radeon/nid.h | 9 + > drivers/gpu/drm/radeon/r600.c | 291 ++++++++++++++++++ > drivers/gpu/drm/radeon/r600d.h | 61 ++++ > drivers/gpu/drm/radeon/radeon.h | 47 ++- > drivers/gpu/drm/radeon/radeon_asic.c | 63 ++++ > drivers/gpu/drm/radeon/radeon_asic.h | 19 ++ > drivers/gpu/drm/radeon/radeon_cs.c | 27 +- > drivers/gpu/drm/radeon/radeon_fence.c | 23 +- > drivers/gpu/drm/radeon/radeon_kms.c | 1 + > drivers/gpu/drm/radeon/radeon_object.c | 12 +- > drivers/gpu/drm/radeon/radeon_object.h | 2 +- > drivers/gpu/drm/radeon/radeon_ring.c | 24 +- > drivers/gpu/drm/radeon/radeon_test.c | 72 +++-- > drivers/gpu/drm/radeon/radeon_uvd.c | 521 ++++++++++++++++++++++++++++++++ > drivers/gpu/drm/radeon/rv770.c | 134 ++++++++ > drivers/gpu/drm/radeon/rv770d.h | 14 + > drivers/gpu/drm/radeon/si.c | 32 ++ > drivers/gpu/drm/radeon/sid.h | 6 + > include/uapi/drm/radeon_drm.h | 1 + > 23 files changed, 1400 insertions(+), 57 deletions(-) > create mode 100644 drivers/gpu/drm/radeon/radeon_uvd.c > > diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile > index bf17252..86c5e36 100644 > --- a/drivers/gpu/drm/radeon/Makefile > +++ b/drivers/gpu/drm/radeon/Makefile > @@ -76,7 +76,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ > evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o \ > evergreen_hdmi.o radeon_trace_points.o ni.o cayman_blit_shaders.o \ > atombios_encoders.o radeon_semaphore.o radeon_sa.o atombios_i2c.o si.o \ > - 
si_blit_shaders.o radeon_prime.o > + si_blit_shaders.o radeon_prime.o radeon_uvd.o > > radeon-$(CONFIG_COMPAT) += radeon_ioc32.o > radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o > diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c > index 305a657..18b66ff 100644 > --- a/drivers/gpu/drm/radeon/evergreen.c > +++ b/drivers/gpu/drm/radeon/evergreen.c > @@ -3360,6 +3360,9 @@ restart_ih: > DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data); > break; > } > + case 124: /* UVD */ > + DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data); > + radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX); > break; > case 146: > case 147: > @@ -3571,7 +3574,7 @@ int evergreen_copy_dma(struct radeon_device *rdev, > > static int evergreen_startup(struct radeon_device *rdev) > { > - struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; > + struct radeon_ring *ring; > int r; > > /* enable pcie gen2 link */ > @@ -3638,6 +3641,17 @@ static int evergreen_startup(struct radeon_device *rdev) > return r; > } > > + r = rv770_uvd_resume(rdev); > + if (!r) { > + r = radeon_fence_driver_start_ring(rdev, > + R600_RING_TYPE_UVD_INDEX); > + if (r) > + dev_err(rdev->dev, "UVD fences init error (%d).\n", r); > + } > + > + if (r) > + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; > + > /* Enable IRQ */ > r = r600_irq_init(rdev); > if (r) { > @@ -3647,6 +3661,7 @@ static int evergreen_startup(struct radeon_device *rdev) > } > evergreen_irq_set(rdev); > > + ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; > r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, > R600_CP_RB_RPTR, R600_CP_RB_WPTR, > 0, 0xfffff, RADEON_CP_PACKET2); > @@ -3670,6 +3685,19 @@ static int evergreen_startup(struct radeon_device *rdev) > if (r) > return r; > > + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; > + if (ring->ring_size) { > + r = radeon_ring_init(rdev, ring, ring->ring_size, > + R600_WB_UVD_RPTR_OFFSET, > + UVD_RBC_RB_RPTR, 
UVD_RBC_RB_WPTR, > + 0, 0xfffff, RADEON_CP_PACKET2); > + if (!r) > + r = r600_uvd_init(rdev); > + > + if (r) > + DRM_ERROR("radeon: error initializing UVD (%d).\n", r); > + } > + > r = radeon_ib_pool_init(rdev); > if (r) { > dev_err(rdev->dev, "IB initialization failed (%d).\n", r); > @@ -3716,8 +3744,10 @@ int evergreen_resume(struct radeon_device *rdev) > int evergreen_suspend(struct radeon_device *rdev) > { > r600_audio_fini(rdev); > + radeon_uvd_suspend(rdev); > r700_cp_stop(rdev); > r600_dma_stop(rdev); > + r600_uvd_rbc_stop(rdev); > evergreen_irq_suspend(rdev); > radeon_wb_disable(rdev); > evergreen_pcie_gart_disable(rdev); > @@ -3797,6 +3827,13 @@ int evergreen_init(struct radeon_device *rdev) > rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL; > r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024); > > + r = radeon_uvd_init(rdev); > + if (!r) { > + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL; > + r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], > + 4096); > + } > + > rdev->ih.ring_obj = NULL; > r600_ih_ring_init(rdev, 64 * 1024); > > @@ -3843,6 +3880,7 @@ void evergreen_fini(struct radeon_device *rdev) > radeon_ib_pool_fini(rdev); > radeon_irq_kms_fini(rdev); > evergreen_pcie_gart_fini(rdev); > + radeon_uvd_fini(rdev); > r600_vram_scratch_fini(rdev); > radeon_gem_fini(rdev); > radeon_fence_driver_fini(rdev); > diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h > index 982d25a..c5d873e 100644 > --- a/drivers/gpu/drm/radeon/evergreend.h > +++ b/drivers/gpu/drm/radeon/evergreend.h > @@ -992,6 +992,13 @@ > # define TARGET_LINK_SPEED_MASK (0xf << 0) > # define SELECTABLE_DEEMPHASIS (1 << 6) > > + > +/* > + * UVD > + */ > +#define UVD_RBC_RB_RPTR 0xf690 > +#define UVD_RBC_RB_WPTR 0xf694 > + > /* > * PM4 > */ > diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c > index 27769e7..ac944f5 100644 > --- a/drivers/gpu/drm/radeon/ni.c > +++ b/drivers/gpu/drm/radeon/ni.c > @@ 
-931,6 +931,23 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) > radeon_ring_write(ring, 10); /* poll interval */ > } > > +void cayman_uvd_semaphore_emit(struct radeon_device *rdev, > + struct radeon_ring *ring, > + struct radeon_semaphore *semaphore, > + bool emit_wait) > +{ > + uint64_t addr = semaphore->gpu_addr; > + > + radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0)); > + radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF); > + > + radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0)); > + radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF); > + > + radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); > + radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0)); > +} > + > static void cayman_cp_enable(struct radeon_device *rdev, bool enable) > { > if (enable) > @@ -1682,6 +1699,16 @@ static int cayman_startup(struct radeon_device *rdev) > return r; > } > > + r = rv770_uvd_resume(rdev); > + if (!r) { > + r = radeon_fence_driver_start_ring(rdev, > + R600_RING_TYPE_UVD_INDEX); > + if (r) > + dev_err(rdev->dev, "UVD fences init error (%d).\n", r); > + } > + if (r) > + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; > + > r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX); > if (r) { > dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); > @@ -1748,6 +1775,18 @@ static int cayman_startup(struct radeon_device *rdev) > if (r) > return r; > > + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; > + if (ring->ring_size) { > + r = radeon_ring_init(rdev, ring, ring->ring_size, > + R600_WB_UVD_RPTR_OFFSET, > + UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, > + 0, 0xfffff, RADEON_CP_PACKET2); > + if (!r) > + r = r600_uvd_init(rdev); > + if (r) > + DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); > + } > + > r = radeon_ib_pool_init(rdev); > if (r) { > dev_err(rdev->dev, "IB initialization failed (%d).\n", r); > @@ -1794,6 +1833,8 @@ int cayman_suspend(struct radeon_device *rdev) > radeon_vm_manager_fini(rdev); > 
cayman_cp_enable(rdev, false); > cayman_dma_stop(rdev); > + r600_uvd_rbc_stop(rdev); > + radeon_uvd_suspend(rdev); > evergreen_irq_suspend(rdev); > radeon_wb_disable(rdev); > cayman_pcie_gart_disable(rdev); > @@ -1868,6 +1909,13 @@ int cayman_init(struct radeon_device *rdev) > ring->ring_obj = NULL; > r600_ring_init(rdev, ring, 64 * 1024); > > + r = radeon_uvd_init(rdev); > + if (!r) { > + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; > + ring->ring_obj = NULL; > + r600_ring_init(rdev, ring, 4096); > + } > + > rdev->ih.ring_obj = NULL; > r600_ih_ring_init(rdev, 64 * 1024); > > @@ -1919,6 +1967,7 @@ void cayman_fini(struct radeon_device *rdev) > radeon_vm_manager_fini(rdev); > radeon_ib_pool_fini(rdev); > radeon_irq_kms_fini(rdev); > + radeon_uvd_fini(rdev); > cayman_pcie_gart_fini(rdev); > r600_vram_scratch_fini(rdev); > radeon_gem_fini(rdev); > diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h > index 079dee2..3731f6c 100644 > --- a/drivers/gpu/drm/radeon/nid.h > +++ b/drivers/gpu/drm/radeon/nid.h > @@ -486,6 +486,15 @@ > # define CACHE_FLUSH_AND_INV_EVENT (0x16 << 0) > > /* > + * UVD > + */ > +#define UVD_SEMA_ADDR_LOW 0xEF00 > +#define UVD_SEMA_ADDR_HIGH 0xEF04 > +#define UVD_SEMA_CMD 0xEF08 > +#define UVD_RBC_RB_RPTR 0xF690 > +#define UVD_RBC_RB_WPTR 0xF694 > + > +/* > * PM4 > */ > #define PACKET0(reg, n) ((RADEON_PACKET_TYPE0 << 30) | \ > diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c > index 0740db3..ca6117d 100644 > --- a/drivers/gpu/drm/radeon/r600.c > +++ b/drivers/gpu/drm/radeon/r600.c > @@ -2552,6 +2552,185 @@ void r600_dma_fini(struct radeon_device *rdev) > } > > /* > + * UVD > + */ > +int r600_uvd_rbc_start(struct radeon_device *rdev) > +{ > + struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; > + uint64_t rptr_addr; > + uint32_t rb_bufsz, tmp; > + int r; > + > + rptr_addr = rdev->wb.gpu_addr + R600_WB_UVD_RPTR_OFFSET; > + > + if (upper_32_bits(rptr_addr) != 
upper_32_bits(ring->gpu_addr)) { > + DRM_ERROR("UVD ring and rptr not in the same 4GB segment!\n"); > + return -EINVAL; > + } > + > + /* force RBC into idle state */ > + WREG32(UVD_RBC_RB_CNTL, 0x11010101); > + > + /* Set the write pointer delay */ > + WREG32(UVD_RBC_RB_WPTR_CNTL, 0); > + > + /* set the wb address */ > + WREG32(UVD_RBC_RB_RPTR_ADDR, rptr_addr >> 2); > + > + /* programm the 4GB memory segment for rptr and ring buffer */ > + WREG32(UVD_LMI_EXT40_ADDR, upper_32_bits(rptr_addr) | > + (0x7 << 16) | (0x1 << 31)); > + > + /* Initialize the ring buffer's read and write pointers */ > + WREG32(UVD_RBC_RB_RPTR, 0x0); > + > + ring->wptr = ring->rptr = RREG32(UVD_RBC_RB_RPTR); > + WREG32(UVD_RBC_RB_WPTR, ring->wptr); > + > + /* set the ring address */ > + WREG32(UVD_RBC_RB_BASE, ring->gpu_addr); > + > + /* Set ring buffer size */ > + rb_bufsz = drm_order(ring->ring_size); > + rb_bufsz = (0x1 << 8) | rb_bufsz; > + WREG32(UVD_RBC_RB_CNTL, rb_bufsz); > + > + ring->ready = true; > + r = radeon_ring_test(rdev, R600_RING_TYPE_UVD_INDEX, ring); > + if (r) { > + ring->ready = false; > + return r; > + } > + > + r = radeon_ring_lock(rdev, ring, 10); > + if (r) { > + DRM_ERROR("radeon: ring failed to lock UVD ring (%d).\n", r); > + return r; > + } > + > + tmp = PACKET0(UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0); > + radeon_ring_write(ring, tmp); > + radeon_ring_write(ring, 0xFFFFF); > + > + tmp = PACKET0(UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0); > + radeon_ring_write(ring, tmp); > + radeon_ring_write(ring, 0xFFFFF); > + > + tmp = PACKET0(UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0); > + radeon_ring_write(ring, tmp); > + radeon_ring_write(ring, 0xFFFFF); > + > + /* Clear timeout status bits */ > + radeon_ring_write(ring, PACKET0(UVD_SEMA_TIMEOUT_STATUS, 0)); > + radeon_ring_write(ring, 0x8); > + > + radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0)); > + radeon_ring_write(ring, 1); > + > + radeon_ring_unlock_commit(rdev, ring); > + > + return 0; > +} > + > +void 
r600_uvd_rbc_stop(struct radeon_device *rdev) > +{ > + struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; > + > + /* force RBC into idle state */ > + WREG32(UVD_RBC_RB_CNTL, 0x11010101); > + ring->ready = false; > +} > + > +int r600_uvd_init(struct radeon_device *rdev) > +{ > + int i, j, r; > + > + /* disable clock gating */ > + WREG32(UVD_CGC_GATE, 0); > + > + /* disable interupt */ > + WREG32_P(UVD_MASTINT_EN, 0, ~(1 << 1)); > + > + /* put LMI, VCPU, RBC etc... into reset */ > + WREG32(UVD_SOFT_RESET, LMI_SOFT_RESET | VCPU_SOFT_RESET | > + LBSI_SOFT_RESET | RBC_SOFT_RESET | CSM_SOFT_RESET | > + CXW_SOFT_RESET | TAP_SOFT_RESET | LMI_UMC_SOFT_RESET); > + mdelay(5); > + > + /* take UVD block out of reset */ > + WREG32_P(SRBM_SOFT_RESET, 0, ~SOFT_RESET_UVD); > + mdelay(5); > + > + /* initialize UVD memory controller */ > + WREG32(UVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) | > + (1 << 21) | (1 << 9) | (1 << 20)); > + > + /* disable byte swapping */ > + WREG32(UVD_LMI_SWAP_CNTL, 0); > + WREG32(UVD_MP_SWAP_CNTL, 0); > + > + WREG32(UVD_MPC_SET_MUXA0, 0x40c2040); > + WREG32(UVD_MPC_SET_MUXA1, 0x0); > + WREG32(UVD_MPC_SET_MUXB0, 0x40c2040); > + WREG32(UVD_MPC_SET_MUXB1, 0x0); > + WREG32(UVD_MPC_SET_ALU, 0); > + WREG32(UVD_MPC_SET_MUX, 0x88); > + > + /* Stall UMC */ > + WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8)); > + WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3)); > + > + /* take all subblocks out of reset, except VCPU */ > + WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET); > + mdelay(5); > + > + /* enable VCPU clock */ > + WREG32(UVD_VCPU_CNTL, 1 << 9); > + > + /* enable UMC */ > + WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8)); > + > + /* boot up the VCPU */ > + WREG32(UVD_SOFT_RESET, 0); > + mdelay(10); > + > + WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3)); > + > + for (i = 0; i < 10; ++i) { > + uint32_t status; > + for (j = 0; j < 100; ++j) { > + status = RREG32(UVD_STATUS); > + if (status & 2) > + break; > + mdelay(10); > + } > + r = 0; > + if (status & 2) > + break; > + > + 
DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n"); > + WREG32_P(UVD_SOFT_RESET, VCPU_SOFT_RESET, ~VCPU_SOFT_RESET); > + mdelay(10); > + WREG32_P(UVD_SOFT_RESET, 0, ~VCPU_SOFT_RESET); > + mdelay(10); > + r = -1; > + } > + if (r) { > + DRM_ERROR("UVD not responding, giving up!!!\n"); > + return r; > + } > + /* enable interupt */ > + WREG32_P(UVD_MASTINT_EN, 3<<1, ~(3 << 1)); > + > + r = r600_uvd_rbc_start(rdev); > + if (r) > + return r; > + > + DRM_INFO("UVD initialized successfully.\n"); > + return 0; > +} > + > +/* > * GPU scratch registers helpers function. > */ > void r600_scratch_init(struct radeon_device *rdev) > @@ -2660,6 +2839,40 @@ int r600_dma_ring_test(struct radeon_device *rdev, > return r; > } > > +int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) > +{ > + uint32_t tmp = 0; > + unsigned i; > + int r; > + > + WREG32(UVD_CONTEXT_ID, 0xCAFEDEAD); > + r = radeon_ring_lock(rdev, ring, 3); > + if (r) { > + DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", > + ring->idx, r); > + return r; > + } > + radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0)); > + radeon_ring_write(ring, 0xDEADBEEF); > + radeon_ring_unlock_commit(rdev, ring); > + for (i = 0; i < rdev->usec_timeout; i++) { > + tmp = RREG32(UVD_CONTEXT_ID); > + if (tmp == 0xDEADBEEF) > + break; > + DRM_UDELAY(1); > + } > + > + if (i < rdev->usec_timeout) { > + DRM_INFO("ring test on %d succeeded in %d usecs\n", > + ring->idx, i); > + } else { > + DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", > + ring->idx, tmp); > + r = -EINVAL; > + } > + return r; > +} > + > /* > * CP fences/semaphores > */ > @@ -2711,6 +2924,30 @@ void r600_fence_ring_emit(struct radeon_device *rdev, > } > } > > +void r600_uvd_fence_emit(struct radeon_device *rdev, > + struct radeon_fence *fence) > +{ > + struct radeon_ring *ring = &rdev->ring[fence->ring]; > + uint32_t addr = rdev->fence_drv[fence->ring].gpu_addr; > + > + radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0)); > + 
radeon_ring_write(ring, fence->seq); > + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0)); > + radeon_ring_write(ring, addr & 0xffffffff); > + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0)); > + radeon_ring_write(ring, upper_32_bits(addr) & 0xff); > + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0)); > + radeon_ring_write(ring, 0); > + > + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0)); > + radeon_ring_write(ring, 0); > + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0)); > + radeon_ring_write(ring, 0); > + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0)); > + radeon_ring_write(ring, 2); > + return; > +} > + > void r600_semaphore_ring_emit(struct radeon_device *rdev, > struct radeon_ring *ring, > struct radeon_semaphore *semaphore, > @@ -2780,6 +3017,23 @@ void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, > radeon_ring_write(ring, upper_32_bits(addr) & 0xff); > } > > +void r600_uvd_semaphore_emit(struct radeon_device *rdev, > + struct radeon_ring *ring, > + struct radeon_semaphore *semaphore, > + bool emit_wait) > +{ > + uint64_t addr = semaphore->gpu_addr; > + > + radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0)); > + radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF); > + > + radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0)); > + radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF); > + > + radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); > + radeon_ring_write(ring, emit_wait ? 
1 : 0); > +} > + > int r600_copy_blit(struct radeon_device *rdev, > uint64_t src_offset, > uint64_t dst_offset, > @@ -3183,6 +3437,16 @@ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) > radeon_ring_write(ring, ib->length_dw); > } > > +void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) > +{ > + struct radeon_ring *ring = &rdev->ring[ib->ring]; > + > + radeon_ring_write(ring, PACKET0(UVD_RBC_IB_BASE, 0)); > + radeon_ring_write(ring, ib->gpu_addr); > + radeon_ring_write(ring, PACKET0(UVD_RBC_IB_SIZE, 0)); > + radeon_ring_write(ring, ib->length_dw); > +} > + > int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) > { > struct radeon_ib ib; > @@ -3300,6 +3564,33 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) > return r; > } > > +int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) > +{ > + struct radeon_fence *fence; > + int r; > + > + r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL); > + if (r) { > + DRM_ERROR("radeon: failed to get create msg (%d).\n", r); > + return r; > + } > + > + r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, &fence); > + if (r) { > + DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r); > + return r; > + } > + > + r = radeon_fence_wait(fence, false); > + if (r) { > + DRM_ERROR("radeon: fence wait failed (%d).\n", r); > + return r; > + } > + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); > + radeon_fence_unref(&fence); > + return r; > +} > + > /** > * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine > * > diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h > index a42ba11..441bdb8 100644 > --- a/drivers/gpu/drm/radeon/r600d.h > +++ b/drivers/gpu/drm/radeon/r600d.h > @@ -691,6 +691,7 @@ > #define SRBM_SOFT_RESET 0xe60 > # define SOFT_RESET_DMA (1 << 12) > # define SOFT_RESET_RLC (1 << 13) > +# define SOFT_RESET_UVD (1 << 18) > # define RV770_SOFT_RESET_DMA (1 << 20) > > 
#define CP_INT_CNTL 0xc124 > @@ -1143,6 +1144,66 @@ > # define AFMT_AZ_AUDIO_ENABLE_CHG_ACK (1 << 30) > > /* > + * UVD > + */ > +#define UVD_SEMA_ADDR_LOW 0xef00 > +#define UVD_SEMA_ADDR_HIGH 0xef04 > +#define UVD_SEMA_CMD 0xef08 > + > +#define UVD_GPCOM_VCPU_CMD 0xef0c > +#define UVD_GPCOM_VCPU_DATA0 0xef10 > +#define UVD_GPCOM_VCPU_DATA1 0xef14 > +#define UVD_ENGINE_CNTL 0xef18 > + > +#define UVD_SEMA_CNTL 0xf400 > +#define UVD_RB_ARB_CTRL 0xf480 > + > +#define UVD_LMI_EXT40_ADDR 0xf498 > +#define UVD_CGC_GATE 0xf4a8 > +#define UVD_LMI_CTRL2 0xf4f4 > +#define UVD_MASTINT_EN 0xf500 > +#define UVD_LMI_ADDR_EXT 0xf594 > +#define UVD_LMI_CTRL 0xf598 > +#define UVD_LMI_SWAP_CNTL 0xf5b4 > +#define UVD_MP_SWAP_CNTL 0xf5bC > +#define UVD_MPC_CNTL 0xf5dC > +#define UVD_MPC_SET_MUXA0 0xf5e4 > +#define UVD_MPC_SET_MUXA1 0xf5e8 > +#define UVD_MPC_SET_MUXB0 0xf5eC > +#define UVD_MPC_SET_MUXB1 0xf5f0 > +#define UVD_MPC_SET_MUX 0xf5f4 > +#define UVD_MPC_SET_ALU 0xf5f8 > + > +#define UVD_VCPU_CNTL 0xf660 > +#define UVD_SOFT_RESET 0xf680 > +#define RBC_SOFT_RESET (1<<0) > +#define LBSI_SOFT_RESET (1<<1) > +#define LMI_SOFT_RESET (1<<2) > +#define VCPU_SOFT_RESET (1<<3) > +#define CSM_SOFT_RESET (1<<5) > +#define CXW_SOFT_RESET (1<<6) > +#define TAP_SOFT_RESET (1<<7) > +#define LMI_UMC_SOFT_RESET (1<<13) > +#define UVD_RBC_IB_BASE 0xf684 > +#define UVD_RBC_IB_SIZE 0xf688 > +#define UVD_RBC_RB_BASE 0xf68c > +#define UVD_RBC_RB_RPTR 0xf690 > +#define UVD_RBC_RB_WPTR 0xf694 > +#define UVD_RBC_RB_WPTR_CNTL 0xf698 > + > +#define UVD_STATUS 0xf6bc > + > +#define UVD_SEMA_TIMEOUT_STATUS 0xf6c0 > +#define UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL 0xf6c4 > +#define UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL 0xf6c8 > +#define UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL 0xf6cc > + > +#define UVD_RBC_RB_CNTL 0xf6a4 > +#define UVD_RBC_RB_RPTR_ADDR 0xf6a8 > + > +#define UVD_CONTEXT_ID 0xf6f4 > + > +/* > * PM4 > */ > #define PACKET0(reg, n) ((RADEON_PACKET_TYPE0 << 30) | \ > diff --git 
a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h > index 8263af3..3f5572d 100644 > --- a/drivers/gpu/drm/radeon/radeon.h > +++ b/drivers/gpu/drm/radeon/radeon.h > @@ -109,24 +109,27 @@ extern int radeon_lockup_timeout; > #define RADEON_BIOS_NUM_SCRATCH 8 > > /* max number of rings */ > -#define RADEON_NUM_RINGS 5 > +#define RADEON_NUM_RINGS 6 > > /* fence seq are set to this number when signaled */ > #define RADEON_FENCE_SIGNALED_SEQ 0LL > > /* internal ring indices */ > /* r1xx+ has gfx CP ring */ > -#define RADEON_RING_TYPE_GFX_INDEX 0 > +#define RADEON_RING_TYPE_GFX_INDEX 0 > > /* cayman has 2 compute CP rings */ > -#define CAYMAN_RING_TYPE_CP1_INDEX 1 > -#define CAYMAN_RING_TYPE_CP2_INDEX 2 > +#define CAYMAN_RING_TYPE_CP1_INDEX 1 > +#define CAYMAN_RING_TYPE_CP2_INDEX 2 > > /* R600+ has an async dma ring */ > #define R600_RING_TYPE_DMA_INDEX 3 > /* cayman add a second async dma ring */ > #define CAYMAN_RING_TYPE_DMA1_INDEX 4 > > +/* R600+ */ > +#define R600_RING_TYPE_UVD_INDEX 5 > + > /* hardcode those limit for now */ > #define RADEON_VA_IB_OFFSET (1 << 20) > #define RADEON_VA_RESERVED_SIZE (8 << 20) > @@ -357,8 +360,9 @@ struct radeon_bo_list { > struct ttm_validate_buffer tv; > struct radeon_bo *bo; > uint64_t gpu_offset; > - unsigned rdomain; > - unsigned wdomain; > + bool written; > + unsigned domain; > + unsigned alt_domain; > u32 tiling_flags; > }; I think the rdomain/wdomain change should be in a patch of its own. The change itself looks fine, but we have had issues with changes touching that part before, so splitting it out would make bisecting and understanding the implications of the change easier. > > @@ -826,7 +830,6 @@ struct radeon_cs_reloc { > struct radeon_bo *robj; > struct radeon_bo_list lobj; > uint32_t handle; > - uint32_t flags; > }; Why remove the flags? IIRC it's not really used right now, but I remember there were plans to use it. 
> > struct radeon_cs_chunk { > @@ -918,6 +921,7 @@ struct radeon_wb { > #define R600_WB_DMA_RPTR_OFFSET 1792 > #define R600_WB_IH_WPTR_OFFSET 2048 > #define CAYMAN_WB_DMA1_RPTR_OFFSET 2304 > +#define R600_WB_UVD_RPTR_OFFSET 2560 > #define R600_WB_EVENT_OFFSET 3072 > > /** > @@ -1118,6 +1122,33 @@ struct radeon_pm { > int radeon_pm_get_type_index(struct radeon_device *rdev, > enum radeon_pm_state_type ps_type, > int instance); > +/* > + * UVD > + */ > +#define RADEON_MAX_UVD_HANDLES 10 > +#define RADEON_UVD_STACK_SIZE (1024*1024) > +#define RADEON_UVD_HEAP_SIZE (1024*1024) > + > +struct radeon_uvd { > + struct radeon_bo *vcpu_bo; > + void *cpu_addr; > + uint64_t gpu_addr; > + atomic_t handles[RADEON_MAX_UVD_HANDLES]; > + struct drm_file *filp[RADEON_MAX_UVD_HANDLES]; > +}; > + > +int radeon_uvd_init(struct radeon_device *rdev); > +void radeon_uvd_fini(struct radeon_device *rdev); > +int radeon_uvd_suspend(struct radeon_device *rdev); > +int radeon_uvd_resume(struct radeon_device *rdev); > +int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, > + uint32_t handle, struct radeon_fence **fence); > +int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, > + uint32_t handle, struct radeon_fence **fence); > +void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo); > +void radeon_uvd_free_handles(struct radeon_device *rdev, > + struct drm_file *filp); > +int radeon_uvd_cs_parse(struct radeon_cs_parser *parser); > > struct r600_audio { > int channels; > @@ -1608,6 +1639,7 @@ struct radeon_device { > struct radeon_asic *asic; > struct radeon_gem gem; > struct radeon_pm pm; > + struct radeon_uvd uvd; > uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH]; > struct radeon_wb wb; > struct radeon_dummy_page dummy_page; > @@ -1621,6 +1653,7 @@ struct radeon_device { > const struct firmware *rlc_fw; /* r6/700 RLC firmware */ > const struct firmware *mc_fw; /* NI MC firmware */ > const struct firmware *ce_fw; /* SI CE firmware */ > + const struct 
firmware *uvd_fw; /* UVD firmware */ > struct r600_blit r600_blit; > struct r600_vram_scratch vram_scratch; > int msi_enabled; /* msi enabled */ > diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c > index aba0a89..a7a7b2b 100644 > --- a/drivers/gpu/drm/radeon/radeon_asic.c > +++ b/drivers/gpu/drm/radeon/radeon_asic.c > @@ -1130,6 +1130,15 @@ static struct radeon_asic rv770_asic = { > .ring_test = &r600_dma_ring_test, > .ib_test = &r600_dma_ib_test, > .is_lockup = &r600_dma_is_lockup, > + }, > + [R600_RING_TYPE_UVD_INDEX] = { > + .ib_execute = &r600_uvd_ib_execute, > + .emit_fence = &r600_uvd_fence_emit, > + .emit_semaphore = &r600_uvd_semaphore_emit, > + .cs_parse = &radeon_uvd_cs_parse, > + .ring_test = &r600_uvd_ring_test, > + .ib_test = &r600_uvd_ib_test, > + .is_lockup = &radeon_ring_test_lockup, > } > }, > .irq = { > @@ -1216,6 +1225,15 @@ static struct radeon_asic evergreen_asic = { > .ring_test = &r600_dma_ring_test, > .ib_test = &r600_dma_ib_test, > .is_lockup = &evergreen_dma_is_lockup, > + }, > + [R600_RING_TYPE_UVD_INDEX] = { > + .ib_execute = &r600_uvd_ib_execute, > + .emit_fence = &r600_uvd_fence_emit, > + .emit_semaphore = &r600_uvd_semaphore_emit, > + .cs_parse = &radeon_uvd_cs_parse, > + .ring_test = &r600_uvd_ring_test, > + .ib_test = &r600_uvd_ib_test, > + .is_lockup = &radeon_ring_test_lockup, > } > }, > .irq = { > @@ -1302,6 +1320,15 @@ static struct radeon_asic sumo_asic = { > .ring_test = &r600_dma_ring_test, > .ib_test = &r600_dma_ib_test, > .is_lockup = &evergreen_dma_is_lockup, > + }, > + [R600_RING_TYPE_UVD_INDEX] = { > + .ib_execute = &r600_uvd_ib_execute, > + .emit_fence = &r600_uvd_fence_emit, > + .emit_semaphore = &r600_uvd_semaphore_emit, > + .cs_parse = &radeon_uvd_cs_parse, > + .ring_test = &r600_uvd_ring_test, > + .ib_test = &r600_uvd_ib_test, > + .is_lockup = &radeon_ring_test_lockup, > } > }, > .irq = { > @@ -1388,6 +1415,15 @@ static struct radeon_asic btc_asic = { > .ring_test = 
&r600_dma_ring_test, > .ib_test = &r600_dma_ib_test, > .is_lockup = &evergreen_dma_is_lockup, > + }, > + [R600_RING_TYPE_UVD_INDEX] = { > + .ib_execute = &r600_uvd_ib_execute, > + .emit_fence = &r600_uvd_fence_emit, > + .emit_semaphore = &r600_uvd_semaphore_emit, > + .cs_parse = &radeon_uvd_cs_parse, > + .ring_test = &r600_uvd_ring_test, > + .ib_test = &r600_uvd_ib_test, > + .is_lockup = &radeon_ring_test_lockup, > } > }, > .irq = { > @@ -1517,6 +1553,15 @@ static struct radeon_asic cayman_asic = { > .ib_test = &r600_dma_ib_test, > .is_lockup = &cayman_dma_is_lockup, > .vm_flush = &cayman_dma_vm_flush, > + }, > + [R600_RING_TYPE_UVD_INDEX] = { > + .ib_execute = &r600_uvd_ib_execute, > + .emit_fence = &r600_uvd_fence_emit, > + .emit_semaphore = &cayman_uvd_semaphore_emit, > + .cs_parse = &radeon_uvd_cs_parse, > + .ring_test = &r600_uvd_ring_test, > + .ib_test = &r600_uvd_ib_test, > + .is_lockup = &radeon_ring_test_lockup, > } > }, > .irq = { > @@ -1646,6 +1691,15 @@ static struct radeon_asic trinity_asic = { > .ib_test = &r600_dma_ib_test, > .is_lockup = &cayman_dma_is_lockup, > .vm_flush = &cayman_dma_vm_flush, > + }, > + [R600_RING_TYPE_UVD_INDEX] = { > + .ib_execute = &r600_uvd_ib_execute, > + .emit_fence = &r600_uvd_fence_emit, > + .emit_semaphore = &cayman_uvd_semaphore_emit, > + .cs_parse = &radeon_uvd_cs_parse, > + .ring_test = &r600_uvd_ring_test, > + .ib_test = &r600_uvd_ib_test, > + .is_lockup = &radeon_ring_test_lockup, > } > }, > .irq = { > @@ -1775,6 +1829,15 @@ static struct radeon_asic si_asic = { > .ib_test = &r600_dma_ib_test, > .is_lockup = &si_dma_is_lockup, > .vm_flush = &si_dma_vm_flush, > + }, > + [R600_RING_TYPE_UVD_INDEX] = { > + .ib_execute = &r600_uvd_ib_execute, > + .emit_fence = &r600_uvd_fence_emit, > + .emit_semaphore = &cayman_uvd_semaphore_emit, > + .cs_parse = &radeon_uvd_cs_parse, > + .ring_test = &r600_uvd_ring_test, > + .ib_test = &r600_uvd_ib_test, > + .is_lockup = &radeon_ring_test_lockup, > } > }, > .irq = { > diff --git 
a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h > index 3535f73..515db96 100644 > --- a/drivers/gpu/drm/radeon/radeon_asic.h > +++ b/drivers/gpu/drm/radeon/radeon_asic.h > @@ -330,6 +330,7 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); > void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); > int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); > int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); > +int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring); > int r600_copy_blit(struct radeon_device *rdev, > uint64_t src_offset, uint64_t dst_offset, > unsigned num_gpu_pages, struct radeon_fence **fence); > @@ -392,6 +393,19 @@ int r600_mc_wait_for_idle(struct radeon_device *rdev); > u32 r600_get_xclk(struct radeon_device *rdev); > uint64_t r600_get_gpu_clock_counter(struct radeon_device *rdev); > > +/* uvd */ > +int r600_uvd_init(struct radeon_device *rdev); > +int r600_uvd_rbc_start(struct radeon_device *rdev); > +void r600_uvd_rbc_stop(struct radeon_device *rdev); > +int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); > +void r600_uvd_fence_emit(struct radeon_device *rdev, > + struct radeon_fence *fence); > +void r600_uvd_semaphore_emit(struct radeon_device *rdev, > + struct radeon_ring *ring, > + struct radeon_semaphore *semaphore, > + bool emit_wait); > +void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); > + > /* > * rv770,rv730,rv710,rv740 > */ > @@ -409,6 +423,7 @@ int rv770_copy_dma(struct radeon_device *rdev, > unsigned num_gpu_pages, > struct radeon_fence **fence); > u32 rv770_get_xclk(struct radeon_device *rdev); > +int rv770_uvd_resume(struct radeon_device *rdev); > > /* > * evergreen > @@ -465,6 +480,10 @@ int evergreen_copy_dma(struct radeon_device *rdev, > */ > void cayman_fence_ring_emit(struct radeon_device *rdev, > struct radeon_fence *fence); > +void 
cayman_uvd_semaphore_emit(struct radeon_device *rdev, > + struct radeon_ring *ring, > + struct radeon_semaphore *semaphore, > + bool emit_wait); > void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev); > int cayman_init(struct radeon_device *rdev); > void cayman_fini(struct radeon_device *rdev); > diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c > index 7d66e01..532ff68 100644 > --- a/drivers/gpu/drm/radeon/radeon_cs.c > +++ b/drivers/gpu/drm/radeon/radeon_cs.c > @@ -75,18 +75,34 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) > p->relocs_ptr[i] = &p->relocs[i]; > p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj); > p->relocs[i].lobj.bo = p->relocs[i].robj; > - p->relocs[i].lobj.wdomain = r->write_domain; > - p->relocs[i].lobj.rdomain = r->read_domains; > + p->relocs[i].lobj.written = !!r->write_domain; > + > + /* the first reloc of an UVD job is the > + msg and that must be in VRAM */ > + if (p->ring == R600_RING_TYPE_UVD_INDEX && i == 0) { > + > + p->relocs[i].lobj.domain = > + RADEON_GEM_DOMAIN_VRAM; > + > + p->relocs[i].lobj.alt_domain = > + RADEON_GEM_DOMAIN_VRAM; > + } else { > + uint32_t domain = r->write_domain ? 
> + r->write_domain : r->read_domains; > + p->relocs[i].lobj.domain = domain; > + if (domain == RADEON_GEM_DOMAIN_VRAM) > + domain |= RADEON_GEM_DOMAIN_GTT; > + p->relocs[i].lobj.alt_domain = domain; > + } > p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo; > p->relocs[i].handle = r->handle; > - p->relocs[i].flags = r->flags; > radeon_bo_list_add_object(&p->relocs[i].lobj, > &p->validated); > > } else > p->relocs[i].handle = 0; > } > - return radeon_bo_list_validate(&p->validated); > + return radeon_bo_list_validate(&p->validated, p->ring); > } > > static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority) > @@ -121,6 +137,9 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority > return -EINVAL; > } > break; > + case RADEON_CS_RING_UVD: > + p->ring = R600_RING_TYPE_UVD_INDEX; > + break; > } > return 0; > } > diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c > index 3435625..82fe183 100644 > --- a/drivers/gpu/drm/radeon/radeon_fence.c > +++ b/drivers/gpu/drm/radeon/radeon_fence.c > @@ -31,9 +31,9 @@ > #include <linux/seq_file.h> > #include <linux/atomic.h> > #include <linux/wait.h> > -#include <linux/list.h> > #include <linux/kref.h> > #include <linux/slab.h> > +#include <linux/firmware.h> > #include <drm/drmP.h> > #include "radeon_reg.h" > #include "radeon.h" > @@ -767,8 +767,21 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) > > radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg); > if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) { > - rdev->fence_drv[ring].scratch_reg = 0; > - index = R600_WB_EVENT_OFFSET + ring * 4; > + if (ring != R600_RING_TYPE_UVD_INDEX) { > + rdev->fence_drv[ring].scratch_reg = 0; > + index = R600_WB_EVENT_OFFSET + ring * 4; > + rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4]; > + rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + > + index; > + > + } else { > + /* 
put fence directly behind firmware */ > + rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr + > + rdev->uvd_fw->size; > + rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + > + rdev->uvd_fw->size; > + } > + > } else { > r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg); > if (r) { > @@ -778,9 +791,9 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) > index = RADEON_WB_SCRATCH_OFFSET + > rdev->fence_drv[ring].scratch_reg - > rdev->scratch.reg_base; > + rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4]; > + rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index; > } > - rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4]; > - rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index; > radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring); > rdev->fence_drv[ring].initialized = true; > dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n", > diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c > index c75cb2c..3019759 100644 > --- a/drivers/gpu/drm/radeon/radeon_kms.c > +++ b/drivers/gpu/drm/radeon/radeon_kms.c > @@ -513,6 +513,7 @@ void radeon_driver_preclose_kms(struct drm_device *dev, > rdev->hyperz_filp = NULL; > if (rdev->cmask_filp == file_priv) > rdev->cmask_filp = NULL; > + radeon_uvd_free_handles(rdev, file_priv); > } > > /* > diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c > index d3aface..0e34446 100644 > --- a/drivers/gpu/drm/radeon/radeon_object.c > +++ b/drivers/gpu/drm/radeon/radeon_object.c > @@ -339,14 +339,14 @@ void radeon_bo_fini(struct radeon_device *rdev) > void radeon_bo_list_add_object(struct radeon_bo_list *lobj, > struct list_head *head) > { > - if (lobj->wdomain) { > + if (lobj->written) { > list_add(&lobj->tv.head, head); > } else { > list_add_tail(&lobj->tv.head, head); > } > } > > -int radeon_bo_list_validate(struct list_head *head) > +int 
radeon_bo_list_validate(struct list_head *head, int ring) > { > struct radeon_bo_list *lobj; > struct radeon_bo *bo; > @@ -360,15 +360,17 @@ int radeon_bo_list_validate(struct list_head *head) > list_for_each_entry(lobj, head, tv.head) { > bo = lobj->bo; > if (!bo->pin_count) { > - domain = lobj->wdomain ? lobj->wdomain : lobj->rdomain; > + domain = lobj->domain; > > retry: > radeon_ttm_placement_from_domain(bo, domain); > + if (ring == R600_RING_TYPE_UVD_INDEX) > + radeon_uvd_force_into_uvd_segment(bo); > r = ttm_bo_validate(&bo->tbo, &bo->placement, > true, false); > if (unlikely(r)) { > - if (r != -ERESTARTSYS && domain == RADEON_GEM_DOMAIN_VRAM) { > - domain |= RADEON_GEM_DOMAIN_GTT; > + if (r != -ERESTARTSYS && domain != lobj->alt_domain) { > + domain = lobj->alt_domain; > goto retry; > } > return r; > diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h > index 5fc86b0..e2cb80a 100644 > --- a/drivers/gpu/drm/radeon/radeon_object.h > +++ b/drivers/gpu/drm/radeon/radeon_object.h > @@ -128,7 +128,7 @@ extern int radeon_bo_init(struct radeon_device *rdev); > extern void radeon_bo_fini(struct radeon_device *rdev); > extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj, > struct list_head *head); > -extern int radeon_bo_list_validate(struct list_head *head); > +extern int radeon_bo_list_validate(struct list_head *head, int ring); > extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo, > struct vm_area_struct *vma); > extern int radeon_bo_set_tiling_flags(struct radeon_bo *bo, > diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c > index 8d58e26..31e47d8 100644 > --- a/drivers/gpu/drm/radeon/radeon_ring.c > +++ b/drivers/gpu/drm/radeon/radeon_ring.c > @@ -368,7 +368,7 @@ void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *ring) > { > u32 rptr; > > - if (rdev->wb.enabled) > + if (rdev->wb.enabled && ring != &rdev->ring[R600_RING_TYPE_UVD_INDEX]) > rptr = 
le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]); > else > rptr = RREG32(ring->rptr_reg); > @@ -821,18 +821,20 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data) > return 0; > } > > -static int radeon_ring_type_gfx_index = RADEON_RING_TYPE_GFX_INDEX; > -static int cayman_ring_type_cp1_index = CAYMAN_RING_TYPE_CP1_INDEX; > -static int cayman_ring_type_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX; > -static int radeon_ring_type_dma1_index = R600_RING_TYPE_DMA_INDEX; > -static int radeon_ring_type_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX; > +static int radeon_gfx_index = RADEON_RING_TYPE_GFX_INDEX; > +static int cayman_cp1_index = CAYMAN_RING_TYPE_CP1_INDEX; > +static int cayman_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX; > +static int radeon_dma1_index = R600_RING_TYPE_DMA_INDEX; > +static int radeon_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX; > +static int r600_uvd_index = R600_RING_TYPE_UVD_INDEX; > > static struct drm_info_list radeon_debugfs_ring_info_list[] = { > - {"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_ring_type_gfx_index}, > - {"radeon_ring_cp1", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp1_index}, > - {"radeon_ring_cp2", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp2_index}, > - {"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_ring_type_dma1_index}, > - {"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_ring_type_dma2_index}, > + {"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_gfx_index}, > + {"radeon_ring_cp1", radeon_debugfs_ring_info, 0, &cayman_cp1_index}, > + {"radeon_ring_cp2", radeon_debugfs_ring_info, 0, &cayman_cp2_index}, > + {"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_dma1_index}, > + {"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_dma2_index}, > + {"radeon_ring_uvd", radeon_debugfs_ring_info, 0, &r600_uvd_index}, > }; > > static int radeon_debugfs_sa_info(struct seq_file *m, void *data) > diff --git a/drivers/gpu/drm/radeon/radeon_test.c 
b/drivers/gpu/drm/radeon/radeon_test.c > index fda09c9..bbed4af 100644 > --- a/drivers/gpu/drm/radeon/radeon_test.c > +++ b/drivers/gpu/drm/radeon/radeon_test.c > @@ -252,6 +252,36 @@ void radeon_test_moves(struct radeon_device *rdev) > radeon_do_test_moves(rdev, RADEON_TEST_COPY_BLIT); > } > > +static int radeon_test_create_and_emit_fence(struct radeon_device *rdev, > + struct radeon_ring *ring, > + struct radeon_fence **fence) > +{ > + int r; > + > + if (ring->idx == R600_RING_TYPE_UVD_INDEX) { > + r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL); > + if (r) { > + DRM_ERROR("Failed to get dummy create msg\n"); > + return r; > + } > + > + r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, fence); > + if (r) { > + DRM_ERROR("Failed to get dummy destroy msg\n"); > + return r; > + } > + } else { > + r = radeon_ring_lock(rdev, ring, 64); > + if (r) { > + DRM_ERROR("Failed to lock ring A %d\n", ring->idx); > + return r; > + } > + radeon_fence_emit(rdev, fence, ring->idx); > + radeon_ring_unlock_commit(rdev, ring); > + } > + return 0; > +} > + > void radeon_test_ring_sync(struct radeon_device *rdev, > struct radeon_ring *ringA, > struct radeon_ring *ringB) > @@ -272,21 +302,24 @@ void radeon_test_ring_sync(struct radeon_device *rdev, > goto out_cleanup; > } > radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); > - r = radeon_fence_emit(rdev, &fence1, ringA->idx); > - if (r) { > - DRM_ERROR("Failed to emit fence 1\n"); > - radeon_ring_unlock_undo(rdev, ringA); > + radeon_ring_unlock_commit(rdev, ringA); > + > + r = radeon_test_create_and_emit_fence(rdev, ringA, &fence1); > + if (r) > goto out_cleanup; > - } > - radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); > - r = radeon_fence_emit(rdev, &fence2, ringA->idx); > + > + r = radeon_ring_lock(rdev, ringA, 64); > if (r) { > - DRM_ERROR("Failed to emit fence 2\n"); > - radeon_ring_unlock_undo(rdev, ringA); > + DRM_ERROR("Failed to lock ring A %d\n", ringA->idx); > goto out_cleanup; > } > + 
radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); > radeon_ring_unlock_commit(rdev, ringA); > > + r = radeon_test_create_and_emit_fence(rdev, ringA, &fence2); > + if (r) > + goto out_cleanup; > + > mdelay(1000); > > if (radeon_fence_signaled(fence1)) { > @@ -364,27 +397,22 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev, > goto out_cleanup; > } > radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); > - r = radeon_fence_emit(rdev, &fenceA, ringA->idx); > - if (r) { > - DRM_ERROR("Failed to emit sync fence 1\n"); > - radeon_ring_unlock_undo(rdev, ringA); > - goto out_cleanup; > - } > radeon_ring_unlock_commit(rdev, ringA); > > + r = radeon_test_create_and_emit_fence(rdev, ringA, &fenceA); > + if (r) > + goto out_cleanup; > + > r = radeon_ring_lock(rdev, ringB, 64); > if (r) { > DRM_ERROR("Failed to lock ring B %d\n", ringB->idx); > goto out_cleanup; > } > radeon_semaphore_emit_wait(rdev, ringB->idx, semaphore); > - r = radeon_fence_emit(rdev, &fenceB, ringB->idx); > - if (r) { > - DRM_ERROR("Failed to create sync fence 2\n"); > - radeon_ring_unlock_undo(rdev, ringB); > - goto out_cleanup; > - } > radeon_ring_unlock_commit(rdev, ringB); > + r = radeon_test_create_and_emit_fence(rdev, ringB, &fenceB); > + if (r) > + goto out_cleanup; > > mdelay(1000); > > @@ -393,7 +421,7 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev, > goto out_cleanup; > } > if (radeon_fence_signaled(fenceB)) { > - DRM_ERROR("Fence A signaled without waiting for semaphore.\n"); > + DRM_ERROR("Fence B signaled without waiting for semaphore.\n"); > goto out_cleanup; > } > > diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c > new file mode 100644 > index 0000000..8ab7bb9 > --- /dev/null > +++ b/drivers/gpu/drm/radeon/radeon_uvd.c > @@ -0,0 +1,521 @@ > +/* > + * Copyright 2011 Advanced Micro Devices, Inc. > + * All Rights Reserved. 
> + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the > + * "Software"), to deal in the Software without restriction, including > + * without limitation the rights to use, copy, modify, merge, publish, > + * distribute, sub license, and/or sell copies of the Software, and to > + * permit persons to whom the Software is furnished to do so, subject to > + * the following conditions: > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL > + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, > + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR > + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE > + * USE OR OTHER DEALINGS IN THE SOFTWARE. > + * > + * The above copyright notice and this permission notice (including the > + * next paragraph) shall be included in all copies or substantial portions > + * of the Software. 
> + * > + */ > +/* > + * Authors: > + * Christian König <deathsimple@xxxxxxxxxxx> > + */ > + > +#include <linux/firmware.h> > +#include <linux/module.h> > +#include <drm/drmP.h> > +#include <drm/drm.h> > + > +#include "radeon.h" > +#include "r600d.h" > + > +/* Firmware Names */ > +#define FIRMWARE_RV770 "radeon/RV770_uvd.bin" > +#define FIRMWARE_RV710 "radeon/RV710_uvd.bin" > +#define FIRMWARE_CYPRESS "radeon/CYPRESS_uvd.bin" > +#define FIRMWARE_SUMO "radeon/SUMO_uvd.bin" > +#define FIRMWARE_TAHITI "radeon/TAHITI_uvd.bin" > + > +MODULE_FIRMWARE(FIRMWARE_RV770); > +MODULE_FIRMWARE(FIRMWARE_RV710); > +MODULE_FIRMWARE(FIRMWARE_CYPRESS); > +MODULE_FIRMWARE(FIRMWARE_SUMO); > +MODULE_FIRMWARE(FIRMWARE_TAHITI); > + > +int radeon_uvd_init(struct radeon_device *rdev) > +{ > + struct platform_device *pdev; > + unsigned long bo_size; > + const char *fw_name; > + int i, r; > + > + pdev = platform_device_register_simple("radeon_uvd", 0, NULL, 0); > + r = IS_ERR(pdev); > + if (r) { > + dev_err(rdev->dev, "radeon_uvd: Failed to register firmware\n"); > + return -EINVAL; > + } > + > + switch (rdev->family) { > + case CHIP_RV770: > + fw_name = FIRMWARE_RV770; > + break; > + > + case CHIP_RV710: > + case CHIP_RV730: > + case CHIP_RV740: > + fw_name = FIRMWARE_RV710; > + break; > + > + case CHIP_CYPRESS: > + case CHIP_JUNIPER: > + case CHIP_REDWOOD: > + case CHIP_CEDAR: > + fw_name = FIRMWARE_CYPRESS; > + break; > + > + case CHIP_SUMO: > + case CHIP_SUMO2: > + case CHIP_PALM: > + case CHIP_CAYMAN: > + case CHIP_BARTS: > + case CHIP_TURKS: > + case CHIP_CAICOS: > + fw_name = FIRMWARE_SUMO; > + break; > + > + case CHIP_TAHITI: > + case CHIP_VERDE: > + case CHIP_PITCAIRN: > + case CHIP_ARUBA: > + fw_name = FIRMWARE_TAHITI; > + break; > + > + default: > + return -EINVAL; > + } > + > + r = request_firmware(&rdev->uvd_fw, fw_name, &pdev->dev); > + if (r) { > + dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n", > + fw_name); > + platform_device_unregister(pdev); > + return r; > 
+ } > + > + platform_device_unregister(pdev); > + > + bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) + > + RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE; > + r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, > + RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->uvd.vcpu_bo); > + if (r) { > + dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r); > + return r; > + } > + > + r = radeon_uvd_resume(rdev); > + if (r) > + return r; > + > + memset(rdev->uvd.cpu_addr, 0, bo_size); > + memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size); > + > + r = radeon_uvd_suspend(rdev); > + if (r) > + return r; > + > + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { > + atomic_set(&rdev->uvd.handles[i], 0); > + rdev->uvd.filp[i] = NULL; > + } > + > + return 0; > +} > + > +void radeon_uvd_fini(struct radeon_device *rdev) > +{ > + radeon_uvd_suspend(rdev); > + radeon_bo_unref(&rdev->uvd.vcpu_bo); > +} > + > +int radeon_uvd_suspend(struct radeon_device *rdev) > +{ > + int r; > + > + if (rdev->uvd.vcpu_bo == NULL) > + return 0; > + > + r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false); > + if (!r) { > + radeon_bo_kunmap(rdev->uvd.vcpu_bo); > + radeon_bo_unpin(rdev->uvd.vcpu_bo); > + radeon_bo_unreserve(rdev->uvd.vcpu_bo); > + } > + return r; > +} > + > +int radeon_uvd_resume(struct radeon_device *rdev) > +{ > + int r; > + > + if (rdev->uvd.vcpu_bo == NULL) > + return -EINVAL; > + > + r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false); > + if (r) { > + radeon_bo_unref(&rdev->uvd.vcpu_bo); > + dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r); > + return r; > + } > + > + r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM, > + &rdev->uvd.gpu_addr); > + if (r) { > + radeon_bo_unreserve(rdev->uvd.vcpu_bo); > + radeon_bo_unref(&rdev->uvd.vcpu_bo); > + dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r); > + return r; > + } > + > + r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr); > + if (r) { > + dev_err(rdev->dev, "(%d) UVD map failed\n", r); > + return r; > + 
} > + > + radeon_bo_unreserve(rdev->uvd.vcpu_bo); > + > + return 0; > +} > + > +void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo) > +{ > + rbo->placement.fpfn = 0 >> PAGE_SHIFT; > + rbo->placement.lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; > +} > + > +void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp) > +{ > + int i, r; > + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { > + if (rdev->uvd.filp[i] == filp) { > + uint32_t handle = atomic_read(&rdev->uvd.handles[i]); > + struct radeon_fence *fence; > + > + r = radeon_uvd_get_destroy_msg(rdev, > + R600_RING_TYPE_UVD_INDEX, handle, &fence); > + if (r) { > + DRM_ERROR("Error destroying UVD (%d)!\n", r); > + continue; > + } > + > + radeon_fence_wait(fence, false); > + radeon_fence_unref(&fence); > + > + rdev->uvd.filp[i] = NULL; > + atomic_set(&rdev->uvd.handles[i], 0); > + } > + } > +} > + > +static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *msg) > +{ > + uint32_t *map, msg_type, handle; > + int i, r; > + > + r = radeon_bo_kmap(msg, (void **)&map); > + if (r) > + return r; > + > + msg_type = map[1]; > + handle = map[2]; > + > + radeon_bo_kunmap(msg); > + > + if (handle == 0) { > + DRM_ERROR("Invalid UVD handle!\n"); > + return -EINVAL; > + } > + > + if (msg_type == 2) { > + /* it's a destroy msg, free the handle */ > + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) > + atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0); > + return 0; > + } > + > + /* create or decode, validate the handle */ > + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { > + if (atomic_read(&p->rdev->uvd.handles[i]) == handle) > + return 0; > + } > + /* handle not found try to alloc a new one */ > + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) { > + if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) { > + p->rdev->uvd.filp[i] = p->filp; > + return 0; > + } > + } > + DRM_ERROR("No more free UVD handles!\n"); > + return -EINVAL; > +} > + > +static int radeon_uvd_cs_reloc(struct 
radeon_cs_parser *p, int data0, int data1) > +{ > + struct radeon_cs_chunk *relocs_chunk; > + struct radeon_cs_reloc *reloc; > + unsigned idx, cmd; > + uint64_t start, end; > + > + relocs_chunk = &p->chunks[p->chunk_relocs_idx]; > + idx = radeon_get_ib_value(p, data1); > + if (idx >= relocs_chunk->length_dw) { > + DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", > + idx, relocs_chunk->length_dw); > + return -EINVAL; > + } > + > + reloc = p->relocs_ptr[(idx / 4)]; > + start = reloc->lobj.gpu_offset; > + end = start + radeon_bo_size(reloc->robj); > + start += radeon_get_ib_value(p, data0); I am assuming there is no way for you to know how much data the UVD engine will write? Nothing here checks whether UVD could write past the end of the BO. > + > + p->ib.ptr[data0] = start & 0xFFFFFFFF; > + p->ib.ptr[data1] = start >> 32; > + > + > + cmd = radeon_get_ib_value(p, p->idx); > + if (cmd == 0) { > + if (end & 0xFFFFFFFFF0000000) { > + DRM_ERROR("msg buffer %LX-%LX out of 256MB segment!\n", > + start, end); > + return -EINVAL; > + } > + > + return radeon_uvd_cs_msg(p, reloc->robj); > + > + } > + > + if ((start & 0xFFFFFFFFF0000000) != (end & 0xFFFFFFFFF0000000)) { > + DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n", > + start, end); > + return -EINVAL; > + } > + return 0; > +} > + > +int radeon_uvd_cs_parse(struct radeon_cs_parser *p) > +{ > + struct radeon_cs_packet pkt; > + int i, r, data0 = 0, data1 = 0; > + > + if (p->chunks[p->chunk_ib_idx].length_dw % 16) { > + DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n", > + p->chunks[p->chunk_ib_idx].length_dw); > + return -EINVAL; > + } > + > + if (p->chunk_relocs_idx == -1) { > + DRM_ERROR("No relocation chunk !\n"); > + return -EINVAL; > + } > + > + > + do { > + r = radeon_cs_packet_parse(p, &pkt, p->idx); > + if (r) > + return r; > + switch (pkt.type) { > + case RADEON_PACKET_TYPE0: > + p->idx++; > + for (i = 0; i <= pkt.count; ++i) { > + switch (pkt.reg + i*4) { > + case 
UVD_GPCOM_VCPU_DATA0: > + data0 = p->idx; > + break; > + case UVD_GPCOM_VCPU_DATA1: > + data1 = p->idx; > + break; > + case UVD_GPCOM_VCPU_CMD: > + r = radeon_uvd_cs_reloc(p, data0, > + data1); > + if (r) > + return r; > + break; > + case UVD_ENGINE_CNTL: > + break; > + default: > + DRM_ERROR("Invalid reg 0x%X!\n", > + pkt.reg + i*4); > + return -EINVAL; > + } > + p->idx++; > + } > + break; > + case RADEON_PACKET_TYPE2: > + p->idx += pkt.count + 2; > + break; > + default: > + DRM_ERROR("Unknown packet type %d !\n", pkt.type); > + return -EINVAL; > + } > + } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw); > + return 0; > +} > + > +static int radeon_uvd_send_msg(struct radeon_device *rdev, > + int ring, struct radeon_bo *bo, > + struct radeon_fence **fence) > +{ > + struct ttm_validate_buffer tv; > + struct list_head head; > + struct radeon_ib ib; > + uint64_t addr; > + int i, r; > + > + memset(&tv, 0, sizeof(tv)); > + tv.bo = &bo->tbo; > + > + INIT_LIST_HEAD(&head); > + list_add(&tv.head, &head); > + > + r = ttm_eu_reserve_buffers(&head); > + if (r) > + return r; > + > + radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_VRAM); > + radeon_uvd_force_into_uvd_segment(bo); > + > + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); > + if (r) { > + ttm_eu_backoff_reservation(&head); > + return r; > + } > + > + r = radeon_ib_get(rdev, ring, &ib, NULL, 16); > + if (r) { > + ttm_eu_backoff_reservation(&head); > + return r; > + } > + > + addr = radeon_bo_gpu_offset(bo); > + ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0); > + ib.ptr[1] = addr; > + ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0); > + ib.ptr[3] = addr >> 32; > + ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0); > + ib.ptr[5] = 0; > + for (i = 6; i < 16; ++i) > + ib.ptr[i] = PACKET2(0); > + ib.length_dw = 16; > + > + r = radeon_ib_schedule(rdev, &ib, NULL); > + if (r) { > + ttm_eu_backoff_reservation(&head); > + return r; > + } > + ttm_eu_fence_buffer_objects(&head, ib.fence); > + > + if (fence) > + 
*fence = radeon_fence_ref(ib.fence); > + > + radeon_ib_free(rdev, &ib); > + radeon_bo_unref(&bo); > + return 0; > +} > + > +/* multiple fence commands without any stream commands in between can > + crash the vcpu so just try to emmit a dummy create/destroy msg to > + avoid this */ > +int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, > + uint32_t handle, struct radeon_fence **fence) > +{ > + struct radeon_bo *bo; > + uint32_t *msg; > + int r, i; > + > + r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true, > + RADEON_GEM_DOMAIN_VRAM, NULL, &bo); > + if (r) > + return r; > + > + r = radeon_bo_reserve(bo, false); > + if (r) { > + radeon_bo_unref(&bo); > + return r; > + } > + > + r = radeon_bo_kmap(bo, (void **)&msg); > + if (r) { > + radeon_bo_unreserve(bo); > + radeon_bo_unref(&bo); > + return r; > + } > + > + /* stitch together an UVD create msg */ > + msg[0] = 0x00000de4; > + msg[1] = 0x00000000; > + msg[2] = handle; > + msg[3] = 0x00000000; > + msg[4] = 0x00000000; > + msg[5] = 0x00000000; > + msg[6] = 0x00000000; > + msg[7] = 0x00000780; > + msg[8] = 0x00000440; > + msg[9] = 0x00000000; > + msg[10] = 0x01b37000; > + for (i = 11; i < 1024; ++i) > + msg[i] = 0x0; > + > + radeon_bo_kunmap(bo); > + radeon_bo_unreserve(bo); > + > + return radeon_uvd_send_msg(rdev, ring, bo, fence); > +} > + > +int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, > + uint32_t handle, struct radeon_fence **fence) > +{ > + struct radeon_bo *bo; > + uint32_t *msg; > + int r, i; > + > + r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true, > + RADEON_GEM_DOMAIN_VRAM, NULL, &bo); > + if (r) > + return r; > + > + r = radeon_bo_reserve(bo, false); > + if (r) { > + radeon_bo_unref(&bo); > + return r; > + } > + > + r = radeon_bo_kmap(bo, (void **)&msg); > + if (r) { > + radeon_bo_unreserve(bo); > + radeon_bo_unref(&bo); > + return r; > + } > + > + /* stitch together an UVD destroy msg */ > + msg[0] = 0x00000de4; > + msg[1] = 0x00000002; > + msg[2] = handle; > + msg[3] 
= 0x00000000; > + for (i = 4; i < 1024; ++i) > + msg[i] = 0x0; > + > + radeon_bo_kunmap(bo); > + radeon_bo_unreserve(bo); > + > + return radeon_uvd_send_msg(rdev, ring, bo, fence); > +} > diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c > index d63fe1d..5a78cce 100644 > --- a/drivers/gpu/drm/radeon/rv770.c > +++ b/drivers/gpu/drm/radeon/rv770.c > @@ -68,6 +68,107 @@ u32 rv770_get_xclk(struct radeon_device *rdev) > return reference_clock; > } > > +int rv770_uvd_resume(struct radeon_device *rdev) > +{ > + uint64_t addr; > + uint32_t chip_id, size; > + int r; > + > + r = radeon_uvd_resume(rdev); > + if (r) > + return r; > + > + /* programm the VCPU memory controller bits 0-27 */ > + addr = rdev->uvd.gpu_addr >> 3; > + size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3; > + WREG32(UVD_VCPU_CACHE_OFFSET0, addr); > + WREG32(UVD_VCPU_CACHE_SIZE0, size); > + > + addr += size; > + size = RADEON_UVD_STACK_SIZE >> 3; > + WREG32(UVD_VCPU_CACHE_OFFSET1, addr); > + WREG32(UVD_VCPU_CACHE_SIZE1, size); > + > + addr += size; > + size = RADEON_UVD_HEAP_SIZE >> 3; > + WREG32(UVD_VCPU_CACHE_OFFSET2, addr); > + WREG32(UVD_VCPU_CACHE_SIZE2, size); > + > + /* bits 28-31 */ > + addr = (rdev->uvd.gpu_addr >> 28) & 0xF; > + WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0)); > + > + /* bits 32-39 */ > + addr = (rdev->uvd.gpu_addr >> 32) & 0xFF; > + WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); > + > + /* tell firmware which hardware it is running on */ > + switch (rdev->family) { > + default: > + return -EINVAL; > + case CHIP_RV770: > + chip_id = 0x01000004; > + break; > + case CHIP_RV710: > + chip_id = 0x01000005; > + break; > + case CHIP_RV730: > + chip_id = 0x01000006; > + break; > + case CHIP_RV740: > + chip_id = 0x01000007; > + break; > + case CHIP_CYPRESS: > + chip_id = 0x01000008; > + break; > + case CHIP_JUNIPER: > + chip_id = 0x01000009; > + break; > + case CHIP_REDWOOD: > + chip_id = 0x0100000a; > + break; > + case CHIP_CEDAR: > 
+ chip_id = 0x0100000b; > + break; > + case CHIP_SUMO: > + chip_id = 0x0100000c; > + break; > + case CHIP_SUMO2: > + chip_id = 0x0100000d; > + break; > + case CHIP_PALM: > + chip_id = 0x0100000e; > + break; > + case CHIP_CAYMAN: > + chip_id = 0x0100000f; > + break; > + case CHIP_BARTS: > + chip_id = 0x01000010; > + break; > + case CHIP_TURKS: > + chip_id = 0x01000011; > + break; > + case CHIP_CAICOS: > + chip_id = 0x01000012; > + break; > + case CHIP_TAHITI: > + chip_id = 0x01000014; > + break; > + case CHIP_VERDE: > + chip_id = 0x01000015; > + break; > + case CHIP_PITCAIRN: > + chip_id = 0x01000016; > + break; > + case CHIP_ARUBA: > + chip_id = 0x01000017; > + break; > + } > + WREG32(UVD_VCPU_CHIP_ID, chip_id); > + > + return 0; > +} > + > u32 rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base) > { > struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id]; > @@ -1040,6 +1141,17 @@ static int rv770_startup(struct radeon_device *rdev) > return r; > } > > + r = rv770_uvd_resume(rdev); > + if (!r) { > + r = radeon_fence_driver_start_ring(rdev, > + R600_RING_TYPE_UVD_INDEX); > + if (r) > + dev_err(rdev->dev, "UVD fences init error (%d).\n", r); > + } > + > + if (r) > + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; > + > /* Enable IRQ */ > r = r600_irq_init(rdev); > if (r) { > @@ -1074,6 +1186,19 @@ static int rv770_startup(struct radeon_device *rdev) > if (r) > return r; > > + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; > + if (ring->ring_size) { > + r = radeon_ring_init(rdev, ring, ring->ring_size, > + R600_WB_UVD_RPTR_OFFSET, > + UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, > + 0, 0xfffff, RADEON_CP_PACKET2); > + if (!r) > + r = r600_uvd_init(rdev); > + > + if (r) > + DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); > + } > + > r = radeon_ib_pool_init(rdev); > if (r) { > dev_err(rdev->dev, "IB initialization failed (%d).\n", r); > @@ -1115,6 +1240,7 @@ int rv770_resume(struct radeon_device *rdev) > int rv770_suspend(struct 
radeon_device *rdev) > { > r600_audio_fini(rdev); > + radeon_uvd_suspend(rdev); > r700_cp_stop(rdev); > r600_dma_stop(rdev); > r600_irq_suspend(rdev); > @@ -1190,6 +1316,13 @@ int rv770_init(struct radeon_device *rdev) > rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL; > r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024); > > + r = radeon_uvd_init(rdev); > + if (!r) { > + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL; > + r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], > + 4096); > + } > + > rdev->ih.ring_obj = NULL; > r600_ih_ring_init(rdev, 64 * 1024); > > @@ -1224,6 +1357,7 @@ void rv770_fini(struct radeon_device *rdev) > radeon_ib_pool_fini(rdev); > radeon_irq_kms_fini(rdev); > rv770_pcie_gart_fini(rdev); > + radeon_uvd_fini(rdev); > r600_vram_scratch_fini(rdev); > radeon_gem_fini(rdev); > radeon_fence_driver_fini(rdev); > diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h > index c55f950..da158b54 100644 > --- a/drivers/gpu/drm/radeon/rv770d.h > +++ b/drivers/gpu/drm/radeon/rv770d.h > @@ -671,4 +671,18 @@ > # define TARGET_LINK_SPEED_MASK (0xf << 0) > # define SELECTABLE_DEEMPHASIS (1 << 6) > > +/* UVD */ > +#define UVD_LMI_EXT40_ADDR 0xf498 > +#define UVD_VCPU_CHIP_ID 0xf4d4 > +#define UVD_VCPU_CACHE_OFFSET0 0xf4d8 > +#define UVD_VCPU_CACHE_SIZE0 0xf4dc > +#define UVD_VCPU_CACHE_OFFSET1 0xf4e0 > +#define UVD_VCPU_CACHE_SIZE1 0xf4e4 > +#define UVD_VCPU_CACHE_OFFSET2 0xf4e8 > +#define UVD_VCPU_CACHE_SIZE2 0xf4ec > +#define UVD_LMI_ADDR_EXT 0xf594 > + > +#define UVD_RBC_RB_RPTR 0xf690 > +#define UVD_RBC_RB_WPTR 0xf694 > + > #endif > diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c > index bafbe32..cc9fe39 100644 > --- a/drivers/gpu/drm/radeon/si.c > +++ b/drivers/gpu/drm/radeon/si.c > @@ -4372,6 +4372,16 @@ static int si_startup(struct radeon_device *rdev) > return r; > } > > + r = rv770_uvd_resume(rdev); > + if (!r) { > + r = radeon_fence_driver_start_ring(rdev, > + 
R600_RING_TYPE_UVD_INDEX); > + if (r) > + dev_err(rdev->dev, "UVD fences init error (%d).\n", r); > + } > + if (r) > + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; > + > /* Enable IRQ */ > r = si_irq_init(rdev); > if (r) { > @@ -4429,6 +4439,18 @@ static int si_startup(struct radeon_device *rdev) > if (r) > return r; > > + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; > + if (ring->ring_size) { > + r = radeon_ring_init(rdev, ring, ring->ring_size, > + R600_WB_UVD_RPTR_OFFSET, > + UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, > + 0, 0xfffff, RADEON_CP_PACKET2); > + if (!r) > + r = r600_uvd_init(rdev); > + if (r) > + DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); > + } > + > r = radeon_ib_pool_init(rdev); > if (r) { > dev_err(rdev->dev, "IB initialization failed (%d).\n", r); > @@ -4472,6 +4494,8 @@ int si_suspend(struct radeon_device *rdev) > radeon_vm_manager_fini(rdev); > si_cp_enable(rdev, false); > cayman_dma_stop(rdev); > + r600_uvd_rbc_stop(rdev); > + radeon_uvd_suspend(rdev); > si_irq_suspend(rdev); > radeon_wb_disable(rdev); > si_pcie_gart_disable(rdev); > @@ -4557,6 +4581,13 @@ int si_init(struct radeon_device *rdev) > ring->ring_obj = NULL; > r600_ring_init(rdev, ring, 64 * 1024); > > + r = radeon_uvd_init(rdev); > + if (!r) { > + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; > + ring->ring_obj = NULL; > + r600_ring_init(rdev, ring, 4096); > + } > + > rdev->ih.ring_obj = NULL; > r600_ih_ring_init(rdev, 64 * 1024); > > @@ -4605,6 +4636,7 @@ void si_fini(struct radeon_device *rdev) > radeon_vm_manager_fini(rdev); > radeon_ib_pool_fini(rdev); > radeon_irq_kms_fini(rdev); > + radeon_uvd_fini(rdev); > si_pcie_gart_fini(rdev); > r600_vram_scratch_fini(rdev); > radeon_gem_fini(rdev); > diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h > index 23fc08f..759f682 100644 > --- a/drivers/gpu/drm/radeon/sid.h > +++ b/drivers/gpu/drm/radeon/sid.h > @@ -798,6 +798,12 @@ > # define THREAD_TRACE_FINISH (55 << 0) > > /* > + * UVD > + */ > +#define 
UVD_RBC_RB_RPTR 0xF690 > +#define UVD_RBC_RB_WPTR 0xF694 > + > +/* > * PM4 > */ > #define PACKET0(reg, n) ((RADEON_PACKET_TYPE0 << 30) | \ > diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h > index eeda917..cd085d1 100644 > --- a/include/uapi/drm/radeon_drm.h > +++ b/include/uapi/drm/radeon_drm.h > @@ -918,6 +918,7 @@ struct drm_radeon_gem_va { > #define RADEON_CS_RING_GFX 0 > #define RADEON_CS_RING_COMPUTE 1 > #define RADEON_CS_RING_DMA 2 > +#define RADEON_CS_RING_UVD 3 > /* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */ > /* 0 = normal, + = higher priority, - = lower priority */ > > -- > 1.7.9.5 > Cheers, Jerome _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel