Hi Mingjia, Thanks for your patch. Reviewed-by: Yunfei Dong <yunfei.dong@xxxxxxxxxxxx> On Wed, 2022-07-27 at 14:13 +0800, Mingjia Zhang wrote: > In order to reduce decoder latency, enable VP9 inner racing mode. > Send lat trans buffer information to core when lat is triggered to work; > there is no need to wait until lat decode is done. > > Signed-off-by: mingjia zhang <mingjia.zhang@xxxxxxxxxxxx> > --- > 1. CTS/GTS test pass > 2. Fluster result: Ran 240/303 tests successfully > --- > .../vcodec/vdec/vdec_vp9_req_lat_if.c | 64 ++++++++++++----- > -- > 1 file changed, 40 insertions(+), 24 deletions(-) > > diff --git > a/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c > b/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c > index fb1c36a3592d..92b47f0fdf40 100644 > --- > a/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c > +++ > b/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c > @@ -436,6 +436,7 @@ struct vdec_vp9_slice_ref { > * @frame_ctx: 4 frame context according to VP9 Spec > * @frame_ctx_helper: 4 frame context according to newest > kernel spec > * @dirty: state of each frame context > + * @local_vsi: local instance vsi information > * @init_vsi: vsi used for initialized VP9 instance > * @vsi: vsi used for decoding/flush ... 
> * @core_vsi: vsi used for Core stage > @@ -482,6 +483,8 @@ struct vdec_vp9_slice_instance { > struct v4l2_vp9_frame_context frame_ctx_helper; > unsigned char dirty[4]; > > + struct vdec_vp9_slice_vsi local_vsi; > + > /* MicroP vsi */ > union { > struct vdec_vp9_slice_init_vsi *init_vsi; > @@ -1616,16 +1619,10 @@ static int > vdec_vp9_slice_update_single(struct vdec_vp9_slice_instance *instance > } > > static int vdec_vp9_slice_update_lat(struct vdec_vp9_slice_instance > *instance, > - struct vdec_lat_buf *lat_buf, > - struct vdec_vp9_slice_pfc *pfc) > + struct vdec_vp9_slice_vsi *vsi) > { > - struct vdec_vp9_slice_vsi *vsi; > - > - vsi = &pfc->vsi; > - memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state)); > - > mtk_vcodec_debug(instance, "Frame %u LAT CRC 0x%08x %lx %lx\n", > - pfc->seq, vsi->state.crc[0], > + (instance->seq - 1), vsi->state.crc[0], > (unsigned long)vsi->trans.dma_addr, > (unsigned long)vsi->trans.dma_addr_end); > > @@ -2090,6 +2087,13 @@ static int vdec_vp9_slice_lat_decode(void > *h_vdec, struct mtk_vcodec_mem *bs, > return ret; > } > > + if (IS_VDEC_INNER_RACING(instance->ctx->dev->dec_capability)) { > + vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0); > + memcpy(&instance->local_vsi, vsi, sizeof(*vsi)); > + vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx, > lat_buf); > + vsi = &instance->local_vsi; > + } > + > if (instance->irq) { > ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_ > RECEIVED, > WAIT_INTR_TIMEOUT_MS > , MTK_VDEC_LAT0); > @@ -2102,22 +2106,25 @@ static int vdec_vp9_slice_lat_decode(void > *h_vdec, struct mtk_vcodec_mem *bs, > } > > vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0); > - ret = vdec_vp9_slice_update_lat(instance, lat_buf, pfc); > + ret = vdec_vp9_slice_update_lat(instance, vsi); > > - /* LAT trans full, no more UBE or decode timeout */ > - if (ret) { > - mtk_vcodec_err(instance, "VP9 decode error: %d\n", > ret); > - return ret; > - } > + if 
(!IS_VDEC_INNER_RACING(instance->ctx->dev->dec_capability)) > + /* LAT trans full, no more UBE or decode timeout */ > + if (ret) { > + mtk_vcodec_err(instance, "frame[%d] decode > error: %d\n", > + ret, (instance->seq - 1)); > + return ret; > + } > > - mtk_vcodec_debug(instance, "lat dma addr: 0x%lx 0x%lx\n", > - (unsigned long)pfc->vsi.trans.dma_addr, > - (unsigned long)pfc->vsi.trans.dma_addr_end); > > - vdec_msg_queue_update_ube_wptr(&ctx->msg_queue, > - vsi->trans.dma_addr_end + > - ctx- > >msg_queue.wdma_addr.dma_addr); > - vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx, lat_buf); > + vsi->trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr; > + vdec_msg_queue_update_ube_wptr(&ctx->msg_queue, vsi- > >trans.dma_addr_end); > + if (!IS_VDEC_INNER_RACING(instance->ctx->dev->dec_capability)) > + vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx, > lat_buf); > + > + mtk_vcodec_debug(instance, "lat trans end addr(0x%lx), ube > start addr(0x%lx)\n", > + (unsigned long)vsi->trans.dma_addr_end, > + (unsigned long)ctx- > >msg_queue.wdma_addr.dma_addr); > > return 0; > } > @@ -2193,10 +2200,14 @@ static int vdec_vp9_slice_core_decode(struct > vdec_lat_buf *lat_buf) > goto err; > } > > - pfc->vsi.trans.dma_addr_end += ctx- > >msg_queue.wdma_addr.dma_addr; > mtk_vcodec_debug(instance, "core dma_addr_end 0x%lx\n", > (unsigned long)pfc->vsi.trans.dma_addr_end); > - vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc- > >vsi.trans.dma_addr_end); > + > + if (IS_VDEC_INNER_RACING(instance->ctx->dev->dec_capability)) > + vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc- > >vsi.trans.dma_addr); > + else > + vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc- > >vsi.trans.dma_addr_end); > + > ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf- > >src_buf_req); > > return 0; > @@ -2204,7 +2215,12 @@ static int vdec_vp9_slice_core_decode(struct > vdec_lat_buf *lat_buf) > err: > if (ctx && pfc) { > /* always update read pointer */ > - 
vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc- > >vsi.trans.dma_addr_end); > + if (IS_VDEC_INNER_RACING(instance->ctx->dev- > >dec_capability)) > + vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, > + pfc- > >vsi.trans.dma_addr); > + else > + vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, > + pfc- > >vsi.trans.dma_addr_end); > > if (fb) > ctx->dev->vdec_pdata->cap_to_disp(ctx, 1, > lat_buf->src_buf_req);