Hi nicolas, Thanks for your suggestion. On Fri, 2022-01-28 at 16:49 -0500, Nicolas Dufresne wrote: > Hi Yunfei, > > thanks for you work, see comments below... > > Le samedi 22 janvier 2022 à 11:53 +0800, Yunfei Dong a écrit : > > For lat and core decode in parallel, need to get capture buffer > > when core start to decode and put capture buffer to display > > list when core decode done. > > > > Signed-off-by: Yunfei Dong <yunfei.dong@xxxxxxxxxxxx> > > --- > > .../mtk-vcodec/mtk_vcodec_dec_stateless.c | 121 ++++++++++++ > > ------ > > .../platform/mtk-vcodec/mtk_vcodec_drv.h | 5 +- > > .../mtk-vcodec/vdec/vdec_h264_req_if.c | 16 ++- > > 3 files changed, 102 insertions(+), 40 deletions(-) > > > > diff --git a/drivers/media/platform/mtk- > > vcodec/mtk_vcodec_dec_stateless.c b/drivers/media/platform/mtk- > > vcodec/mtk_vcodec_dec_stateless.c > > index 23a154c4e321..6d481410bf89 100644 > > --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_stateless.c > > +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_stateless.c > > @@ -108,37 +108,87 @@ static const struct mtk_codec_framesizes > > mtk_vdec_framesizes[] = { > > > > #define NUM_SUPPORTED_FRAMESIZE ARRAY_SIZE(mtk_vdec_framesizes) > > > > -static void mtk_vdec_stateless_set_dst_payload(struct > > mtk_vcodec_ctx *ctx, > > - struct vdec_fb *fb) > > +static void mtk_vdec_stateless_out_to_done(struct mtk_vcodec_ctx > > *ctx, > > + struct mtk_vcodec_mem *bs, > > int error) > > { > > - struct mtk_video_dec_buf *vdec_frame_buf = > > - container_of(fb, struct mtk_video_dec_buf, > > frame_buffer); > > - struct vb2_v4l2_buffer *vb = &vdec_frame_buf->m2m_buf.vb; > > - unsigned int cap_y_size = ctx- > > >q_data[MTK_Q_DATA_DST].sizeimage[0]; > > + struct mtk_video_dec_buf *out_buf; > > + struct vb2_v4l2_buffer *vb; > > > > - vb2_set_plane_payload(&vb->vb2_buf, 0, cap_y_size); > > - if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 2) { > > - unsigned int cap_c_size = > > - ctx->q_data[MTK_Q_DATA_DST].sizeimage[1]; > > + if (!bs) { > > + mtk_v4l2_err("Free bitstream buffer fail."); > > + return; > > + } > > + out_buf = container_of(bs, struct mtk_video_dec_buf, > > bs_buffer); > > + vb = &out_buf->m2m_buf.vb; > > > > - vb2_set_plane_payload(&vb->vb2_buf, 1, cap_c_size); > > + mtk_v4l2_debug(2, "Free bitsteam buffer id = %d to done_list", > > + vb->vb2_buf.index); > > + > > + v4l2_m2m_src_buf_remove(ctx->m2m_ctx); > > + if (error) { > > + v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR); > > + if (error == -EIO) > > + out_buf->error = true; > > + } else { > > + v4l2_m2m_buf_done(vb, VB2_BUF_STATE_DONE); > > } > > } > > > > -static struct vdec_fb *vdec_get_cap_buffer(struct mtk_vcodec_ctx > > *ctx, > > - struct vb2_v4l2_buffer > > *vb2_v4l2) > > +static void mtk_vdec_stateless_cap_to_disp(struct mtk_vcodec_ctx > > *ctx, > > + struct vdec_fb *fb, int > > error) > > { > > - struct mtk_video_dec_buf *framebuf = > > - container_of(vb2_v4l2, struct mtk_video_dec_buf, > > m2m_buf.vb); > > - struct vdec_fb *pfb = &framebuf->frame_buffer; > > - struct vb2_buffer *dst_buf = &vb2_v4l2->vb2_buf; > > + struct mtk_video_dec_buf *vdec_frame_buf; > > + struct vb2_v4l2_buffer *vb; > > + unsigned int cap_y_size, cap_c_size; > > + > > + if (!fb) { > > + mtk_v4l2_err("Free frame buffer fail."); > > + return; > > + } > > + vdec_frame_buf = container_of(fb, struct mtk_video_dec_buf, > > + frame_buffer); > > + vb = &vdec_frame_buf->m2m_buf.vb; > > + > > + cap_y_size = ctx->q_data[MTK_Q_DATA_DST].sizeimage[0]; > > + cap_c_size = ctx->q_data[MTK_Q_DATA_DST].sizeimage[1]; > > + > > + v4l2_m2m_dst_buf_remove(ctx->m2m_ctx); > > > > - pfb->base_y.va = NULL; > > + vb2_set_plane_payload(&vb->vb2_buf, 0, cap_y_size); > > + if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 2) > > + vb2_set_plane_payload(&vb->vb2_buf, 1, cap_c_size); > > + > > + mtk_v4l2_debug(2, "Free frame buffer id = %d to done_list", > > + vb->vb2_buf.index); > > + if (error) > > + v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR); > > + else > > + v4l2_m2m_buf_done(vb, VB2_BUF_STATE_DONE); > > +} > > + > > +static struct vdec_fb *vdec_get_cap_buffer(struct mtk_vcodec_ctx > > *ctx) > > +{ > > + struct mtk_video_dec_buf *framebuf; > > + struct vb2_v4l2_buffer *vb2_v4l2; > > + struct vb2_buffer *dst_buf; > > + struct vdec_fb *pfb; > > + > > + vb2_v4l2 = v4l2_m2m_next_dst_buf(ctx->m2m_ctx); > > + if (!vb2_v4l2) { > > + mtk_v4l2_debug(1, "[%d] dst_buf empty!!", ctx->id); > > + return NULL; > > + } > > + > > + dst_buf = &vb2_v4l2->vb2_buf; > > + framebuf = container_of(vb2_v4l2, struct mtk_video_dec_buf, > > m2m_buf.vb); > > + > > + pfb = &framebuf->frame_buffer; > > + pfb->base_y.va = vb2_plane_vaddr(dst_buf, 0); > > pfb->base_y.dma_addr = vb2_dma_contig_plane_dma_addr(dst_buf, > > 0); > > pfb->base_y.size = ctx->q_data[MTK_Q_DATA_DST].sizeimage[0]; > > > > if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 2) { > > - pfb->base_c.va = NULL; > > + pfb->base_c.va = vb2_plane_vaddr(dst_buf, 1); > > pfb->base_c.dma_addr = > > vb2_dma_contig_plane_dma_addr(dst_buf, 1); > > pfb->base_c.size = ctx- > > >q_data[MTK_Q_DATA_DST].sizeimage[1]; > > @@ -162,12 +212,11 @@ static void mtk_vdec_worker(struct > > work_struct *work) > > struct mtk_vcodec_ctx *ctx = > > container_of(work, struct mtk_vcodec_ctx, decode_work); > > struct mtk_vcodec_dev *dev = ctx->dev; > > - struct vb2_v4l2_buffer *vb2_v4l2_src, *vb2_v4l2_dst; > > + struct vb2_v4l2_buffer *vb2_v4l2_src; > > struct vb2_buffer *vb2_src; > > struct mtk_vcodec_mem *bs_src; > > struct mtk_video_dec_buf *dec_buf_src; > > struct media_request *src_buf_req; > > - struct vdec_fb *dst_buf; > > bool res_chg = false; > > int ret; > > > > @@ -178,13 +227,6 @@ static void mtk_vdec_worker(struct work_struct > > *work) > > return; > > } > > > > - vb2_v4l2_dst = v4l2_m2m_next_dst_buf(ctx->m2m_ctx); > > - if (!vb2_v4l2_dst) { > > - v4l2_m2m_job_finish(dev->m2m_dev_dec, ctx->m2m_ctx); > > - mtk_v4l2_debug(1, "[%d] no available destination > > buffer", ctx->id); > > - return; > > - } > > - > > vb2_src = &vb2_v4l2_src->vb2_buf; > > dec_buf_src = container_of(vb2_v4l2_src, struct > > mtk_video_dec_buf, > > m2m_buf.vb); > > @@ -193,9 +235,15 @@ static void mtk_vdec_worker(struct work_struct > > *work) > > mtk_v4l2_debug(3, "[%d] (%d) id=%d, vb=%p", ctx->id, > > vb2_src->vb2_queue->type, vb2_src->index, > > vb2_src); > > > > - bs_src->va = NULL; > > + bs_src->va = vb2_plane_vaddr(vb2_src, 0); > > bs_src->dma_addr = vb2_dma_contig_plane_dma_addr(vb2_src, 0); > > bs_src->size = (size_t)vb2_src->planes[0].bytesused; > > + if (!bs_src->va) { > > + v4l2_m2m_job_finish(dev->m2m_dev_dec, ctx->m2m_ctx); > > + mtk_v4l2_err("[%d] id=%d source buffer is NULL", ctx- > > >id, > > + vb2_src->index); > > + return; > > + } > > > > mtk_v4l2_debug(3, "[%d] Bitstream VA=%p DMA=%pad Size=%zx > > vb=%p", > > ctx->id, bs_src->va, &bs_src->dma_addr, bs_src- > > >size, vb2_src); > > @@ -206,9 +254,7 @@ static void mtk_vdec_worker(struct work_struct > > *work) > > else > > mtk_v4l2_err("vb2 buffer media request is NULL"); > > > > - dst_buf = vdec_get_cap_buffer(ctx, vb2_v4l2_dst); > > - v4l2_m2m_buf_copy_metadata(vb2_v4l2_src, vb2_v4l2_dst, true); > > Please keep using this helper, it is specially crafted to ease > maintenance. Move this function to each codec inside, for lat and core decode in parallel, can't use this function directly in lat workqueue. > > > - ret = vdec_if_decode(ctx, bs_src, dst_buf, &res_chg); > > + ret = vdec_if_decode(ctx, bs_src, NULL, &res_chg); > > if (ret) { > > mtk_v4l2_err(" <===[%d], src_buf[%d] sz=0x%zx pts=%llu > > vdec_if_decode() ret=%d res_chg=%d===>", > > ctx->id, vb2_src->index, bs_src->size, > > @@ -220,12 +266,9 @@ static void mtk_vdec_worker(struct work_struct > > *work) > > } > > } > > > > - mtk_vdec_stateless_set_dst_payload(ctx, dst_buf); > > - > > - v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev_dec, ctx- > > >m2m_ctx, > > - ret ? VB2_BUF_STATE_ERROR : > > VB2_BUF_STATE_DONE); > > - > > + mtk_vdec_stateless_out_to_done(ctx, bs_src, ret); > > v4l2_m2m_buf_done_and_job_finish() was specially crafted to prevent > developer > from implementing the signalling of the request at the wrong moment. > This patch > broke this strict ordering. The relevant comment in the helper > function: > > As we discussed in chat, please help to check whether it's possible to let lat and core decode in parallel. I will continue to fix h264 issue. Thanks for your help. Best Regards, Yunfei Dong > /* > * If the request API is being used, returning the OUTPUT > * (src) buffer will wake-up any process waiting on the > * request file descriptor. > * > * Therefore, return the CAPTURE (dst) buffer first, > * to avoid signalling the request file descriptor > * before the CAPTURE buffer is done. > */ > > In short, as request signalling is bound to the src buffer, with this > change you > signal the request too early, which may lead userland to think it can > DQ the > capture buffer. I see exactly that happening when running with > GStreamer, which > strictly rely on the request polling. Please keep using > v4l2_m2m_buf_done_and_job_finish(), and move it into > mtk_vdec_stateless_cap_to_disp(). > > > > v4l2_ctrl_request_complete(src_buf_req, &ctx->ctrl_hdl); > > + v4l2_m2m_job_finish(dev->m2m_dev_dec, ctx->m2m_ctx); > > } > > > > static void vb2ops_vdec_stateless_buf_queue(struct vb2_buffer *vb) > > @@ -358,6 +401,8 @@ const struct mtk_vcodec_dec_pdata > > mtk_vdec_8183_pdata = { > > .uses_stateless_api = true, > > .worker = mtk_vdec_worker, > > .flush_decoder = mtk_vdec_flush_decoder, > > + .cap_to_disp = mtk_vdec_stateless_cap_to_disp, > > + .get_cap_buffer = vdec_get_cap_buffer, > > .is_subdev_supported = false, > > .hw_arch = MTK_VDEC_PURE_SINGLE_CORE, > > }; > > @@ -376,6 +421,8 @@ const struct mtk_vcodec_dec_pdata > > mtk_lat_sig_core_pdata = { > > .uses_stateless_api = true, > > .worker = mtk_vdec_worker, > > .flush_decoder = mtk_vdec_flush_decoder, > > + .cap_to_disp = mtk_vdec_stateless_cap_to_disp, > > + .get_cap_buffer = vdec_get_cap_buffer, > > .is_subdev_supported = true, > > .hw_arch = MTK_VDEC_LAT_SINGLE_CORE, > > }; > > diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h > > b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h > > index 2d1d878692ca..e0b7d2fda632 100644 > > --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h > > +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h > > @@ -353,7 +353,8 @@ enum mtk_vdec_hw_arch { > > * @ctrls_setup: init vcodec dec ctrls > > * @worker: worker to start a decode job > > * @flush_decoder: function that flushes the decoder > > - * > > + * @get_cap_buffer: get capture buffer from capture queue > > + * @cap_to_disp: put capture buffer to disp list > > * @vdec_vb2_ops: struct vb2_ops > > * > > * @vdec_formats: supported video decoder formats > > @@ -375,6 +376,8 @@ struct mtk_vcodec_dec_pdata { > > int (*ctrls_setup)(struct mtk_vcodec_ctx *ctx); > > void (*worker)(struct work_struct *work); > > int (*flush_decoder)(struct mtk_vcodec_ctx *ctx); > > + struct vdec_fb *(*get_cap_buffer)(struct mtk_vcodec_ctx *ctx); > > + void (*cap_to_disp)(struct mtk_vcodec_ctx *ctx, struct vdec_fb > > *fb, int error); > > > > struct vb2_ops *vdec_vb2_ops; > > > > diff --git a/drivers/media/platform/mtk- > > vcodec/vdec/vdec_h264_req_if.c b/drivers/media/platform/mtk- > > vcodec/vdec/vdec_h264_req_if.c > > index 43542de11e9c..36f3dc1fbe3b 100644 > > --- a/drivers/media/platform/mtk-vcodec/vdec/vdec_h264_req_if.c > > +++ b/drivers/media/platform/mtk-vcodec/vdec/vdec_h264_req_if.c > > @@ -670,32 +670,42 @@ static void vdec_h264_slice_deinit(void > > *h_vdec) > > } > > > > static int vdec_h264_slice_decode(void *h_vdec, struct > > mtk_vcodec_mem *bs, > > - struct vdec_fb *fb, bool *res_chg) > > + struct vdec_fb *unused, bool > > *res_chg) > > { > > struct vdec_h264_slice_inst *inst = h_vdec; > > const struct v4l2_ctrl_h264_decode_params *dec_params = > > get_ctrl_ptr(inst->ctx, > > V4L2_CID_STATELESS_H264_DECODE_PARAMS); > > struct vdec_vpu_inst *vpu = &inst->vpu; > > + struct mtk_video_dec_buf *src_buf_info; > > + struct mtk_video_dec_buf *dst_buf_info; > > + struct vdec_fb *fb; > > u32 data[2]; > > u64 y_fb_dma; > > u64 c_fb_dma; > > int err; > > > > + inst->num_nalu++; > > /* bs NULL means flush decoder */ > > if (!bs) > > return vpu_dec_reset(vpu); > > > > + fb = inst->ctx->dev->vdec_pdata->get_cap_buffer(inst->ctx); > > + src_buf_info = container_of(bs, struct mtk_video_dec_buf, > > bs_buffer); > > + dst_buf_info = container_of(fb, struct mtk_video_dec_buf, > > frame_buffer); > > + > > y_fb_dma = fb ? (u64)fb->base_y.dma_addr : 0; > > c_fb_dma = fb ? (u64)fb->base_c.dma_addr : 0; > > > > mtk_vcodec_debug(inst, "+ [%d] FB y_dma=%llx c_dma=%llx va=%p", > > - ++inst->num_nalu, y_fb_dma, c_fb_dma, fb); > > + inst->num_nalu, y_fb_dma, c_fb_dma, fb); > > > > inst->vsi_ctx.dec.bs_dma = (uint64_t)bs->dma_addr; > > inst->vsi_ctx.dec.y_fb_dma = y_fb_dma; > > inst->vsi_ctx.dec.c_fb_dma = c_fb_dma; > > inst->vsi_ctx.dec.vdec_fb_va = (u64)(uintptr_t)fb; > > > > + v4l2_m2m_buf_copy_metadata(&src_buf_info->m2m_buf.vb, > > + &dst_buf_info->m2m_buf.vb, true); > > get_vdec_decode_parameters(inst); > > data[0] = bs->size; > > /* > > @@ -734,6 +744,8 @@ static int vdec_h264_slice_decode(void *h_vdec, > > struct mtk_vcodec_mem *bs, > > > > memcpy(&inst->vsi_ctx, inst->vpu.vsi, sizeof(inst->vsi_ctx)); > > mtk_vcodec_debug(inst, "\n - NALU[%d]", inst->num_nalu); > > + > > + inst->ctx->dev->vdec_pdata->cap_to_disp(inst->ctx, fb, 0); > > return 0; > > > > err_free_fb_out: > >