Hi Benjamin, On Thu, 2 Dec 2021 at 09:08, Benjamin Gaignard <benjamin.gaignard@xxxxxxxxxxxxx> wrote: > > > Le 02/12/2021 à 12:11, Ezequiel Garcia a écrit : > > Hi Benjamin, > > > > Almost there :-) > > > > On Thu, 2 Dec 2021 at 07:55, Benjamin Gaignard > > <benjamin.gaignard@xxxxxxxxxxxxx> wrote: > >> Use the postprocessor functions introduced by Hantro G2/VP9 codec series > >> and remove duplicated buffer management. > >> This allow Hantro G12/HEVC to produce NV12_4L4 and NV12. > >> > > s/G12/G2 > > > >> fluster score is 77/147 which equal to previous score. > > s/fluster/Fluster > > > > Can you clarify if this is the HEVC score? While here, can you run VP9 > > to confirm there are no regressions? > > I haven't VP9 GStreamer decoder element ready so I can't do that now. > That said the patch doesn't touch any VP9 code. > VP9 GStreamer is merged, and building with the monorepo is really really easy now. The VP9 driver changes have been sent for merge, see https://www.spinics.net/lists/linux-media/msg202448.html which includes the git repository and tag you can merge/rebase to test it. Please rebase on that and make sure we don't have VP9 regressions and update the commit description. This might sound annoying, but it is what it is until we have CI in place. Thanks! Ezequiel > Benjamin > > >> Note that since NV12_4L2 is the first pixel format enumerated by the > >> driver fluster pipeline and it is well converted into I420 by > >> GStreamer's videoconvert. > >> > >> Beauty, Jockey and ShakeNDry bitstreams from UVG set have also been > >> tested. > >> > > Can you add a link to the UVG test suite, so it's clearer? > > > > Also, this patch is v2, so don't forget to prefix the next one as v3, > > and add a changelog to it. > > > > Thanks a lot, > > Ezequiel > > > >> Signed-off-by: Benjamin Gaignard <benjamin.gaignard@xxxxxxxxxxxxx> > >> --- > >> .../staging/media/hantro/hantro_g2_hevc_dec.c | 25 +++--- > >> drivers/staging/media/hantro/hantro_hevc.c | 79 +++---------------- > >> drivers/staging/media/hantro/hantro_hw.h | 11 +++ > >> .../staging/media/hantro/hantro_postproc.c | 3 + > >> drivers/staging/media/hantro/hantro_v4l2.c | 19 ++--- > >> 5 files changed, 41 insertions(+), 96 deletions(-) > >> > >> diff --git a/drivers/staging/media/hantro/hantro_g2_hevc_dec.c b/drivers/staging/media/hantro/hantro_g2_hevc_dec.c > >> index b35f36109a6f..6ef5430b18eb 100644 > >> --- a/drivers/staging/media/hantro/hantro_g2_hevc_dec.c > >> +++ b/drivers/staging/media/hantro/hantro_g2_hevc_dec.c > >> @@ -341,6 +341,8 @@ static int set_ref(struct hantro_ctx *ctx) > >> const struct v4l2_hevc_dpb_entry *dpb = decode_params->dpb; > >> dma_addr_t luma_addr, chroma_addr, mv_addr = 0; > >> struct hantro_dev *vpu = ctx->dev; > >> + struct vb2_v4l2_buffer *vb2_dst; > >> + struct hantro_decoded_buffer *dst; > >> size_t cr_offset = hantro_hevc_chroma_offset(sps); > >> size_t mv_offset = hantro_hevc_motion_vectors_offset(sps); > >> u32 max_ref_frames; > >> @@ -426,10 +428,15 @@ static int set_ref(struct hantro_ctx *ctx) > >> hantro_write_addr(vpu, G2_REF_MV_ADDR(i), mv_addr); > >> } > >> > >> - luma_addr = hantro_hevc_get_ref_buf(ctx, decode_params->pic_order_cnt_val); > >> + vb2_dst = hantro_get_dst_buf(ctx); > >> + dst = vb2_to_hantro_decoded_buf(&vb2_dst->vb2_buf); > >> + luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf); > >> if (!luma_addr) > >> return -ENOMEM; > >> > >> + if (hantro_hevc_add_ref_buf(ctx, decode_params->pic_order_cnt_val, luma_addr)) > >> + return -EINVAL; > >> + > >> chroma_addr = luma_addr + cr_offset; > >> mv_addr = luma_addr + mv_offset; > >> > >> @@ -456,16 +463,12 @@ static int set_ref(struct hantro_ctx *ctx) > >> > >> static void set_buffers(struct hantro_ctx *ctx) > >> { > >> - struct vb2_v4l2_buffer *src_buf, *dst_buf; > >> + struct vb2_v4l2_buffer *src_buf; > >> struct hantro_dev *vpu = ctx->dev; > >> - const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls; > >> - const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps; > >> - size_t cr_offset = hantro_hevc_chroma_offset(sps); > >> - dma_addr_t src_dma, dst_dma; > >> + dma_addr_t src_dma; > >> u32 src_len, src_buf_len; > >> > >> src_buf = hantro_get_src_buf(ctx); > >> - dst_buf = hantro_get_dst_buf(ctx); > >> > >> /* Source (stream) buffer. */ > >> src_dma = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0); > >> @@ -478,11 +481,6 @@ static void set_buffers(struct hantro_ctx *ctx) > >> hantro_reg_write(vpu, &g2_strm_start_offset, 0); > >> hantro_reg_write(vpu, &g2_write_mvs_e, 1); > >> > >> - /* Destination (decoded frame) buffer. */ > >> - dst_dma = hantro_get_dec_buf_addr(ctx, &dst_buf->vb2_buf); > >> - > >> - hantro_write_addr(vpu, G2_RS_OUT_LUMA_ADDR, dst_dma); > >> - hantro_write_addr(vpu, G2_RS_OUT_CHROMA_ADDR, dst_dma + cr_offset); > >> hantro_write_addr(vpu, G2_TILE_SIZES_ADDR, ctx->hevc_dec.tile_sizes.dma); > >> hantro_write_addr(vpu, G2_TILE_FILTER_ADDR, ctx->hevc_dec.tile_filter.dma); > >> hantro_write_addr(vpu, G2_TILE_SAO_ADDR, ctx->hevc_dec.tile_sao.dma); > >> @@ -575,9 +573,6 @@ int hantro_g2_hevc_dec_run(struct hantro_ctx *ctx) > >> /* Don't compress buffers */ > >> hantro_reg_write(vpu, &g2_ref_compress_bypass, 1); > >> > >> - /* use NV12 as output format */ > >> - hantro_reg_write(vpu, &g2_out_rs_e, 1); > >> - > >> /* Bus width and max burst */ > >> hantro_reg_write(vpu, &g2_buswidth, BUS_WIDTH_128); > >> hantro_reg_write(vpu, &g2_max_burst, 16); > >> diff --git a/drivers/staging/media/hantro/hantro_hevc.c b/drivers/staging/media/hantro/hantro_hevc.c > >> index ee03123e7704..b49a41d7ae91 100644 > >> --- a/drivers/staging/media/hantro/hantro_hevc.c > >> +++ b/drivers/staging/media/hantro/hantro_hevc.c > >> @@ -44,47 +44,6 @@ size_t hantro_hevc_motion_vectors_offset(const struct v4l2_ctrl_hevc_sps *sps) > >> return ALIGN((cr_offset * 3) / 2, G2_ALIGN); > >> } > >> > >> -static size_t hantro_hevc_mv_size(const struct v4l2_ctrl_hevc_sps *sps) > >> -{ > >> - u32 min_cb_log2_size_y = sps->log2_min_luma_coding_block_size_minus3 + 3; > >> - u32 ctb_log2_size_y = min_cb_log2_size_y + sps->log2_diff_max_min_luma_coding_block_size; > >> - u32 pic_width_in_ctbs_y = (sps->pic_width_in_luma_samples + (1 << ctb_log2_size_y) - 1) > >> - >> ctb_log2_size_y; > >> - u32 pic_height_in_ctbs_y = (sps->pic_height_in_luma_samples + (1 << ctb_log2_size_y) - 1) > >> - >> ctb_log2_size_y; > >> - size_t mv_size; > >> - > >> - mv_size = pic_width_in_ctbs_y * pic_height_in_ctbs_y * > >> - (1 << (2 * (ctb_log2_size_y - 4))) * 16; > >> - > >> - vpu_debug(4, "%dx%d (CTBs) %zu MV bytes\n", > >> - pic_width_in_ctbs_y, pic_height_in_ctbs_y, mv_size); > >> - > >> - return mv_size; > >> -} > >> - > >> -static size_t hantro_hevc_ref_size(struct hantro_ctx *ctx) > >> -{ > >> - const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls; > >> - const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps; > >> - > >> - return hantro_hevc_motion_vectors_offset(sps) + hantro_hevc_mv_size(sps); > >> -} > >> - > >> -static void hantro_hevc_ref_free(struct hantro_ctx *ctx) > >> -{ > >> - struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec; > >> - struct hantro_dev *vpu = ctx->dev; > >> - int i; > >> - > >> - for (i = 0; i < NUM_REF_PICTURES; i++) { > >> - if (hevc_dec->ref_bufs[i].cpu) > >> - dma_free_coherent(vpu->dev, hevc_dec->ref_bufs[i].size, > >> - hevc_dec->ref_bufs[i].cpu, > >> - hevc_dec->ref_bufs[i].dma); > >> - } > >> -} > >> - > >> static void hantro_hevc_ref_init(struct hantro_ctx *ctx) > >> { > >> struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec; > >> @@ -108,37 +67,25 @@ dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx, > >> } > >> } > >> > >> - /* Allocate a new reference buffer */ > >> + return 0; > >> +} > >> + > >> +int hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc, dma_addr_t addr) > >> +{ > >> + struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec; > >> + int i; > >> + > >> + /* Add a new reference buffer */ > >> for (i = 0; i < NUM_REF_PICTURES; i++) { > >> if (hevc_dec->ref_bufs_poc[i] == UNUSED_REF) { > >> - if (!hevc_dec->ref_bufs[i].cpu) { > >> - struct hantro_dev *vpu = ctx->dev; > >> - > >> - /* > >> - * Allocate the space needed for the raw data + > >> - * motion vector data. Optimizations could be to > >> - * allocate raw data in non coherent memory and only > >> - * clear the motion vector data. > >> - */ > >> - hevc_dec->ref_bufs[i].cpu = > >> - dma_alloc_coherent(vpu->dev, > >> - hantro_hevc_ref_size(ctx), > >> - &hevc_dec->ref_bufs[i].dma, > >> - GFP_KERNEL); > >> - if (!hevc_dec->ref_bufs[i].cpu) > >> - return 0; > >> - > >> - hevc_dec->ref_bufs[i].size = hantro_hevc_ref_size(ctx); > >> - } > >> hevc_dec->ref_bufs_used |= 1 << i; > >> - memset(hevc_dec->ref_bufs[i].cpu, 0, hantro_hevc_ref_size(ctx)); > >> hevc_dec->ref_bufs_poc[i] = poc; > >> - > >> - return hevc_dec->ref_bufs[i].dma; > >> + hevc_dec->ref_bufs[i].dma = addr; > >> + return 0; > >> } > >> } > >> > >> - return 0; > >> + return -EINVAL; > >> } > >> > >> void hantro_hevc_ref_remove_unused(struct hantro_ctx *ctx) > >> @@ -314,8 +261,6 @@ void hantro_hevc_dec_exit(struct hantro_ctx *ctx) > >> hevc_dec->tile_bsd.cpu, > >> hevc_dec->tile_bsd.dma); > >> hevc_dec->tile_bsd.cpu = NULL; > >> - > >> - hantro_hevc_ref_free(ctx); > >> } > >> > >> int hantro_hevc_dec_init(struct hantro_ctx *ctx) > >> diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h > >> index dbe51303724b..7286404c32ab 100644 > >> --- a/drivers/staging/media/hantro/hantro_hw.h > >> +++ b/drivers/staging/media/hantro/hantro_hw.h > >> @@ -345,6 +345,7 @@ void hantro_hevc_dec_exit(struct hantro_ctx *ctx); > >> int hantro_g2_hevc_dec_run(struct hantro_ctx *ctx); > >> int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx); > >> dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx, int poc); > >> +int hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc, dma_addr_t addr); > >> void hantro_hevc_ref_remove_unused(struct hantro_ctx *ctx); > >> size_t hantro_hevc_chroma_offset(const struct v4l2_ctrl_hevc_sps *sps); > >> size_t hantro_hevc_motion_vectors_offset(const struct v4l2_ctrl_hevc_sps *sps); > >> @@ -394,6 +395,16 @@ hantro_h264_mv_size(unsigned int width, unsigned int height) > >> return 64 * MB_WIDTH(width) * MB_WIDTH(height) + 32; > >> } > >> > >> +static inline size_t > >> +hantro_hevc_mv_size(unsigned int width, unsigned int height) > >> +{ > >> + /* > >> + * A CTB can be 64x64, 32x32 or 16x16. > >> + * Allocated memory for the "worse" case: 16x16 > >> + */ > >> + return width * height / 16; > >> +} > >> + > >> int hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx); > >> int rockchip_vpu2_mpeg2_dec_run(struct hantro_ctx *ctx); > >> void hantro_mpeg2_dec_copy_qtable(u8 *qtable, > >> diff --git a/drivers/staging/media/hantro/hantro_postproc.c b/drivers/staging/media/hantro/hantro_postproc.c > >> index a7774ad4c445..248abe5423f0 100644 > >> --- a/drivers/staging/media/hantro/hantro_postproc.c > >> +++ b/drivers/staging/media/hantro/hantro_postproc.c > >> @@ -146,6 +146,9 @@ int hantro_postproc_alloc(struct hantro_ctx *ctx) > >> else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME) > >> buf_size += hantro_vp9_mv_size(ctx->dst_fmt.width, > >> ctx->dst_fmt.height); > >> + else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_HEVC_SLICE) > >> + buf_size += hantro_hevc_mv_size(ctx->dst_fmt.width, > >> + ctx->dst_fmt.height); > >> > >> for (i = 0; i < num_buffers; ++i) { > >> struct hantro_aux_buf *priv = &ctx->postproc.dec_q[i]; > >> diff --git a/drivers/staging/media/hantro/hantro_v4l2.c b/drivers/staging/media/hantro/hantro_v4l2.c > >> index e4b0645ba6fc..e1fe37afe576 100644 > >> --- a/drivers/staging/media/hantro/hantro_v4l2.c > >> +++ b/drivers/staging/media/hantro/hantro_v4l2.c > >> @@ -150,20 +150,6 @@ static int vidioc_enum_fmt(struct file *file, void *priv, > >> unsigned int num_fmts, i, j = 0; > >> bool skip_mode_none; > >> > >> - /* > >> - * The HEVC decoder on the G2 core needs a little quirk to offer NV12 > >> - * only on the capture side. Once the post-processor logic is used, > >> - * we will be able to expose NV12_4L4 and NV12 as the other cases, > >> - * and therefore remove this quirk. > >> - */ > >> - if (capture && ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_HEVC_SLICE) { > >> - if (f->index == 0) { > >> - f->pixelformat = V4L2_PIX_FMT_NV12; > >> - return 0; > >> - } > >> - return -EINVAL; > >> - } > >> - > >> /* > >> * When dealing with an encoder: > >> * - on the capture side we want to filter out all MODE_NONE formats. > >> @@ -304,6 +290,11 @@ static int hantro_try_fmt(const struct hantro_ctx *ctx, > >> pix_mp->plane_fmt[0].sizeimage += > >> hantro_vp9_mv_size(pix_mp->width, > >> pix_mp->height); > >> + else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_HEVC_SLICE && > >> + !hantro_needs_postproc(ctx, fmt)) > >> + pix_mp->plane_fmt[0].sizeimage += > >> + hantro_hevc_mv_size(pix_mp->width, > >> + pix_mp->height); > >> } else if (!pix_mp->plane_fmt[0].sizeimage) { > >> /* > >> * For coded formats the application can specify > >> -- > >> 2.30.2 > >>