Re: [PATCH v2 2/4] media: mediatek: vcodec: support extend h264 video shared information

Chen-Yu Tsai <wenst@xxxxxxxxxxxx> · Tue, 22 Oct 2024 15:08:00 +0800

On Wed, Oct 16, 2024 at 11:49 AM Yunfei Dong <yunfei.dong@xxxxxxxxxxxx> wrote:
>
> The address end of working buffer can't be calculated directly
> with buffer size in kernel for some special architecture. Adding
> new extend vsi to calculate the address end in other os.
> Refactoring the driver flow to make the driver looks more reasonable.
>
> Signed-off-by: Yunfei Dong <yunfei.dong@xxxxxxxxxxxx>
> ---
>  .../decoder/vdec/vdec_h264_req_multi_if.c     | 355 ++++++++++++------
>  1 file changed, 238 insertions(+), 117 deletions(-)
>
> diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_multi_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_multi_if.c
> index 57c85af5ffb4..e02ed8dfe0ce 100644
> --- a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_multi_if.c
> +++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_h264_req_multi_if.c
> @@ -48,10 +48,29 @@ struct vdec_h264_slice_lat_dec_param {
>  };
>
>  /**
> - * struct vdec_h264_slice_info - decode information
> + * struct vdec_h264_slice_info_ex - extend decode information

Please move these structs around so that the extended ones are
grouped together.

Please also mark every struct that is included in the `vsi` shared
memory struct as "shared interface with firmware" so that they do
not get changed accidentally.

>   *
> + * @wdma_end_addr_offset:      offset from buffer start
>   * @nal_info:          nal info of current picture
>   * @timeout:           Decode timeout: 1 timeout, 0 no timeout
> + * @vdec_fb_va:        VDEC frame buffer struct virtual address
> + * @crc:               Used to check whether hardware's status is right
> + * @reserved:          reserved
> + */
> +struct vdec_h264_slice_info_ex {
> +       u64 wdma_end_addr_offset;
> +       u16 nal_info;
> +       u16 timeout;

There is a 4 byte hole here. Can you make it explicit by adding
an extra reserved field?

> +       u64 vdec_fb_va;
> +       u32 crc[8];
> +       u32 reserved;
> +};
> +
> +/**
> + * struct vdec_h264_slice_info - decode information
> + *
> + * @nal_info:          nal info of current picture
> + * @timeout:           Decode timeout: 1 timeout, 0 no timeount
>   * @bs_buf_size:       bitstream size
>   * @bs_buf_addr:       bitstream buffer dma address
>   * @y_fb_dma:          Y frame buffer dma address
> @@ -70,6 +89,64 @@ struct vdec_h264_slice_info {
>         u32 crc[8];
>  };
>
> +/*
> + * struct vdec_h264_slice_mem - memory address and size
> + */
> +struct vdec_h264_slice_mem {
> +       union {
> +               u64 buf;
> +               u64 dma_addr;
> +       };
> +       union {
> +               size_t size;
> +               u64 dma_addr_end;
> +       };
> +};
> +
> +/**
> + * struct vdec_h264_slice_fb - frame buffer for decoding
> + *
> + * @y:  current y buffer address info
> + * @c:  current c buffer address info
> + */
> +struct vdec_h264_slice_fb {
> +       struct vdec_h264_slice_mem y;
> +       struct vdec_h264_slice_mem c;
> +};
> +
> +/**
> + * struct vdec_h264_slice_vsi_ex - extend shared memory for decode information exchange
> + *        between SCP and Host.
> + *
> + * @bs:                input buffer info
> + * @fb:                current y/c buffer
> + *
> + * @ube:               ube buffer
> + * @trans:             transcoded buffer
> + * @row_info:          row info buffer
> + * @err_map:           err map buffer
> + * @slice_bc:          slice buffer
> + *
> + * @mv_buf_dma:                HW working motion vector buffer
> + * @dec:               decode information (AP-R, VPU-W)
> + * @h264_slice_params: decode parameters for hw used
> + */
> +struct vdec_h264_slice_vsi_ex {
> +       /* LAT dec addr */
> +       struct vdec_h264_slice_mem bs;
> +       struct vdec_h264_slice_fb fb;
> +
> +       struct vdec_h264_slice_mem ube;
> +       struct vdec_h264_slice_mem trans;
> +       struct vdec_h264_slice_mem row_info;
> +       struct vdec_h264_slice_mem err_map;
> +       struct vdec_h264_slice_mem slice_bc;
> +
> +       struct vdec_h264_slice_mem mv_buf_dma[H264_MAX_MV_NUM];
> +       struct vdec_h264_slice_info_ex dec;
> +       struct vdec_h264_slice_lat_dec_param h264_slice_params;
> +};
> +
>  /**
>   * struct vdec_h264_slice_vsi - shared memory for decode information exchange
>   *        between SCP and Host.
> @@ -136,10 +213,10 @@ struct vdec_h264_slice_share_info {
>   * @pred_buf:          HW working prediction buffer
>   * @mv_buf:            HW working motion vector buffer
>   * @vpu:               VPU instance
> - * @vsi:               vsi used for lat
> - * @vsi_core:          vsi used for core
> + * @vsi_ex:            extend vsi used for lat
> + * @vsi_core_ex:       extend vsi used for core
>   *
> - * @vsi_ctx:           Local VSI data for this decoding context
> + * @vsi_ctx_ex:        Local extend vsi data for this decoding context
>   * @h264_slice_param:  the parameters that hardware use to decode
>   *
>   * @resolution_changed:resolution changed
> @@ -156,10 +233,19 @@ struct vdec_h264_slice_inst {
>         struct mtk_vcodec_mem pred_buf;
>         struct mtk_vcodec_mem mv_buf[H264_MAX_MV_NUM];
>         struct vdec_vpu_inst vpu;
> -       struct vdec_h264_slice_vsi *vsi;
> -       struct vdec_h264_slice_vsi *vsi_core;
> -
> -       struct vdec_h264_slice_vsi vsi_ctx;
> +       union {
> +               struct vdec_h264_slice_vsi_ex *vsi_ex;
> +               struct vdec_h264_slice_vsi *vsi;
> +       };
> +       union {
> +               struct vdec_h264_slice_vsi_ex *vsi_core_ex;
> +               struct vdec_h264_slice_vsi *vsi_core;
> +       };
> +
> +       union {
> +               struct vdec_h264_slice_vsi_ex vsi_ctx_ex;
> +               struct vdec_h264_slice_vsi vsi_ctx;
> +       };

Code wise I think it would be better to have a union of structs of structs:

    union {
        struct {
            struct vdec_h264_slice_vsi *vsi;
            struct vdec_h264_slice_vsi *vsi_core;
            struct vdec_h264_slice_vsi vsi_ctx;
        };
        struct {
            struct vdec_h264_slice_vsi_ex *vsi_ex;
            struct vdec_h264_slice_vsi_ex *vsi_core_ex;
            struct vdec_h264_slice_vsi_ex vsi_ctx_ex;
        };
    };

This makes it clear that the *_ex variants are used together.
The code compiles down to the same layout.

>         struct vdec_h264_slice_lat_dec_param h264_slice_param;
>
>         unsigned int resolution_changed;
> @@ -389,6 +475,98 @@ static void vdec_h264_slice_get_crop_info(struct vdec_h264_slice_inst *inst,
>                        cr->left, cr->top, cr->width, cr->height);
>  }
>
> +static void vdec_h264_slice_setup_lat_buffer(struct vdec_h264_slice_inst *inst,

Please add the "_ex" suffix to mark this function as used for the extended
architecture.

> +                                            struct mtk_vcodec_mem *bs,
> +                                            struct vdec_lat_buf *lat_buf)
> +{
> +       struct mtk_vcodec_mem *mem;
> +       int i;
> +
> +       inst->vsi_ex->bs.dma_addr = (u64)bs->dma_addr;
> +       inst->vsi_ex->bs.size = bs->size;
> +
> +       for (i = 0; i < H264_MAX_MV_NUM; i++) {
> +               mem = &inst->mv_buf[i];
> +               inst->vsi_ex->mv_buf_dma[i].dma_addr = mem->dma_addr;
> +               inst->vsi_ex->mv_buf_dma[i].size = mem->size;
> +       }
> +       inst->vsi_ex->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr;
> +       inst->vsi_ex->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size;
> +
> +       inst->vsi_ex->row_info.dma_addr = 0;
> +       inst->vsi_ex->row_info.size = 0;
> +
> +       inst->vsi_ex->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr;
> +       inst->vsi_ex->err_map.size = lat_buf->wdma_err_addr.size;
> +
> +       inst->vsi_ex->slice_bc.dma_addr = lat_buf->slice_bc_addr.dma_addr;
> +       inst->vsi_ex->slice_bc.size = lat_buf->slice_bc_addr.size;
> +
> +       inst->vsi_ex->trans.dma_addr_end = inst->ctx->msg_queue.wdma_rptr_addr;
> +       inst->vsi_ex->trans.dma_addr = inst->ctx->msg_queue.wdma_wptr_addr;
> +}
> +
> +static int vdec_h264_slice_setup_core_buffer(struct vdec_h264_slice_inst *inst,
> +                                            struct vdec_h264_slice_share_info *share_info,
> +                                            struct vdec_lat_buf *lat_buf)

Same here.

> +{
> +       struct mtk_vcodec_mem *mem;
> +       struct mtk_vcodec_dec_ctx *ctx = inst->ctx;
> +       struct vb2_v4l2_buffer *vb2_v4l2;
> +       struct vdec_fb *fb;
> +       u64 y_fb_dma, c_fb_dma = 0;
> +       int i;
> +
> +       fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx);
> +       if (!fb) {
> +               mtk_vdec_err(ctx, "fb buffer is NULL");
> +               return -EBUSY;
> +       }
> +
> +       y_fb_dma = (u64)fb->base_y.dma_addr;
> +       if (ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes == 1)
> +               c_fb_dma =
> +                       y_fb_dma + inst->ctx->picinfo.buf_w * inst->ctx->picinfo.buf_h;
> +       else
> +               c_fb_dma = (u64)fb->base_c.dma_addr;
> +
> +       mtk_vdec_debug(ctx, "[h264-core] y/c addr = 0x%llx 0x%llx", y_fb_dma, c_fb_dma);
> +
> +       inst->vsi_core_ex->fb.y.dma_addr = y_fb_dma;
> +       inst->vsi_core_ex->fb.y.size = ctx->picinfo.fb_sz[0];
> +       inst->vsi_core_ex->fb.c.dma_addr = c_fb_dma;
> +       inst->vsi_core_ex->fb.c.size = ctx->picinfo.fb_sz[1];
> +
> +       inst->vsi_core_ex->dec.vdec_fb_va = (unsigned long)fb;
> +       inst->vsi_core_ex->dec.nal_info = share_info->nal_info;
> +
> +       inst->vsi_core_ex->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr;
> +       inst->vsi_core_ex->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size;
> +
> +       inst->vsi_core_ex->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr;
> +       inst->vsi_core_ex->err_map.size = lat_buf->wdma_err_addr.size;
> +
> +       inst->vsi_core_ex->slice_bc.dma_addr = lat_buf->slice_bc_addr.dma_addr;
> +       inst->vsi_core_ex->slice_bc.size = lat_buf->slice_bc_addr.size;
> +
> +       inst->vsi_core_ex->row_info.dma_addr = 0;
> +       inst->vsi_core_ex->row_info.size = 0;
> +
> +       inst->vsi_core_ex->trans.dma_addr = share_info->trans_start;
> +       inst->vsi_core_ex->trans.dma_addr_end = share_info->trans_end;
> +
> +       for (i = 0; i < H264_MAX_MV_NUM; i++) {
> +               mem = &inst->mv_buf[i];
> +               inst->vsi_core_ex->mv_buf_dma[i].dma_addr = mem->dma_addr;
> +               inst->vsi_core_ex->mv_buf_dma[i].size = mem->size;
> +       }
> +
> +       vb2_v4l2 = v4l2_m2m_next_dst_buf(ctx->m2m_ctx);
> +       v4l2_m2m_buf_copy_metadata(&lat_buf->ts_info, vb2_v4l2, true);
> +
> +       return 0;
> +}
> +
>  static int vdec_h264_slice_init(struct mtk_vcodec_dec_ctx *ctx)
>  {
>         struct vdec_h264_slice_inst *inst;
> @@ -412,10 +590,10 @@ static int vdec_h264_slice_init(struct mtk_vcodec_dec_ctx *ctx)
>                 goto error_free_inst;
>         }
>
> -       vsi_size = round_up(sizeof(struct vdec_h264_slice_vsi), VCODEC_DEC_ALIGNED_64);
> -       inst->vsi = inst->vpu.vsi;
> -       inst->vsi_core =
> -               (struct vdec_h264_slice_vsi *)(((char *)inst->vpu.vsi) + vsi_size);
> +       vsi_size = round_up(sizeof(struct vdec_h264_slice_vsi_ex), VCODEC_DEC_ALIGNED_64);
> +       inst->vsi_ex = inst->vpu.vsi;
> +       inst->vsi_core_ex =
> +               (struct vdec_h264_slice_vsi_ex *)(((char *)inst->vpu.vsi) + vsi_size);

Changing this here without any feature checking will break existing
platforms because the vsi_core pointer now points to the wrong address.

You can't change the existing code path in a non backwards compatible
way in one patch then fix it back up in the next patch. It has to be
done at the same time.

In other words, existing platforms _must_ continue to work throughout
your patch series, i.e. with only patches 1 & 2 applied, MT8192 / MT8195
should continue to work properly.

Looking at this patch more, it seems that it is better to squash patches
2 & 3 together. The two patches are working on the same thing: adding
support for the extended architecture. Having the changes together in
one single patch makes more sense.

ChenYu