Re: [PATCH 1/5] media: mediatek: vcodec: Add encoder driver support for 34-bit iova

Irui Wang <irui.wang@xxxxxxxxxxxx> · Mon, 18 Jul 2022 19:31:50 +0800

Dear Angelo,

Thanks for your reviewing.

On Mon, 2022-07-18 at 11:51 +0200, AngeloGioacchino Del Regno wrote:
> Il 16/07/22 11:38, Irui Wang ha scritto:
> > Encoder driver got iova from IOMMU is 34-bit, for example:
> > 
> > Here is the sample code:
> > encoder input frame buffer dma address is:
> > frm_buf =
> >      vb2_dma_contig_plane_dma_addr(&vb2_v4l2_buffer->vb2_buf, 0);
> > the value of frm_buf is 0x1_ff30_0000.
> > 
> > encoder driver got the frm_buf and send the iova to SCP firmware
> > through SCP IPI message, then write to encoder hardware in SCP.
> > The iova is stored in IPI message as uint32_t data type, so the
> > value will be truncated from *0x1_ff30_0000* to *0xff30_0000*,
> > and then *0xff30_0000* will be written to encoder hardware, but
> > IOMMU will help to add the high *0x1_* bit back, so IOMMU can
> > translate the iova to PA correctly, encoder hardware can access
> > the correct memory for encoding.
> > Another reason to do this is the encoder hardware can't access
> > the 34-bit iova, IOMMU will help to add the remaining high bits
> > of iova. But for mt8188, encoder hardware can access 34-bit iova
> > directly, and encoder driver need write all 34-bit iova because
> > IOMMU can't help driver do this if the hardware support access
> > 34-bit iova.
> > For the reasons above, this patch is added to support transfer
> > 34-bit iova between kernel and SCP encoder driver. Use uint64_t
> > data type to store the iova, for compatibility with old chipsets,
> > add some new struct definitions for 34-bit.
> > 
> > Signed-off-by: Irui Wang <irui.wang@xxxxxxxxxxxx>
> > ---
> >   .../platform/mediatek/vcodec/mtk_vcodec_drv.h |   3 +
> >   .../mediatek/vcodec/venc/venc_h264_if.c       | 200
> > +++++++++++++++---
> >   .../platform/mediatek/vcodec/venc_ipi_msg.h   |  24 +++
> >   .../platform/mediatek/vcodec/venc_vpu_if.c    |  34 ++-
> >   4 files changed, 227 insertions(+), 34 deletions(-)
> > 
> > diff --git
> > a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_drv.h
> > b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_drv.h
> > index ef4584a46417..ab80e1b1979e 100644
> > --- a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_drv.h
> > +++ b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_drv.h
> > @@ -401,6 +401,7 @@ struct mtk_vcodec_dec_pdata {
> >    * @output_formats: array of supported output formats
> >    * @num_output_formats: number of entries in output_formats
> >    * @core_id: stand for h264 or vp8 encode index
> > + * @is_34bit: whether the encoder uses 34bit iova
> >    */
> >   struct mtk_vcodec_enc_pdata {
> >   	bool uses_ext;
> > @@ -411,9 +412,11 @@ struct mtk_vcodec_enc_pdata {
> >   	const struct mtk_video_fmt *output_formats;
> >   	size_t num_output_formats;
> >   	int core_id;
> > +	bool is_34bit;
> >   };
> >   
> >   #define MTK_ENC_CTX_IS_EXT(ctx) ((ctx)->dev->venc_pdata-
> > >uses_ext)
> > +#define MTK_ENC_IOVA_IS_34BIT(ctx) ((ctx)->dev->venc_pdata-
> > >is_34bit)
> >   
> >   /**
> >    * struct mtk_vcodec_dev - driver data
> > diff --git
> > a/drivers/media/platform/mediatek/vcodec/venc/venc_h264_if.c
> > b/drivers/media/platform/mediatek/vcodec/venc/venc_h264_if.c
> > index 4d9b8798dffe..3a5af6cca040 100644
> > --- a/drivers/media/platform/mediatek/vcodec/venc/venc_h264_if.c
> > +++ b/drivers/media/platform/mediatek/vcodec/venc/venc_h264_if.c
> > @@ -127,6 +127,72 @@ struct venc_h264_vsi {
> >   	struct venc_h264_vpu_buf work_bufs[VENC_H264_VPU_WORK_BUF_MAX];
> >   };
> >   
> > +/**
> > + * struct venc_h264_vpu_config_ext - Structure for h264 encoder
> > configuration
> > + *                                   AP-W/R : AP is writer/reader
> > on this item
> > + *                                   VPU-W/R: VPU is write/reader
> > on this item
> > + * @input_fourcc: input fourcc
> > + * @bitrate: target bitrate (in bps)
> > + * @pic_w: picture width. Picture size is visible stream
> > resolution, in pixels,
> > + *         to be used for display purposes; must be smaller or
> > equal to buffer
> > + *         size.
> > + * @pic_h: picture height
> > + * @buf_w: buffer width. Buffer size is stream resolution in
> > pixels aligned to
> > + *         hardware requirements.
> > + * @buf_h: buffer height
> > + * @gop_size: group of picture size (idr frame)
> > + * @intra_period: intra frame period
> > + * @framerate: frame rate in fps
> > + * @profile: as specified in standard
> > + * @level: as specified in standard
> > + * @wfd: WFD mode 1:on, 0:off
> > + * @max_qp: max quant parameter
> > + * @min_qp: min quant parameter
> > + * @reserved: reserved configs
> > + */
> > +struct venc_h264_vpu_config_ext {
> > +	u32 input_fourcc;
> > +	u32 bitrate;
> > +	u32 pic_w;
> > +	u32 pic_h;
> > +	u32 buf_w;
> > +	u32 buf_h;
> > +	u32 gop_size;
> > +	u32 intra_period;
> > +	u32 framerate;
> > +	u32 profile;
> > +	u32 level;
> > +	u32 wfd;
> > +	u32 max_qp;
> > +	u32 min_qp;
> > +	u32 reserved[8];
> > +};
> > +
> > +/**
> > + * struct venc_h264_vpu_buf_34 - Structure for 34 bit buffer
> > information
> > + *                               AP-W/R : AP is writer/reader on
> > this item
> > + *                               VPU-W/R: VPU is write/reader on
> > this item
> > + * @iova: 34 bit IO virtual address
> > + * @vpua: VPU side memory addr which is used by RC_CODE
> > + * @size: buffer size (in bytes)
> > + */
> > +struct venc_h264_vpu_buf_34 {
> > +	u64 iova;
> > +	u32 vpua;
> > +	u32 size;
> > +};
> > +
> > +/**
> > + * struct venc_h264_vsi_64 - Structure for VPU driver control and
> > info share
> 
> Typo here --------------  ^^^^
Fix it in next version.
> 
> > + *                           Used for 64 bit iova sharing
> > + * @config: h264 encoder configuration
> > + * @work_bufs: working buffer information in VPU side
> > + */
> > +struct venc_h264_vsi_34 {
> > +	struct venc_h264_vpu_config_ext config;
> > +	struct venc_h264_vpu_buf_34
> > work_bufs[VENC_H264_VPU_WORK_BUF_MAX];
> > +};
> > +
> >   /*
> >    * struct venc_h264_inst - h264 encoder AP driver instance
> >    * @hw_base: h264 encoder hardware register base
> > @@ -140,6 +206,8 @@ struct venc_h264_vsi {
> 
> ..snip..
> 
> > diff --git a/drivers/media/platform/mediatek/vcodec/venc_vpu_if.c
> > b/drivers/media/platform/mediatek/vcodec/venc_vpu_if.c
> > index d3570c4c177d..25c1b13559c9 100644
> > --- a/drivers/media/platform/mediatek/vcodec/venc_vpu_if.c
> > +++ b/drivers/media/platform/mediatek/vcodec/venc_vpu_if.c
> > @@ -228,17 +228,28 @@ int vpu_enc_encode(struct venc_vpu_inst *vpu,
> > unsigned int bs_mode,
> >   		   struct venc_frame_info *frame_info)
> >   {
> 
> That's practically 75% flow differences (or more, actually)... and
> there's going
> to always be a useless memzero, because a platform will always use
> either 34, or 32
> bits code.
> 
> At this point I think that for both performance and readability
> purposes, you
> should simply create another function.
> 
> Perhaps something like
> 
> static int vpu_enc_encode_32bits(struct venc_vpu_inst *vpu, unsigned
> int bs_mode,
> 				 struct venc_frm_buf *frm_buf,
> 				 struct mtk_vcodec_mem *bs_buf,
> 				 struct venc_frame_info *frame_info)
> {
> 	..... function .....
> }
> 
> static int vpu_enc_encode_34bits(struct venc_vpu_inst *vpu, unsigned
> int bs_mode,
> 
> 				 struct venc_frm_buf *frm_buf,
> 
> 				 struct mtk_vcodec_mem *bs_buf,
> 
> 				 struct venc_frame_info *frame_info)
> 
> {
> 	...... function ......
> }
Will add such function in next version, thank you.

Thanks
Best Regards
> 
> int vpu_enc_encode(struct venc_vpu_inst *vpu, unsigned int bs_mode,
> 
> 		   struct venc_frm_buf *frm_buf,
> 
> 		   struct mtk_vcodec_mem *bs_buf,
> 
> 		   struct venc_frame_info *frame_info)
> 
> {
> 	int ret;
> 
> 	if (MTK_ENC_IOVA_IS_34BIT(vpu->ctx))
> 		ret = vpu_enc_encode_34bits(......);
> 	else
> 		ret = vpu_enc_encode_32bits(....);
> 
> 	if (ret)
> 		return ret;
> 
> 	mtk_vcodec_debug(vpu, "bs_mode %d state %d size %d key_frm %d
> <-",
> 
> 			 bs_mode, vpu->state, vpu->bs_size, vpu-
> >is_key_frm);
> 
> 	return 0;
> }
> 
> 
> >   	const bool is_ext = MTK_ENC_CTX_IS_EXT(vpu->ctx);
> > +	const bool is_34bit = MTK_ENC_IOVA_IS_34BIT(vpu->ctx);
> > +
> >   	size_t msg_size = is_ext ?
> >   		sizeof(struct venc_ap_ipi_msg_enc_ext) :
> >   		sizeof(struct venc_ap_ipi_msg_enc);
> > +	int status;
> >   	struct venc_ap_ipi_msg_enc_ext out;
> > +	struct venc_ap_ipi_msg_enc_ext_34 out_34;
> >   
> >   	mtk_vcodec_debug(vpu, "bs_mode %d ->", bs_mode);
> >   
> >   	memset(&out, 0, sizeof(out));
> > +	memset(&out_34, 0, sizeof(out_34));
> > +
> >   	out.base.msg_id = AP_IPIMSG_ENC_ENCODE;
> >   	out.base.vpu_inst_addr = vpu->inst_addr;
> >   	out.base.bs_mode = bs_mode;
> > +
> > +	out_34.msg_id = AP_IPIMSG_ENC_ENCODE;
> > +	out_34.vpu_inst_addr = vpu->inst_addr;
> > +	out_34.bs_mode = bs_mode;
> > +
> >   	if (frm_buf) {
> >   		if ((frm_buf->fb_addr[0].dma_addr % 16 == 0) &&
> >   		    (frm_buf->fb_addr[1].dma_addr % 16 == 0) &&
> > @@ -246,6 +257,10 @@ int vpu_enc_encode(struct venc_vpu_inst *vpu,
> > unsigned int bs_mode,
> >   			out.base.input_addr[0] = frm_buf-
> > >fb_addr[0].dma_addr;
> >   			out.base.input_addr[1] = frm_buf-
> > >fb_addr[1].dma_addr;
> >   			out.base.input_addr[2] = frm_buf-
> > >fb_addr[2].dma_addr;
> > +
> > +			out_34.input_addr[0] = frm_buf-
> > >fb_addr[0].dma_addr;
> > +			out_34.input_addr[1] = frm_buf-
> > >fb_addr[1].dma_addr;
> > +			out_34.input_addr[2] = frm_buf-
> > >fb_addr[2].dma_addr;
> >   		} else {
> >   			mtk_vcodec_err(vpu, "dma_addr not align to
> > 16");
> >   			return -EINVAL;
> > @@ -254,14 +269,31 @@ int vpu_enc_encode(struct venc_vpu_inst *vpu,
> > unsigned int bs_mode,
> >   	if (bs_buf) {
> >   		out.base.bs_addr = bs_buf->dma_addr;
> >   		out.base.bs_size = bs_buf->size;
> > +
> > +		out_34.bs_addr = bs_buf->dma_addr;
> > +		out_34.bs_size = bs_buf->size;
> >   	}
> > +
> >   	if (is_ext && frame_info) {
> >   		out.data_item = 3;
> >   		out.data[0] = frame_info->frm_count;
> >   		out.data[1] = frame_info->skip_frm_count;
> >   		out.data[2] = frame_info->frm_type;
> > +
> > +		out_34.data_item = 3;
> > +		out_34.data[0] = frame_info->frm_count;
> > +		out_34.data[1] = frame_info->skip_frm_count;
> > +		out_34.data[2] = frame_info->frm_type;
> >   	}
> > -	if (vpu_enc_send_msg(vpu, &out, msg_size)) {
> > +
> > +	if (is_34bit) {
> > +		msg_size = sizeof(struct venc_ap_ipi_msg_enc_ext_34);
> > +		status = vpu_enc_send_msg(vpu, &out_34, msg_size);
> > +	} else {
> > +		status = vpu_enc_send_msg(vpu, &out, msg_size);
> > +	}
> > +
> > +	if (status) {
> >   		mtk_vcodec_err(vpu, "AP_IPIMSG_ENC_ENCODE %d fail",
> >   			       bs_mode);
> >   		return -EINVAL;
> 
>