Re: [PATCH 1/5] media: mediatek: vcodec: Add encoder driver support for 34-bit iova

AngeloGioacchino Del Regno <angelogioacchino.delregno@xxxxxxxxxxxxx> · Mon, 18 Jul 2022 11:51:19 +0200

Il 16/07/22 11:38, Irui Wang ha scritto:
Encoder driver got iova from IOMMU is 34-bit, for example:

Here is the sample code:
encoder input frame buffer dma address is:
frm_buf =
     vb2_dma_contig_plane_dma_addr(&vb2_v4l2_buffer->vb2_buf, 0);
the value of frm_buf is 0x1_ff30_0000.

encoder driver got the frm_buf and send the iova to SCP firmware
through SCP IPI message, then write to encoder hardware in SCP.
The iova is stored in IPI message as uint32_t data type, so the
value will be truncated from *0x1_ff30_0000* to *0xff30_0000*,
and then *0xff30_0000* will be written to encoder hardware, but
IOMMU will help to add the high *0x1_* bit back, so IOMMU can
translate the iova to PA correctly, encoder hardware can access
the correct memory for encoding.
Another reason to do this is the encoder hardware can't access
the 34-bit iova, IOMMU will help to add the remaining high bits
of iova. But for mt8188, encoder hardware can access 34-bit iova
directly, and encoder driver need write all 34-bit iova because
IOMMU can't help driver do this if the hardware support access
34-bit iova.
For the reasons above, this patch is added to support transfer
34-bit iova between kernel and SCP encoder driver. Use uint64_t
data type to store the iova, for compatibility with old chipsets,
add some new struct definitions for 34-bit.

Signed-off-by: Irui Wang <irui.wang@xxxxxxxxxxxx>
---
  .../platform/mediatek/vcodec/mtk_vcodec_drv.h |   3 +
  .../mediatek/vcodec/venc/venc_h264_if.c       | 200 +++++++++++++++---
  .../platform/mediatek/vcodec/venc_ipi_msg.h   |  24 +++
  .../platform/mediatek/vcodec/venc_vpu_if.c    |  34 ++-
  4 files changed, 227 insertions(+), 34 deletions(-)

diff --git a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_drv.h b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_drv.h
index ef4584a46417..ab80e1b1979e 100644
--- a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_drv.h
+++ b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_drv.h
@@ -401,6 +401,7 @@ struct mtk_vcodec_dec_pdata {
   * @output_formats: array of supported output formats
   * @num_output_formats: number of entries in output_formats
   * @core_id: stand for h264 or vp8 encode index
+ * @is_34bit: whether the encoder uses 34bit iova
   */
  struct mtk_vcodec_enc_pdata {
  	bool uses_ext;
@@ -411,9 +412,11 @@ struct mtk_vcodec_enc_pdata {
  	const struct mtk_video_fmt *output_formats;
  	size_t num_output_formats;
  	int core_id;
+	bool is_34bit;
  };
  
  #define MTK_ENC_CTX_IS_EXT(ctx) ((ctx)->dev->venc_pdata->uses_ext)
+#define MTK_ENC_IOVA_IS_34BIT(ctx) ((ctx)->dev->venc_pdata->is_34bit)
  
  /**
   * struct mtk_vcodec_dev - driver data
diff --git a/drivers/media/platform/mediatek/vcodec/venc/venc_h264_if.c b/drivers/media/platform/mediatek/vcodec/venc/venc_h264_if.c
index 4d9b8798dffe..3a5af6cca040 100644
--- a/drivers/media/platform/mediatek/vcodec/venc/venc_h264_if.c
+++ b/drivers/media/platform/mediatek/vcodec/venc/venc_h264_if.c
@@ -127,6 +127,72 @@ struct venc_h264_vsi {
  	struct venc_h264_vpu_buf work_bufs[VENC_H264_VPU_WORK_BUF_MAX];
  };
  
+/**
+ * struct venc_h264_vpu_config_ext - Structure for h264 encoder configuration
+ *                                   AP-W/R : AP is writer/reader on this item
+ *                                   VPU-W/R: VPU is write/reader on this item
+ * @input_fourcc: input fourcc
+ * @bitrate: target bitrate (in bps)
+ * @pic_w: picture width. Picture size is visible stream resolution, in pixels,
+ *         to be used for display purposes; must be smaller or equal to buffer
+ *         size.
+ * @pic_h: picture height
+ * @buf_w: buffer width. Buffer size is stream resolution in pixels aligned to
+ *         hardware requirements.
+ * @buf_h: buffer height
+ * @gop_size: group of picture size (idr frame)
+ * @intra_period: intra frame period
+ * @framerate: frame rate in fps
+ * @profile: as specified in standard
+ * @level: as specified in standard
+ * @wfd: WFD mode 1:on, 0:off
+ * @max_qp: max quant parameter
+ * @min_qp: min quant parameter
+ * @reserved: reserved configs
+ */
+struct venc_h264_vpu_config_ext {
+	u32 input_fourcc;
+	u32 bitrate;
+	u32 pic_w;
+	u32 pic_h;
+	u32 buf_w;
+	u32 buf_h;
+	u32 gop_size;
+	u32 intra_period;
+	u32 framerate;
+	u32 profile;
+	u32 level;
+	u32 wfd;
+	u32 max_qp;
+	u32 min_qp;
+	u32 reserved[8];
+};
+
+/**
+ * struct venc_h264_vpu_buf_34 - Structure for 34 bit buffer information
+ *                               AP-W/R : AP is writer/reader on this item
+ *                               VPU-W/R: VPU is write/reader on this item
+ * @iova: 34 bit IO virtual address
+ * @vpua: VPU side memory addr which is used by RC_CODE
+ * @size: buffer size (in bytes)
+ */
+struct venc_h264_vpu_buf_34 {
+	u64 iova;
+	u32 vpua;
+	u32 size;
+};
+
+/**
+ * struct venc_h264_vsi_64 - Structure for VPU driver control and info share

Typo here --------------  ^^^^

+ *                           Used for 64 bit iova sharing
+ * @config: h264 encoder configuration
+ * @work_bufs: working buffer information in VPU side
+ */
+struct venc_h264_vsi_34 {
+	struct venc_h264_vpu_config_ext config;
+	struct venc_h264_vpu_buf_34 work_bufs[VENC_H264_VPU_WORK_BUF_MAX];
+};
+
  /*
   * struct venc_h264_inst - h264 encoder AP driver instance
   * @hw_base: h264 encoder hardware register base
@@ -140,6 +206,8 @@ struct venc_h264_vsi {

..snip..

diff --git a/drivers/media/platform/mediatek/vcodec/venc_vpu_if.c b/drivers/media/platform/mediatek/vcodec/venc_vpu_if.c
index d3570c4c177d..25c1b13559c9 100644
--- a/drivers/media/platform/mediatek/vcodec/venc_vpu_if.c
+++ b/drivers/media/platform/mediatek/vcodec/venc_vpu_if.c
@@ -228,17 +228,28 @@ int vpu_enc_encode(struct venc_vpu_inst *vpu, unsigned int bs_mode,
  		   struct venc_frame_info *frame_info)
  {

That's practically 75% flow differences (or more, actually)... and there's going
to always be a useless memzero, because a platform will always use either 34, or 32
bits code.

At this point I think that for both performance and readability purposes, you
should simply create another function.

Perhaps something like

static int vpu_enc_encode_32bits(struct venc_vpu_inst *vpu, unsigned int bs_mode,
				 struct venc_frm_buf *frm_buf,
				 struct mtk_vcodec_mem *bs_buf,
				 struct venc_frame_info *frame_info)
{
	..... function .....
}

static int vpu_enc_encode_34bits(struct venc_vpu_inst *vpu, unsigned int bs_mode,

				 struct venc_frm_buf *frm_buf,

				 struct mtk_vcodec_mem *bs_buf,

				 struct venc_frame_info *frame_info)

{
	...... function ......
}

int vpu_enc_encode(struct venc_vpu_inst *vpu, unsigned int bs_mode,

		   struct venc_frm_buf *frm_buf,

		   struct mtk_vcodec_mem *bs_buf,

		   struct venc_frame_info *frame_info)

{
	int ret;

	if (MTK_ENC_IOVA_IS_34BIT(vpu->ctx))
		ret = vpu_enc_encode_34bits(......);
	else
		ret = vpu_enc_encode_32bits(....);

	if (ret)
		return ret;

	mtk_vcodec_debug(vpu, "bs_mode %d state %d size %d key_frm %d <-",

			 bs_mode, vpu->state, vpu->bs_size, vpu->is_key_frm);

	return 0;
}


  	const bool is_ext = MTK_ENC_CTX_IS_EXT(vpu->ctx);
+	const bool is_34bit = MTK_ENC_IOVA_IS_34BIT(vpu->ctx);
+
  	size_t msg_size = is_ext ?
  		sizeof(struct venc_ap_ipi_msg_enc_ext) :
  		sizeof(struct venc_ap_ipi_msg_enc);
+	int status;
  	struct venc_ap_ipi_msg_enc_ext out;
+	struct venc_ap_ipi_msg_enc_ext_34 out_34;
  
  	mtk_vcodec_debug(vpu, "bs_mode %d ->", bs_mode);
  
  	memset(&out, 0, sizeof(out));
+	memset(&out_34, 0, sizeof(out_34));
+
  	out.base.msg_id = AP_IPIMSG_ENC_ENCODE;
  	out.base.vpu_inst_addr = vpu->inst_addr;
  	out.base.bs_mode = bs_mode;
+
+	out_34.msg_id = AP_IPIMSG_ENC_ENCODE;
+	out_34.vpu_inst_addr = vpu->inst_addr;
+	out_34.bs_mode = bs_mode;
+
  	if (frm_buf) {
  		if ((frm_buf->fb_addr[0].dma_addr % 16 == 0) &&
  		    (frm_buf->fb_addr[1].dma_addr % 16 == 0) &&
@@ -246,6 +257,10 @@ int vpu_enc_encode(struct venc_vpu_inst *vpu, unsigned int bs_mode,
  			out.base.input_addr[0] = frm_buf->fb_addr[0].dma_addr;
  			out.base.input_addr[1] = frm_buf->fb_addr[1].dma_addr;
  			out.base.input_addr[2] = frm_buf->fb_addr[2].dma_addr;
+
+			out_34.input_addr[0] = frm_buf->fb_addr[0].dma_addr;
+			out_34.input_addr[1] = frm_buf->fb_addr[1].dma_addr;
+			out_34.input_addr[2] = frm_buf->fb_addr[2].dma_addr;
  		} else {
  			mtk_vcodec_err(vpu, "dma_addr not align to 16");
  			return -EINVAL;
@@ -254,14 +269,31 @@ int vpu_enc_encode(struct venc_vpu_inst *vpu, unsigned int bs_mode,
  	if (bs_buf) {
  		out.base.bs_addr = bs_buf->dma_addr;
  		out.base.bs_size = bs_buf->size;
+
+		out_34.bs_addr = bs_buf->dma_addr;
+		out_34.bs_size = bs_buf->size;
  	}
+
  	if (is_ext && frame_info) {
  		out.data_item = 3;
  		out.data[0] = frame_info->frm_count;
  		out.data[1] = frame_info->skip_frm_count;
  		out.data[2] = frame_info->frm_type;
+
+		out_34.data_item = 3;
+		out_34.data[0] = frame_info->frm_count;
+		out_34.data[1] = frame_info->skip_frm_count;
+		out_34.data[2] = frame_info->frm_type;
  	}
-	if (vpu_enc_send_msg(vpu, &out, msg_size)) {
+
+	if (is_34bit) {
+		msg_size = sizeof(struct venc_ap_ipi_msg_enc_ext_34);
+		status = vpu_enc_send_msg(vpu, &out_34, msg_size);
+	} else {
+		status = vpu_enc_send_msg(vpu, &out, msg_size);
+	}
+
+	if (status) {
  		mtk_vcodec_err(vpu, "AP_IPIMSG_ENC_ENCODE %d fail",
  			       bs_mode);
  		return -EINVAL;