Set allow_cache_hints to 1 for the vb2_queue source and destination queues in the mediatek vcodec V4L2 driver. This allows us to allocate buffers with the V4L2_MEMORY_FLAG_NON_COHERENT set. On Mediatek SoCs, this enables caching for this memory, which vastly improves performance when being read from CPU. Read performance for these buffers is in turn important for detiling MM21 video frames in software. This change should be safe from race conditions since videobuf2 already invalidates or flushes the appropriate cache lines in its prepare() and finish() methods. Tested on a MT8183 SoC. Resulted in both correct detiling and a 10X speedup. Signed-off-by: Justin Green <greenjustin@xxxxxxxxxxxx> --- .../platform/mediatek/vcodec/mtk_vcodec_dec.c | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec.c b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec.c index 52e5d36aa912..f093aa715e1f 100644 --- a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec.c +++ b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec.c @@ -929,30 +929,32 @@ int mtk_vcodec_dec_queue_init(void *priv, struct vb2_queue *src_vq, mtk_v4l2_debug(3, "[%d]", ctx->id); - src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; - src_vq->io_modes = VB2_DMABUF | VB2_MMAP; - src_vq->drv_priv = ctx; - src_vq->buf_struct_size = sizeof(struct mtk_video_dec_buf); - src_vq->ops = ctx->dev->vdec_pdata->vdec_vb2_ops; - src_vq->mem_ops = &vb2_dma_contig_memops; - src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; - src_vq->lock = &ctx->dev->dev_mutex; - src_vq->dev = &ctx->dev->plat_dev->dev; + src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; + src_vq->io_modes = VB2_DMABUF | VB2_MMAP; + src_vq->drv_priv = ctx; + src_vq->buf_struct_size = sizeof(struct mtk_video_dec_buf); + src_vq->ops = ctx->dev->vdec_pdata->vdec_vb2_ops; + src_vq->mem_ops = &vb2_dma_contig_memops; + src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; + src_vq->lock = &ctx->dev->dev_mutex; + src_vq->dev = &ctx->dev->plat_dev->dev; + src_vq->allow_cache_hints = 1; ret = vb2_queue_init(src_vq); if (ret) { mtk_v4l2_err("Failed to initialize videobuf2 queue(output)"); return ret; } - dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; - dst_vq->io_modes = VB2_DMABUF | VB2_MMAP; - dst_vq->drv_priv = ctx; - dst_vq->buf_struct_size = sizeof(struct mtk_video_dec_buf); - dst_vq->ops = ctx->dev->vdec_pdata->vdec_vb2_ops; - dst_vq->mem_ops = &vb2_dma_contig_memops; - dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; - dst_vq->lock = &ctx->dev->dev_mutex; - dst_vq->dev = &ctx->dev->plat_dev->dev; + dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; + dst_vq->io_modes = VB2_DMABUF | VB2_MMAP; + dst_vq->drv_priv = ctx; + dst_vq->buf_struct_size = sizeof(struct mtk_video_dec_buf); + dst_vq->ops = ctx->dev->vdec_pdata->vdec_vb2_ops; + dst_vq->mem_ops = &vb2_dma_contig_memops; + dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; + dst_vq->lock = &ctx->dev->dev_mutex; + dst_vq->dev = &ctx->dev->plat_dev->dev; + dst_vq->allow_cache_hints = 1; ret = vb2_queue_init(dst_vq); if (ret) -- 2.36.1.255.ge46751e96f-goog