Re: [PATCH v6 10/13] media: verisilicon: Add Rockchip AV1 decoder

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Il 12/04/23 13:56, Benjamin Gaignard ha scritto:
Implement AV1 stateless decoder for rockchip VPU981.
It decode 8 and 10 bits AV1 bitstreams.
AV1 scaling feature is done by the postprocessor.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@xxxxxxxxxxxxx>
Reviewed-by: Nicolas Dufresne <nicolas.dufresne@xxxxxxxxxxxxx>
---
Changes in v6:
- Fix frame-larger-than warning.

  drivers/media/platform/verisilicon/Makefile   |    1 +
  .../media/platform/verisilicon/hantro_hw.h    |   64 +-
  .../verisilicon/rockchip_vpu981_hw_av1_dec.c  | 2024 +++++++++++++++++
  .../verisilicon/rockchip_vpu981_regs.h        |  477 ++++
  4 files changed, 2564 insertions(+), 2 deletions(-)
  create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
  create mode 100644 drivers/media/platform/verisilicon/rockchip_vpu981_regs.h


..snip..

diff --git a/drivers/media/platform/verisilicon/hantro_hw.h b/drivers/media/platform/verisilicon/hantro_hw.h
index d7641eced32e..25456fb960c8 100644
--- a/drivers/media/platform/verisilicon/hantro_hw.h
+++ b/drivers/media/platform/verisilicon/hantro_hw.h
@@ -37,6 +37,8 @@
#define NUM_REF_PICTURES (V4L2_HEVC_DPB_ENTRIES_NUM_MAX + 1) +#define AV1_MAX_FRAME_BUF_COUNT (V4L2_AV1_TOTAL_REFS_PER_FRAME + 1)
+
  struct hantro_dev;
  struct hantro_ctx;
  struct hantro_buf;
@@ -250,23 +252,81 @@ struct hantro_vp9_dec_hw_ctx {
  };
/**
- * hantro_av1_dec_hw_ctx
+ * struct hantro_av1_dec_ctrls
+ * @sequence:		AV1 Sequence
+ * @tile_group_entry:	AV1 Tile Group entry
+ * @frame:		AV1 Frame Header OBU
+ * @film_grain:		AV1 Film Grain
+ */
+struct hantro_av1_dec_ctrls {
+	const struct v4l2_ctrl_av1_sequence *sequence;
+	const struct v4l2_ctrl_av1_tile_group_entry *tile_group_entry;
+	const struct v4l2_ctrl_av1_frame *frame;
+	const struct v4l2_ctrl_av1_film_grain *film_grain;
+};
+
+struct hantro_av1_frame_ref {
+	int width;
+	int height;
+	int mi_cols;
+	int mi_rows;

I wonder if we can save some memory here, if w/h/mi_c/mi_r can never be
negative, it would be possible to change them to a unsigned type, in which
case, a u16 would probably be fine at least for width and height as I would
never expect a resolution of more than 65535x65535.

Even a s16 would probably be fine, anyway - but that's your call.

P.S.: If this is a structure coming from the HW instead, all members shall
use fixed size type!

..snip..

diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
new file mode 100644
index 000000000000..b9fc70f606a3
--- /dev/null
+++ b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
@@ -0,0 +1,2024 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, Collabora

Shouldn't this be   2023, Collabora Ltd.   ? :-)

+ *
+ * Author: Benjamin Gaignard <benjamin.gaignard@xxxxxxxxxxxxx>
+ */
+
+#include <media/v4l2-mem2mem.h>
+#include "hantro.h"
+#include "hantro_v4l2.h"
+#include "rockchip_vpu981_regs.h"
+

..snip..

+
+static int rockchip_vpu981_av1_tile_log2(int target)
+{
+	int k;
+
+	/*
+	 * returns the smallest value for k such that 1 << k is greater
+         * than or equal to target

Spaces -> tabs here, please.

+	 */
+	for (k = 0; (1 << k) < target; k++);
+
+	return k;
+}
+

..snip..

+static void rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+	const struct v4l2_av1_tile_info *tile_info = &ctrls->frame->tile_info;
+	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
+	    ctrls->tile_group_entry;
+	int context_update_y =
+	    tile_info->context_update_tile_id / tile_info->tile_cols;
+	int context_update_x =
+	    tile_info->context_update_tile_id % tile_info->tile_cols;
+	int context_update_tile_id =
+	    context_update_x * tile_info->tile_rows + context_update_y;
+	u8 *dst = av1_dec->tile_info.cpu;
+	struct hantro_dev *vpu = ctx->dev;
+	int tile0, tile1;
+
+	memset(dst, 0, av1_dec->tile_info.size);
+
+	for (tile0 = 0; tile0 < tile_info->tile_cols; tile0++) {
+		for (tile1 = 0; tile1 < tile_info->tile_rows; tile1++) {
+			int tile_id = tile1 * tile_info->tile_cols + tile0;
+			u32 start, end;
+			u32 y0 =
+			    tile_info->height_in_sbs_minus_1[tile1] + 1;
+			u32 x0 = tile_info->width_in_sbs_minus_1[tile0] + 1;
+
+			// tile size in SB units (width,height)

Please be consistent with comments style.
You used C-style before, so please do the same here.

+			*dst++ = x0;
+			*dst++ = 0;
+			*dst++ = 0;
+			*dst++ = 0;
+			*dst++ = y0;
+			*dst++ = 0;
+			*dst++ = 0;
+			*dst++ = 0;
+
+			// tile start position
+			start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
+			*dst++ = start & 255;
+			*dst++ = (start >> 8) & 255;
+			*dst++ = (start >> 16) & 255;
+			*dst++ = (start >> 24) & 255;
+
+			// # of bytes in tile data

s/#/number/g

+			end = start + group_entry[tile_id].tile_size;
+			*dst++ = end & 255;
+			*dst++ = (end >> 8) & 255;
+			*dst++ = (end >> 16) & 255;
+			*dst++ = (end >> 24) & 255;
+		}
+	}
+
+	hantro_reg_write(vpu, &av1_multicore_expect_context_update,
+			 !!(context_update_x == 0));

fits in one line of 96 columns

+	hantro_reg_write(vpu, &av1_tile_enable,
+			 !!((tile_info->tile_cols > 1) || (tile_info->tile_rows > 1)));
+	hantro_reg_write(vpu, &av1_num_tile_cols_8k, tile_info->tile_cols);
+	hantro_reg_write(vpu, &av1_num_tile_rows_8k, tile_info->tile_rows);
+	hantro_reg_write(vpu, &av1_context_update_tile_id,
+			 context_update_tile_id);

ditto

+	hantro_reg_write(vpu, &av1_tile_transpose, 1);
+	if (rockchip_vpu981_av1_tile_log2(tile_info->tile_cols) ||
+	    rockchip_vpu981_av1_tile_log2(tile_info->tile_rows))
+		hantro_reg_write(vpu, &av1_dec_tile_size_mag, tile_info->tile_size_bytes - 1);
+	else
+		hantro_reg_write(vpu, &av1_dec_tile_size_mag, 3);
+
+	hantro_write_addr(vpu, AV1_TILE_BASE, av1_dec->tile_info.dma);
+}
+

..snip..


+static void rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
+	bool frame_is_intra = IS_INTRA(frame->frame_type);
+	struct av1cdfs *out_cdfs = (struct av1cdfs *)av1_dec->prob_tbl_out.cpu;
+	int i;
+
+	if (frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF)
+		return;
+
+	for (i = 0; i < NUM_REF_FRAMES; i++) {
+		if (frame->refresh_frame_flags & (1 << i)) {

You can use the BIT() macro here:

if (frame->refresh_frame_flags & BIT(i)) {

+			struct mvcdfs stored_mv_cdf;
+
+			rockchip_av1_get_cdfs(ctx, i);
+			stored_mv_cdf = av1_dec->cdfs->mv_cdf;
+			*av1_dec->cdfs = *out_cdfs;
+			if (frame_is_intra) {
+				av1_dec->cdfs->mv_cdf = stored_mv_cdf;
+				*av1_dec->cdfs_ndvc = out_cdfs->mv_cdf;
+			}
+			rockchip_av1_store_cdfs(ctx,
+						frame->refresh_frame_flags);
+			break;
+		}
+	}
+}

..snip..

+
+static void rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
+	const struct v4l2_av1_cdef *cdef = &frame->cdef;
+	struct hantro_dev *vpu = ctx->dev;
+	u32 luma_pri_strength = 0;
+	u16 luma_sec_strength = 0;
+	u32 chroma_pri_strength = 0;
+	u16 chroma_sec_strength = 0;
+	int i;
+
+	hantro_reg_write(vpu, &av1_cdef_bits, cdef->bits);
+	hantro_reg_write(vpu, &av1_cdef_damping, cdef->damping_minus_3);
+
+	for (i = 0; i < (1 << cdef->bits); i++) {

for (i = 0; i < BIT(cdef->bits); i++) {

+		luma_pri_strength |= cdef->y_pri_strength[i] << (i * 4);
+		if (cdef->y_sec_strength[i] == 4)
+			luma_sec_strength |= 3 << (i * 2);
+		else
+			luma_sec_strength |= cdef->y_sec_strength[i] << (i * 2);
+
+		chroma_pri_strength |= cdef->uv_pri_strength[i] << (i * 4);
+		if (cdef->uv_sec_strength[i] == 4)
+			chroma_sec_strength |= 3 << (i * 2);
+		else
+			chroma_sec_strength |= cdef->uv_sec_strength[i] << (i * 2);
+	}
+
+	hantro_reg_write(vpu, &av1_cdef_luma_primary_strength,
+			 luma_pri_strength);
+	hantro_reg_write(vpu, &av1_cdef_luma_secondary_strength,
+			 luma_sec_strength);
+	hantro_reg_write(vpu, &av1_cdef_chroma_primary_strength,
+			 chroma_pri_strength);
+	hantro_reg_write(vpu, &av1_cdef_chroma_secondary_strength,
+			 chroma_sec_strength);
+
+	hantro_write_addr(vpu, AV1_CDEF_COL, av1_dec->cdef_col.dma);
+}
+

..snip..

diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h b/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
new file mode 100644
index 000000000000..182e6c830ff6
--- /dev/null
+++ b/drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
@@ -0,0 +1,477 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022, Collabora
+ *
+ * Author: Benjamin Gaignard <benjamin.gaignard@xxxxxxxxxxxxx>
+ */
+
+#ifndef _ROCKCHIP_VPU981_REGS_H_
+#define _ROCKCHIP_VPU981_REGS_H_
+
+#include "hantro.h"
+
+#define AV1_SWREG(nr)	((nr) * 4)
+
+#define AV1_DEC_REG(b, s, m) \
+	((const struct hantro_reg) { \
+		.base = AV1_SWREG(b), \
+		.shift = s, \
+		.mask = m, \
+	})
+
+#define AV1_REG_INTERRUPT		AV1_SWREG(1)
+#define AV1_REG_INTERRUPT_DEC_RDY_INT	BIT(12)
+
+#define AV1_REG_CONFIG			AV1_SWREG(2)
+#define AV1_REG_CONFIG_DEC_CLK_GATE_E	BIT(10)
+
+#define av1_dec_e			AV1_DEC_REG(1, 0, 0x1)

I personally dislike definitions in lowercase, but it's like that across
the entire driver and we must keep consistency, so it's fine.

Regards,
Angelo



[Index of Archives]     [Device Tree Compilter]     [Device Tree Spec]     [Linux Driver Backports]     [Video for Linux]     [Linux USB Devel]     [Linux PCI Devel]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Yosemite Backpacking]


  Powered by Linux