Re: [PATCH v4 4/4] media: mediatek: add MT8188 AIE driver

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 2025-02-20 at 14:59 +0800, bo.kong wrote:
> From: Bo Kong <Bo.Kong@xxxxxxxxxxxx>
> 
> Add a V4L2 sub-device driver for MT8188 AIE.
> 
> Signed-off-by: Bo Kong <Bo.Kong@xxxxxxxxxxxx>
> ---
> Changes in v4:
> 1. Remove ccflags that are not used in Makefile
> 2. Optimize the write register operation
> 3. Remove excess config from Kconfig
> 4. Remove the probe function related to hw_version
> 5. Remove mtk_aie_hw_connect,mtk_aie_hw_enable,mtk_aie_hw_disconnect etc
> 6. Fix coding style in mtk_aie.h
> 7. Replace signed short with s16
> 8. Remove aie_reset_output_buf function, optimize this part of the code
> 9. Remove struct race_result and other structures, and use s16 instead
> of structures
> 10. Change multiple different types of width and height into the same
> structure
> 11. Remove all unnecessary variable initialization operations,retaining
> some variables that need to be initialized
> 12. Delete print physical address
> 13. Remove aie_init_table and combine aie_init_table into the
> aie_update_table function
> 14. Optimize the readability of some codes, add aie_calculate_pa
> function
> 15. Optimize the aie_execute function and divide this function into
> aie_execute_face_detection, aie_execute_attribute_detection,
> aie_execute_fld_detection functions
> 16. Modify arrays with all 5 into define ANCHOR_EN_NUM
> 17. Remove mtk_aie_get_variant function
> 18. The version in the code was deleted, but the variables in the enque
> info structure cannot be deleted. Once deleted, the AIE will not work
> because this structure needs to be aligned with User Space.
> 
> Changes in v3:
> 1. Remove not used include file, include only headers which AIE use
> 2. Remove Makefile some private driver headers
> 
> Changes in v2:
> 1. Fix coding style
> ---
>  drivers/media/platform/mediatek/Kconfig       |    1 +
>  drivers/media/platform/mediatek/Makefile      |    1 +
>  drivers/media/platform/mediatek/aie/Kconfig   |   12 +
>  drivers/media/platform/mediatek/aie/Makefile  |    5 +
>  drivers/media/platform/mediatek/aie/mtk_aie.h | 1181 +++++++++
>  .../media/platform/mediatek/aie/mtk_aie_53.c  | 1300 ++++++++++
>  .../media/platform/mediatek/aie/mtk_aie_drv.c | 2309 +++++++++++++++++
>  7 files changed, 4809 insertions(+)
>  create mode 100644 drivers/media/platform/mediatek/aie/Kconfig
>  create mode 100644 drivers/media/platform/mediatek/aie/Makefile
>  create mode 100644 drivers/media/platform/mediatek/aie/mtk_aie.h
>  create mode 100644 drivers/media/platform/mediatek/aie/mtk_aie_53.c
>  create mode 100644 drivers/media/platform/mediatek/aie/mtk_aie_drv.c
> 
> diff --git a/drivers/media/platform/mediatek/Kconfig b/drivers/media/platform/mediatek/Kconfig
> index 84104e2cd024..cd161272666b 100644
> --- a/drivers/media/platform/mediatek/Kconfig
> +++ b/drivers/media/platform/mediatek/Kconfig
> @@ -2,6 +2,7 @@
>  
>  comment "Mediatek media platform drivers"
>  
> +source "drivers/media/platform/mediatek/aie/Kconfig"
>  source "drivers/media/platform/mediatek/jpeg/Kconfig"
>  source "drivers/media/platform/mediatek/mdp/Kconfig"
>  source "drivers/media/platform/mediatek/vcodec/Kconfig"
> diff --git a/drivers/media/platform/mediatek/Makefile b/drivers/media/platform/mediatek/Makefile
> index 38e6ba917fe5..23a096fdf21c 100644
> --- a/drivers/media/platform/mediatek/Makefile
> +++ b/drivers/media/platform/mediatek/Makefile
> @@ -1,4 +1,5 @@
>  # SPDX-License-Identifier: GPL-2.0-only
> +obj-y += aie/
>  obj-y += jpeg/
>  obj-y += mdp/
>  obj-y += vcodec/
> diff --git a/drivers/media/platform/mediatek/aie/Kconfig b/drivers/media/platform/mediatek/aie/Kconfig
> new file mode 100644
> index 000000000000..ffbe0a79a2ea
> --- /dev/null
> +++ b/drivers/media/platform/mediatek/aie/Kconfig
> @@ -0,0 +1,12 @@
> +config VIDEO_MTK_AIE
> +	tristate "MediaTek AI engine function"
> +	depends on OF
> +	select V4L2_MEM2MEM_DEV
> +	select VIDEOBUF2_DMA_CONTIG
> +	select MEDIA_CONTROLLER_REQUEST_API
> +	help
> +	  Support the AI engine (AIE) feature
> +
> +	  AIE driver is a V4L2 memory-to-memory device driver which
> +	  provides hardware accelerated face detection function,
> +	  it can detect different sizes of faces in a raw image.
> diff --git a/drivers/media/platform/mediatek/aie/Makefile b/drivers/media/platform/mediatek/aie/Makefile
> new file mode 100644
> index 000000000000..20265699e4d3
> --- /dev/null
> +++ b/drivers/media/platform/mediatek/aie/Makefile
> @@ -0,0 +1,5 @@
> +# SPDX-License-Identifier: GPL-2.0
> +mtk-aie-$(CONFIG_VIDEO_MTK_AIE) += mtk_aie_53.o
> +mtk-aie-$(CONFIG_VIDEO_MTK_AIE) += mtk_aie_drv.o
> +
> +obj-$(CONFIG_VIDEO_MTK_AIE) += mtk-aie.o
> \ No newline at end of file
> diff --git a/drivers/media/platform/mediatek/aie/mtk_aie.h b/drivers/media/platform/mediatek/aie/mtk_aie.h
> new file mode 100644
> index 000000000000..d2eec80a4dda
> --- /dev/null
> +++ b/drivers/media/platform/mediatek/aie/mtk_aie.h
> @@ -0,0 +1,1181 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (c) 2020 MediaTek Inc.
> + * Author: Fish Wu <fish.wu@xxxxxxxxxxxx>
> + */
> +
> +#ifndef __MTK_AIE_H__
> +#define __MTK_AIE_H__
> +
> +#include <media/v4l2-ctrls.h>
> +#include <media/v4l2-device.h>
> +
> +#define Y2R_SRC_DST_FORMAT		0
> +#define Y2R_IN_W_H			1
> +#define Y2R_OUT_W_H			2
> +#define Y2R_RA0_RA1_EN			3
> +#define Y2R_IN_X_Y_SIZE0		4
> +#define Y2R_IN_STRIDE0_BUS_SIZE0	5
> +#define Y2R_IN_X_Y_SIZE1		6
> +#define Y2R_IN_STRIDE1_BUS_SIZE1	7
> +#define Y2R_OUT_X_Y_SIZE0		8
> +#define Y2R_OUT_STRIDE0_BUS_SIZE0	9
> +#define Y2R_OUT_X_Y_SIZE1		10
> +#define Y2R_OUT_STRIDE1_BUS_SIZE1	11
> +#define Y2R_OUT_X_Y_SIZE2		12
> +#define Y2R_OUT_STRIDE2_BUS_SIZE2	13
> +#define Y2R_IN_0			14
> +#define Y2R_IN_1			15
> +#define Y2R_OUT_0			16
> +#define Y2R_OUT_1			17
> +#define Y2R_OUT_2			18
> +#define Y2R_RS_SEL_SRZ_EN		19
> +#define Y2R_X_Y_MAG			20
> +#define Y2R_SRZ_HORI_STEP		22
> +#define Y2R_SRZ_VERT_STEP		23
> +#define Y2R_PADDING_EN_UP_DOWN		26
> +#define Y2R_PADDING_RIGHT_LEFT		27
> +#define Y2R_CO2_FMT_MODE_EN		28
> +#define Y2R_CO2_CROP_X			29
> +#define Y2R_CO2_CROP_Y			30
> +#define Y2R_CON_IN_BA_MSB		31
> +#define Y2R_CON_OUT_BA_MSB		32

Use a struct to define this.

struct y2r_info {
	u32 src_dst_format;
	u32 in_w_h;
...
	u32 con_in_ba_msb;
	u32 con_out_ba_msb;
};

> +
> +#define RS_IN_0				22
> +#define RS_IN_1				23
> +#define RS_IN_2				24
> +#define RS_OUT_0			25
> +#define RS_OUT_1			26
> +#define RS_OUT_2			27
> +#define RS_X_Y_MAG			1
> +#define RS_SRZ_HORI_STEP		3
> +#define RS_SRZ_VERT_STEP		4
> +#define RS_INPUT_W_H			7
> +#define RS_OUTPUT_W_H			8
> +#define RS_IN_X_Y_SIZE0			10
> +#define RS_IN_STRIDE0			11
> +#define RS_IN_X_Y_SIZE1			12
> +#define RS_IN_STRIDE1			13
> +#define RS_IN_X_Y_SIZE2			14
> +#define RS_IN_STRIDE2			15
> +#define RS_OUT_X_Y_SIZE0		16
> +#define RS_OUT_STRIDE0			17
> +#define RS_OUT_X_Y_SIZE1		18
> +#define RS_OUT_STRIDE1			19
> +#define RS_OUT_X_Y_SIZE2		20
> +#define RS_OUT_STRIDE2			21
> +#define RS_CON_IN_BA_MSB		28
> +#define RS_CON_OUT_BA_MSB		29

Use a struct to define this.

struct rs_info {
	u32 in_0;
	u32 in_1;
...
	u32 rs_con_in_ba_msb;
	u32 rs_con_out_ba_msb;
};

> +
> +#define FD_INPUT_ROTATE			1
> +#define FD_CONV_WIDTH_MOD6		2
> +#define FD_CONV_IMG_W_H			4
> +
> +#define FD_IN_IMG_W_H			5
> +#define FD_OUT_IMG_W_H			6
> +
> +#define FD_IN_X_Y_SIZE0			9
> +#define FD_IN_X_Y_SIZE1			11
> +#define FD_IN_X_Y_SIZE2			13
> +#define FD_IN_X_Y_SIZE3			15
> +
> +#define FD_IN_STRIDE0_BUS_SIZE0		10
> +#define FD_IN_STRIDE1_BUS_SIZE1		12
> +#define FD_IN_STRIDE2_BUS_SIZE2		14
> +#define FD_IN_STRIDE3_BUS_SIZE3		16
> +
> +#define FD_OUT_X_Y_SIZE0		17
> +#define FD_OUT_X_Y_SIZE1		19
> +#define FD_OUT_X_Y_SIZE2		21
> +#define FD_OUT_X_Y_SIZE3		23
> +
> +#define FD_OUT_STRIDE0_BUS_SIZE0	18
> +#define FD_OUT_STRIDE1_BUS_SIZE1	20
> +#define FD_OUT_STRIDE2_BUS_SIZE2	22
> +#define FD_OUT_STRIDE3_BUS_SIZE3	24
> +
> +#define FD_IN_0				27
> +#define FD_IN_1				28
> +#define FD_IN_2				29
> +#define FD_IN_3				30
> +
> +#define FD_OUT_0			31
> +#define FD_OUT_1			32
> +#define FD_OUT_2			33
> +#define FD_OUT_3			34
> +
> +#define FD_KERNEL_0			35
> +#define FD_KERNEL_1			36
> +
> +#define FD_RPN_SET			37
> +#define FD_IMAGE_COORD			38
> +#define FD_IMAGE_COORD_XY_OFST		39
> +#define FD_BIAS_ACCU			47
> +#define FD_SRZ_FDRZ_RS			48
> +#define FD_SRZ_HORI_STEP		49
> +#define FD_SRZ_VERT_STEP		50
> +#define FD_SRZ_HORI_SUB_INT_OFST	51
> +#define FD_SRZ_VERT_SUB_INT_OFST	52
> +
> +#define FD_CON_IN_BA_MSB		53
> +#define FD_CON_OUT_BA_MSB		54
> +#define FD_CON_KERNEL_BA_MSB		55

Use a struct to define this.

struct fd_info {
	u32 input_rotate;
	u32 conv_width_mod6;
	u32 conv_width;
...
	u32 fd_con_in_ba_msb;
	u32 fd_con_out_ba_msb;
	u32 fd_con_kernel_ba_msb;
};

> +
> +#define SRZ_BIT (BIT_MASK(16) | BIT_MASK(12) | BIT_MASK(8) | BIT_MASK(0))
> +#define RESET_BIT16 BIT(16)
> +#define RESET_BIT17 BIT(17)
> +#define RESET_BIT (RESET_BIT16 | RESET_BIT17)
> +
> +#define FD_LOOP_NUM			87
> +#define RPN0_LOOP_NUM			86
> +#define RPN1_LOOP_NUM			57
> +#define RPN2_LOOP_NUM			28
> +
> +#define PYM0_START_LOOP			58
> +#define PYM1_START_LOOP			29
> +#define PYM2_START_LOOP			0

#define RPN_NUM		3
#define RPN_LOOP_NUM	29
#define FD_LOOP_NUM	RPN_NUM * RPN_LOOP_NUM

#define RPNX_LOOP_NUM(x)	((3 - x) * RPN_LOOP_NUM - 1)
#define PYMX_START_LOOP(x)	(2 - x) * RPN_LOOP_NUM

You may not need RPNX_LOOP_NUM and PYMX_START_LOOP by some optimization.

> +
> +#define ATTR_LOOP_NUM			26
> +#define AGE_OUT_RGS			17
> +#define GENDER_OUT_RGS			20
> +#define INDIAN_OUT_RGS			22
> +#define RACE_OUT_RGS			25
> +
> +#define INPUT_WDMA_WRA_NUM		4
> +#define OUTPUT_WDMA_WRA_NUM		4
> +#define KERNEL_RDMA_RA_NUM		2
> +
> +#define MAX_ENQUE_FRAME_NUM		10
> +#define PYM_NUM				3
> +#define COLOR_NUM			3
> +
> +#define ATTR_MODE_PYRAMID_WIDTH		128
> +#define ATTR_OUT_SIZE			32
> +
> +/* AIE 2.0 3.X register offset */
> +#define AIE_START_REG			0x000
> +#define AIE_ENABLE_REG			0x004
> +#define AIE_LOOP_REG			0x008
> +#define AIE_YUV2RGB_CON_BASE_ADR_REG	0x00c
> +#define AIE_RS_CON_BASE_ADR_REG		0x010
> +#define AIE_FD_CON_BASE_ADR_REG		0x014
> +#define AIE_INT_EN_REG			0x018
> +#define AIE_INT_REG			0x01c
> +#define AIE_RESULT_0_REG		0x08c
> +#define AIE_RESULT_1_REG		0x090
> +#define AIE_DMA_CTL_REG			0x094
> +
> +/* AIE 3.0 register offset */
> +#define AIE_YUV2RGB_CON_BASE_ADR_MSB	0x14C
> +#define AIE_RS_CON_BASE_ADR_MSB		0x150
> +#define AIE_FD_CON_BASE_ADR_MSB		0x154
> +
> +/* AIE 3.0 FLD register offset */
> +#define FLD_EN				0x400
> +#define FLD_BASE_ADDR_FACE_0		0x404
> +#define FLD_BASE_ADDR_FACE_1		0x408
> +#define FLD_BASE_ADDR_FACE_2		0x40C
> +#define FLD_BASE_ADDR_FACE_3		0x410
> +#define FLD_BASE_ADDR_FACE_4		0x414
> +#define FLD_BASE_ADDR_FACE_5		0x418
> +#define FLD_BASE_ADDR_FACE_6		0x41C
> +#define FLD_BASE_ADDR_FACE_7		0x420
> +#define FLD_BASE_ADDR_FACE_8		0x424
> +#define FLD_BASE_ADDR_FACE_9		0x428
> +#define FLD_BASE_ADDR_FACE_10		0x42C
> +#define FLD_BASE_ADDR_FACE_11		0x430
> +#define FLD_BASE_ADDR_FACE_12		0x434
> +#define FLD_BASE_ADDR_FACE_13		0x438
> +#define FLD_BASE_ADDR_FACE_14		0x43C
> +
> +#define FLD_INFO_0_FACE_0		0x440
> +#define FLD_INFO_1_FACE_0		0x444
> +#define FLD_INFO_2_FACE_0		0x448
> +#define FLD_INFO_0_FACE_1		0x44C
> +#define FLD_INFO_1_FACE_1		0x450
> +#define FLD_INFO_2_FACE_1		0x454
> +#define FLD_INFO_0_FACE_2		0x458
> +#define FLD_INFO_1_FACE_2		0x45C
> +#define FLD_INFO_2_FACE_2		0x460
> +#define FLD_INFO_0_FACE_3		0x464
> +#define FLD_INFO_1_FACE_3		0x468
> +#define FLD_INFO_2_FACE_3		0x46C
> +#define FLD_INFO_0_FACE_4		0x470
> +#define FLD_INFO_1_FACE_4		0x474
> +#define FLD_INFO_2_FACE_4		0x478
> +#define FLD_INFO_0_FACE_5		0x47C
> +#define FLD_INFO_1_FACE_5		0x480
> +#define FLD_INFO_2_FACE_5		0x484
> +#define FLD_INFO_0_FACE_6		0x488
> +#define FLD_INFO_1_FACE_6		0x48C
> +#define FLD_INFO_2_FACE_6		0x490
> +#define FLD_INFO_0_FACE_7		0x494
> +#define FLD_INFO_1_FACE_7		0x498
> +
> +#define FLD_INFO_2_FACE_7		0x4A0
> +#define FLD_INFO_0_FACE_8		0x4A4
> +#define FLD_INFO_1_FACE_8		0x4A8
> +#define FLD_INFO_2_FACE_8		0x4AC
> +#define FLD_INFO_0_FACE_9		0x4B0
> +#define FLD_INFO_1_FACE_9		0x4B4
> +#define FLD_INFO_2_FACE_9		0x4B8
> +#define FLD_INFO_0_FACE_10		0x4BC
> +#define FLD_INFO_1_FACE_10		0x4C0
> +#define FLD_INFO_2_FACE_10		0x4C4
> +#define FLD_INFO_0_FACE_11		0x4C8
> +#define FLD_INFO_1_FACE_11		0x4CC
> +#define FLD_INFO_2_FACE_11		0x4D0
> +#define FLD_INFO_0_FACE_12		0x4D4
> +#define FLD_INFO_1_FACE_12		0x4D8
> +#define FLD_INFO_2_FACE_12		0x4DC
> +#define FLD_INFO_0_FACE_13		0x4E0
> +#define FLD_INFO_1_FACE_13		0x4E4
> +#define FLD_INFO_2_FACE_13		0x4E8
> +#define FLD_INFO_0_FACE_14		0x4EC
> +#define FLD_INFO_1_FACE_14		0x4F0
> +#define FLD_INFO_2_FACE_14		0x4F4
> +
> +#define FLD_MODEL_PARA0			0x4F8
> +#define FLD_MODEL_PARA1			0x4FC
> +#define FLD_MODEL_PARA2			0x500
> +#define FLD_MODEL_PARA3			0x504
> +#define FLD_MODEL_PARA4			0x508
> +#define FLD_MODEL_PARA5			0x50C
> +#define FLD_MODEL_PARA6			0x510
> +#define FLD_MODEL_PARA7			0x514
> +#define FLD_MODEL_PARA8			0x518
> +#define FLD_MODEL_PARA9			0x51C
> +#define FLD_MODEL_PARA10		0x520
> +#define FLD_MODEL_PARA11		0x524
> +#define FLD_MODEL_PARA12		0x528
> +#define FLD_MODEL_PARA13		0x52C
> +#define FLD_MODEL_PARA14		0x530
> +#define FLD_MODEL_PARA15		0x534
> +#define FLD_MODEL_PARA16		0x538
> +#define FLD_DEBUG_INFO0			0x53C
> +#define FLD_DEBUG_INFO1			0x540
> +
> +#define FLD_BUSY			0x544
> +#define FLD_DONE			0x548

From

#define FLD_BASE_ADDR_FACE_1 0x408

to

define FLD_DONE 0x548

are useless, so drop them.

> +#define FLD_SRC_WD_HT			0x54C
> +
> +/* n: min 0, max 14 */
> +#define FLD_PL_IN_BASE_ADDR_0_(n)	(0x550 + 4 * (n))
> +#define FLD_PL_IN_BASE_ADDR_1_(n)	(0x5C8 + 4 * (n))
> +#define FLD_PL_IN_BASE_ADDR_2_(n)	(0x640 + 4 * (n))
> +#define FLD_PL_IN_BASE_ADDR_3_(n)	(0x6B8 + 4 * (n))
> +#define FLD_SH_IN_BASE_ADDR_(n)		(0x85C + 4 * (n))
> +
> +#define FLD_PL_IN_SIZE_0		0x730
> +#define FLD_PL_IN_STRIDE_0		0x734
> +#define FLD_PL_IN_SIZE_1		0x738
> +#define FLD_PL_IN_STRIDE_1		0x73C
> +#define FLD_PL_IN_SIZE_2_0		0x740
> +#define FLD_PL_IN_STRIDE_2_0		0x744
> +#define FLD_PL_IN_SIZE_2_1		0x748
> +#define FLD_PL_IN_STRIDE_2_1		0x74C
> +#define FLD_PL_IN_SIZE_2_2		0x750
> +#define FLD_PL_IN_STRIDE_2_2		0x754
> +#define FLD_PL_IN_SIZE_3		0x758
> +#define FLD_PL_IN_STRIDE_3		0x75C
> +
> +#define FLD_SH_IN_BASE_ADDR_0		0x760
> +#define FLD_SH_IN_BASE_ADDR_1		0x764
> +#define FLD_SH_IN_BASE_ADDR_2		0x768
> +#define FLD_SH_IN_BASE_ADDR_3		0x76C
> +#define FLD_SH_IN_BASE_ADDR_4		0x770
> +#define FLD_SH_IN_BASE_ADDR_5		0x774
> +#define FLD_SH_IN_BASE_ADDR_6		0x778
> +#define FLD_SH_IN_BASE_ADDR_7		0x77C
> +#define FLD_SH_IN_BASE_ADDR_8		0x780
> +#define FLD_SH_IN_BASE_ADDR_9		0x784
> +#define FLD_SH_IN_BASE_ADDR_10		0x788
> +#define FLD_SH_IN_BASE_ADDR_11		0x78C
> +#define FLD_SH_IN_BASE_ADDR_12		0x790
> +#define FLD_SH_IN_BASE_ADDR_13		0x794
> +#define FLD_SH_IN_BASE_ADDR_14		0x798
> +#define FLD_SH_IN_BASE_ADDR_15		0x79C
> +#define FLD_SH_IN_BASE_ADDR_16		0x7A0
> +#define FLD_SH_IN_BASE_ADDR_17		0x7A4
> +#define FLD_SH_IN_BASE_ADDR_18		0x7A8
> +#define FLD_SH_IN_BASE_ADDR_19		0x7AC
> +#define FLD_SH_IN_BASE_ADDR_20		0x7B0
> +#define FLD_SH_IN_BASE_ADDR_21		0x7B4
> +#define FLD_SH_IN_BASE_ADDR_22		0x7B8
> +#define FLD_SH_IN_BASE_ADDR_23		0x7BC
> +#define FLD_SH_IN_BASE_ADDR_24		0x7C0
> +#define FLD_SH_IN_BASE_ADDR_25		0x7C4
> +#define FLD_SH_IN_BASE_ADDR_26		0x7C8
> +#define FLD_SH_IN_BASE_ADDR_27		0x7CC
> +#define FLD_SH_IN_BASE_ADDR_28		0x7D0
> +#define FLD_SH_IN_BASE_ADDR_29		0x7D4

From

#define FLD_SH_IN_BASE_ADDR_0 0x760

to

#define FLD_SH_IN_BASE_ADDR_29 0x7D4

are useless, so drop them.

> +
> +#define FLD_SH_IN_SIZE_0		0x7D8
> +#define FLD_SH_IN_STRIDE_0		0x7DC
> +#define FLD_TR_OUT_BASE_ADDR_0		0x7E0
> +#define FLD_TR_OUT_SIZE_0		0x7E4
> +#define FLD_TR_OUT_STRIDE_0		0x7E8
> +#define FLD_PP_OUT_BASE_ADDR_0		0x7EC
> +#define FLD_PP_OUT_SIZE_0		0x7F0
> +#define FLD_PP_OUT_STRIDE_0		0x7F4
> +#define FLD_SPARE			0x7F8
> +
> +#define FLD_BASE_ADDR_FACE_0_7_MSB	0x7FC
> +#define FLD_BASE_ADDR_FACE_8_14_MSB	0x800
> +
> +#define FLD_PL_IN_BASE_ADDR_0_0_7_MSB	0x804
> +#define FLD_PL_IN_BASE_ADDR_0_8_15_MSB	0x808
> +#define FLD_PL_IN_BASE_ADDR_0_16_23_MSB	0x80C
> +#define FLD_PL_IN_BASE_ADDR_0_24_29_MSB	0x810
> +
> +#define FLD_PL_IN_BASE_ADDR_1_0_7_MSB	0x814
> +#define FLD_PL_IN_BASE_ADDR_1_8_15_MSB	0x818
> +#define FLD_PL_IN_BASE_ADDR_1_16_23_MSB	0x81C
> +#define FLD_PL_IN_BASE_ADDR_1_24_29_MSB	0x820
> +
> +#define FLD_PL_IN_BASE_ADDR_2_0_7_MSB	0x824
> +#define FLD_PL_IN_BASE_ADDR_2_8_15_MSB	0x828
> +#define FLD_PL_IN_BASE_ADDR_2_16_23_MSB	0x82C
> +#define FLD_PL_IN_BASE_ADDR_2_24_29_MSB	0x830
> +
> +#define FLD_PL_IN_BASE_ADDR_3_0_7_MSB	0x834
> +#define FLD_PL_IN_BASE_ADDR_3_8_15_MSB	0x838
> +#define FLD_PL_IN_BASE_ADDR_3_16_23_MSB	0x83C
> +#define FLD_PL_IN_BASE_ADDR_3_24_29_MSB	0x840
> +
> +#define FLD_SH_IN_BASE_ADDR_0_7_MSB	0x844
> +#define FLD_SH_IN_BASE_ADDR_8_15_MSB	0x848
> +#define FLD_SH_IN_BASE_ADDR_16_23_MSB	0x84C
> +#define FLD_SH_IN_BASE_ADDR_24_29_MSB	0x850
> +
> +#define FLD_BS_IN_BASE_ADDR_0_7_MSB	0x8d4
> +#define FLD_BS_IN_BASE_ADDR_8_15_MSB	0x8d8
> +
> +#define FLD_TR_OUT_BASE_ADDR_0_MSB	0x854
> +#define FLD_PP_OUT_BASE_ADDR_0_MSB	0x858
> +
> +#define FLD_BS_IN_BASE_ADDR_14		0x894
> +
> +#define FLD_BS_BIAS			0x8E4
> +#define FLD_CV_FM_RANGE_0		0x8E8
> +#define FLD_CV_FM_RANGE_1		0x8EC
> +#define FLD_CV_PM_RANGE_0		0x8F0
> +#define FLD_CV_PM_RANGE_1		0x8F4
> +#define FLD_BS_RANGE_0			0x8F8
> +#define FLD_BS_RANGE_1			0x8FC
> +
> +#define MTK_FD_OUTPUT_MIN_WIDTH		16U
> +#define MTK_FD_OUTPUT_MIN_HEIGHT	16U
> +#define MTK_FD_OUTPUT_MAX_WIDTH		4096U
> +#define MTK_FD_OUTPUT_MAX_HEIGHT	4096U
> +
> +#define MTK_FD_HW_TIMEOUT_IN_MSEC	2000
> +#define MAX_FACE_NUM			1024
> +#define RLT_NUM				48
> +#define GENDER_OUT			32
> +
> +#define RACE_RST_X_NUM			4
> +#define RACE_RST_Y_NUM			64
> +#define GENDER_RST_X_NUM		2
> +#define GENDER_RST_Y_NUM		64
> +#define MRACE_RST_NUM			4
> +#define MGENDER_RST_NUM			2
> +#define MAGE_RST_NUM			2
> +#define MINDIAN_RST_NUM			2
> +
> +#define FLD_FOREST			14
> +#define FLD_POINT			500
> +
> +#define FLD_STEP_NUM			6
> +#define FLD_MAX_FRAME			15
> +
> +#define FLD_STEP_BLINK			0
> +#define FLD_STEP_CV			1
> +#define FLD_STEP_FP			2
> +#define FLD_STEP_LEAF			3
> +#define FLD_STEP_KM02			4
> +#define FLD_STEP_KM13			5
> +
> +#define FLD_BLINK_WEIGHT_FOREST14_SIZE	6416
> +#define FLD_CV_SIZE			19392
> +#define FLD_FP_SIZE			80160
> +#define FLD_LEAFNODE_SIZE		4608000
> +#define FLD_TREE_KM02_SIZE		120000
> +#define FLD_TREE_KM13_SIZE		120000
> +#define FLD_OUTPUT_SIZE			112
> +
> +/* FLD_OUTPUT_X_SIZE: min: 1, max: 15 */
> +#define FLD_OUTPUT_X_SIZE		9
> +
> +#define FLD_CUR_LANDMARK		11
> +
> +#define RESULT_SIZE			(RLT_NUM * MAX_FACE_NUM)
> +
> +#define ANCHOR_EN_NUM			5
> +
> +static const unsigned int fd_wdma_en[FD_LOOP_NUM][OUTPUT_WDMA_WRA_NUM] = {

When out_stride_size[i][j] > 0, imply fd_wdma_en[i][j] = 1, otherwise 0.
So you could drop fd_wdma_en[][] and use out_stride_size[][] for fd_wdma_en.

> +	{ 1, 0, 0, 0 }, { 1, 0, 1, 0 }, { 1, 0, 1, 0 }, { 1, 0, 0, 0 },
> +	{ 1, 1, 1, 1 }, { 1, 1, 1, 1 }, { 1, 0, 0, 0 }, { 1, 0, 1, 0 },
> +	{ 1, 1, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 1, 0 }, { 1, 1, 0, 0 },
> +	{ 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 },
> +	{ 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 1, 0, 0 }, { 1, 1, 0, 0 },
> +	{ 1, 1, 0, 0 }, { 1, 0, 0, 0 }, { 1, 1, 1, 1 }, { 1, 1, 1, 1 },
> +	{ 1, 1, 0, 0 }, { 1, 1, 0, 0 }, { 1, 1, 0, 0 }, { 1, 0, 0, 0 },
> +	{ 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 1, 0 }, { 1, 0, 1, 0 },
> +	{ 1, 0, 0, 0 }, { 1, 1, 1, 1 }, { 1, 1, 1, 1 }, { 1, 0, 0, 0 },
> +	{ 1, 0, 1, 0 }, { 1, 1, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 1, 0 },
> +	{ 1, 1, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 },
> +	{ 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 1, 0, 0 },
> +	{ 1, 1, 0, 0 }, { 1, 1, 0, 0 }, { 1, 0, 0, 0 }, { 1, 1, 1, 1 },
> +	{ 1, 1, 1, 1 }, { 1, 1, 0, 0 }, { 1, 1, 0, 0 }, { 1, 1, 0, 0 },
> +	{ 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 1, 0 },
> +	{ 1, 0, 1, 0 }, { 1, 0, 0, 0 }, { 1, 1, 1, 1 }, { 1, 1, 1, 1 },
> +	{ 1, 0, 0, 0 }, { 1, 0, 1, 0 }, { 1, 1, 0, 0 }, { 1, 0, 0, 0 },
> +	{ 1, 0, 1, 0 }, { 1, 1, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 },
> +	{ 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 },
> +	{ 1, 1, 0, 0 }, { 1, 1, 0, 0 }, { 1, 1, 0, 0 }, { 1, 0, 0, 0 },
> +	{ 1, 1, 1, 1 }, { 1, 1, 1, 1 }, { 1, 1, 0, 0 }, { 1, 1, 0, 0 },
> +	{ 1, 1, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 }
> +};
> +
> +static const unsigned int out_stride_size[FD_LOOP_NUM][OUTPUT_WDMA_WRA_NUM] = {
> +	{ 1, 0, 0, 0 }, { 1, 0, 2, 0 }, { 1, 0, 2, 0 }, { 1, 0, 0, 0 },
> +	{ 1, 1, 2, 2 }, { 1, 1, 2, 2 }, { 1, 0, 0, 0 }, { 1, 0, 2, 0 },
> +	{ 1, 1, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 2, 0 }, { 1, 1, 0, 0 },
> +	{ 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 },
> +	{ 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 1, 0, 0 }, { 1, 1, 0, 0 },
> +	{ 1, 1, 0, 0 }, { 1, 0, 0, 0 }, { 1, 1, 1, 1 }, { 1, 1, 1, 1 },
> +	{ 1, 1, 0, 0 }, { 1, 1, 0, 0 }, { 1, 1, 0, 0 }, { 1, 0, 0, 0 },
> +	{ 3, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 2, 0 }, { 1, 0, 2, 0 },
> +	{ 1, 0, 0, 0 }, { 1, 1, 2, 2 }, { 1, 1, 2, 2 }, { 1, 0, 0, 0 },
> +	{ 1, 0, 2, 0 }, { 1, 1, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 2, 0 },
> +	{ 1, 1, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 },
> +	{ 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 1, 0, 0 },
> +	{ 1, 1, 0, 0 }, { 1, 1, 0, 0 }, { 1, 0, 0, 0 }, { 1, 1, 1, 1 },
> +	{ 1, 1, 1, 1 }, { 1, 1, 0, 0 }, { 1, 1, 0, 0 }, { 1, 1, 0, 0 },
> +	{ 1, 0, 0, 0 }, { 3, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 2, 0 },
> +	{ 1, 0, 2, 0 }, { 1, 0, 0, 0 }, { 1, 1, 2, 2 }, { 1, 1, 2, 2 },
> +	{ 1, 0, 0, 0 }, { 1, 0, 2, 0 }, { 1, 1, 0, 0 }, { 1, 0, 0, 0 },
> +	{ 1, 0, 2, 0 }, { 1, 1, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 },
> +	{ 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 },
> +	{ 1, 1, 0, 0 }, { 1, 1, 0, 0 }, { 1, 1, 0, 0 }, { 1, 0, 0, 0 },
> +	{ 1, 1, 1, 1 }, { 1, 1, 1, 1 }, { 1, 1, 0, 0 }, { 1, 1, 0, 0 },
> +	{ 1, 1, 0, 0 }, { 1, 0, 0, 0 }, { 3, 0, 0, 0 }
> +};
> +
> +static const unsigned int fd_ker_rdma_size[FD_LOOP_NUM][KERNEL_RDMA_RA_NUM] = {
> +	{ 240, 240 }, { 1168, 1168 }, { 1168, 1168 }, { 272, 272 },
> +	{ 2320, 2320 }, { 2080, 2080 }, { 1040, 1040 }, { 4624, 4624 },
> +	{ 3104, 3104 }, { 9232, 9232 }, { 4624, 4624 }, { 4128, 4128 },
> +	{ 1040, 1040 }, { 4624, 4624 }, { 4624, 4624 }, { 1552, 1552 },
> +	{ 4624, 4624 }, { 4624, 4624 }, { 4128, 4128 }, { 1040, 1040 },
> +	{ 1040, 1040 }, { 528, 528 },	{ 4160, 4160 }, { 4160, 4160 },
> +	{ 2080, 2080 }, { 2080, 2080 }, { 2080, 2080 }, { 1040, 1040 },
> +	{ 0, 0 }, { 240, 240 }, { 1168, 1168 }, { 1168, 1168 },
> +	{ 272, 272 }, { 2320, 2320 }, { 2080, 2080 }, { 1040, 1040 },
> +	{ 4624, 4624 }, { 3104, 3104 }, { 9232, 9232 }, { 4624, 4624 },
> +	{ 4128, 4128 }, { 1040, 1040 }, { 4624, 4624 }, { 4624, 4624 },
> +	{ 1552, 1552 }, { 4624, 4624 }, { 4624, 4624 }, { 4128, 4128 },
> +	{ 1040, 1040 }, { 1040, 1040 }, { 528, 528 }, { 4160, 4160 },
> +	{ 4160, 4160 }, { 2080, 2080 }, { 2080, 2080 }, { 2080, 2080 },
> +	{ 1040, 1040 }, { 0, 0 }, { 240, 240 }, { 1168, 1168 },
> +	{ 1168, 1168 }, { 272, 272 },	{ 2320, 2320 }, { 2080, 2080 },
> +	{ 1040, 1040 }, { 4624, 4624 }, { 3104, 3104 }, { 9232, 9232 },
> +	{ 4624, 4624 }, { 4128, 4128 }, { 1040, 1040 }, { 4624, 4624 },
> +	{ 4624, 4624 }, { 1552, 1552 }, { 4624, 4624 }, { 4624, 4624 },
> +	{ 4128, 4128 }, { 1040, 1040 }, { 1040, 1040 }, { 528, 528 },
> +	{ 4160, 4160 }, { 4160, 4160 }, { 2080, 2080 }, { 2080, 2080 },
> +	{ 2080, 2080 }, { 1040, 1040 }, { 0, 0 }
> +};

This array include 3 identical copy, so you could reduce this array as:

#define RPN_NUM		3
#define RPN_LOOP_NUM	29

static const unsigned int fd_ker_rdma_size[RPN_LOOP_NUM][KERNEL_RDMA_RA_NUM] = {
 { 240, 240 }, { 1168, 1168 }, { 1168, 1168 }, { 272, 272 },
 { 2320, 2320 }, { 2080, 2080 }, { 1040, 1040 }, { 4624, 4624 },
 { 3104, 3104 }, { 9232, 9232 }, { 4624, 4624 }, { 4128, 4128 },
 { 1040, 1040 }, { 4624, 4624 }, { 4624, 4624 }, { 1552, 1552 },
 { 4624, 4624 }, { 4624, 4624 }, { 4128, 4128 }, { 1040, 1040 },
 { 1040, 1040 }, { 528, 528 }, { 4160, 4160 }, { 4160, 4160 },
 { 2080, 2080 }, { 2080, 2080 }, { 2080, 2080 }, { 1040, 1040 },
 { 0, 0 }
};

and use this array like this,

for (l = 0; l < RPN_NUM: l++)
	for (i = 0; i < RPN_LOOP_NUM; i++)
		for (j ...)
			fd_ker_rdma_size[i][j];


in addition, this array contain pair of identical value. So it could be reduced to

static const unsigned int fd_ker_rdma_size[RPN_LOOP_NUM] = {
 240, 1168, 1168, 272,
 2320, 2080, 1040, 4624,
 3104, 9232, 4624, 4128,
 1040, 4624, 4624, 1552,
 4624, 4624, 4128, 1040,
 1040, 528, 4160, 4160,
 2080, 2080, 2080, 1040,
 0
};

> +
> +static const unsigned int fd_out_stride2_in[FD_LOOP_NUM] = {
> +	0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
> +	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
> +	0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
> +};
> +
> +static const unsigned int fd_stride[FD_LOOP_NUM] = {
> +	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
> +	1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
> +	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1,
> +	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
> +};

This array include 3 identical copy, so you could reduce this array as:

#define RPN_NUM		3
#define RPN_LOOP_NUM	29

static const unsigned int fd_stride[RPN_LOOP_NUM] = {
 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1
}

and use this array like this,

for (l = 0; l < RPN_NUM: l++)
	for (i = 0; i < RPN_LOOP_NUM; i++)
		fd_stride[i];

But this be more simple as

#define FD_STRIDE(n)	(n == 0 ? 2 : 1)

so you could drop the array.

> +
> +static const unsigned int fd_maxpool[FD_LOOP_NUM] = {
> +	0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +	0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
> +	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
> +};

#define FD_MAXPOOL(n)	(n == 1 ? 1 : 0)

> +
> +static const unsigned int out_2size[FD_LOOP_NUM] = {
> +	0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +	0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0,
> +	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1,
> +	0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
> +};
> +
> +static const unsigned int in_ch_pack[FD_LOOP_NUM] = {
> +	1,  16, 16, 16, 16, 16, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
> +	32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 1, 16, 16, 16, 16, 16, 32,
> +	32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
> +	32, 32, 32, 0, 1, 16, 16, 16, 16, 16, 32, 32, 32, 32, 32, 32, 32, 32,
> +	32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0
> +};
> +
> +static const unsigned int outlayer[FD_LOOP_NUM] = {
> +	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
> +	1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +	0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
> +};
> +
> +static const unsigned int out_ch_pack[FD_LOOP_NUM] = {
> +	16, 16, 16, 16, 16, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
> +	32, 16, 16, 16, 32, 32, 32, 32, 32, 32, 0, 16, 16, 16, 16, 16, 32, 32,
> +	32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 16, 16, 16, 32, 32, 32,
> +	32, 32, 32, 0, 16, 16, 16, 16, 16, 32, 32, 32, 32, 32, 32, 32, 32, 32,
> +	32, 32, 32, 32, 32, 16, 16, 16, 32, 32, 32, 32, 32, 32, 0
> +};
> +
> +/* [loop][ch][output_index] */
> +static const signed int fd_rdma_en[FD_LOOP_NUM][INPUT_WDMA_WRA_NUM][2] = {
> +	{ { 99, 99 }, { 99, 99 }, { 99, 99 }, { -1, -1 } },
> +	{ { 0, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 1, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 1, 0 }, { 2, 0 }, { -1, -1 }, { -1, -1 } },
> +	{ { 3, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 1, 2 }, { 2, 2 }, { 4, 2 }, { 4, 3 } },
> +	{ { 5, 0 }, { 5, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 6, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 5, 0 }, { 5, 1 }, { 7, 0 }, { -1, -1 } },
> +	{ { 8, 0 }, { 8, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 9, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 5, 2 }, { 5, 3 }, { 7, 2 }, { 10, 2 } },
> +	{ { 11, 0 }, { 11, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 12, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 13, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 11, 0 }, { 11, 1 }, { 14, 0 }, { -1, -1 } },
> +	{ { 15, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 16, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 11, 0 }, { 11, 1 }, { 14, 0 }, { 17, 0 } },
> +	{ { 18, 0 }, { 18, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 18, 0 }, { 18, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 18, 0 }, { 18, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 18, 0 }, { 18, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 18, 0 }, { 18, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 18, 0 }, { 18, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 18, 0 }, { 18, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 18, 0 }, { 18, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 18, 0 }, { 18, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 19, 0 }, { 22, 0 }, { 22, 1 }, { 25, 0 } },
> +	{ { 99, 99 }, { 99, 99 }, { 99, 99 }, { -1, -1 } },
> +	{ { 29, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 30, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 30, 0 }, { 31, 0 }, { -1, -1 }, { -1, -1 } },
> +	{ { 32, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 30, 2 }, { 31, 2 }, { 33, 2 }, { 33, 3 } },
> +	{ { 34, 0 }, { 34, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 35, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 34, 0 }, { 34, 1 }, { 36, 0 }, { -1, -1 } },
> +	{ { 37, 0 }, { 37, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 38, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 34, 2 }, { 34, 3 }, { 36, 2 }, { 39, 2 } },
> +	{ { 40, 0 }, { 40, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 41, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 42, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 40, 0 }, { 40, 1 }, { 43, 0 }, { -1, -1 } },
> +	{ { 44, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 45, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 40, 0 }, { 40, 1 }, { 43, 0 }, { 46, 0 } },
> +	{ { 47, 0 }, { 47, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 47, 0 }, { 47, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 47, 0 }, { 47, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 47, 0 }, { 47, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 47, 0 }, { 47, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 47, 0 }, { 47, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 47, 0 }, { 47, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 47, 0 }, { 47, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 47, 0 }, { 47, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 48, 0 }, { 51, 0 }, { 51, 1 }, { 54, 0 } },
> +	{ { 99, 99 }, { 99, 99 }, { 99, 99 }, { -1, -1 } },
> +	{ { 58, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 59, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 59, 0 }, { 60, 0 }, { -1, -1 }, { -1, -1 } },
> +	{ { 61, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 59, 2 }, { 60, 2 }, { 62, 2 }, { 62, 3 } },
> +	{ { 63, 0 }, { 63, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 64, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 63, 0 }, { 63, 1 }, { 65, 0 }, { -1, -1 } },
> +	{ { 66, 0 }, { 66, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 67, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 63, 2 }, { 63, 3 }, { 65, 2 }, { 68, 2 } },
> +	{ { 69, 0 }, { 69, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 70, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 71, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 69, 0 }, { 69, 1 }, { 72, 0 }, { -1, -1 } },
> +	{ { 73, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 74, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 69, 0 }, { 69, 1 }, { 72, 0 }, { 75, 0 } },
> +	{ { 76, 0 }, { 76, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 76, 0 }, { 76, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 76, 0 }, { 76, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 76, 0 }, { 76, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 76, 0 }, { 76, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 76, 0 }, { 76, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 76, 0 }, { 76, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 76, 0 }, { 76, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 76, 0 }, { 76, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 77, 0 }, { 80, 0 }, { 80, 1 }, { 83, 0 } }
> +};

#define RPN_NUM		3
#define RPN_LOOP_NUM	29

/* [loop][ch][output_index] */
static const signed int fd_rdma_en[RPN_LOOP_NUM][INPUT_WDMA_WRA_NUM][2] = {
	{ { 99, 99 }, { 99, 99 }, { 99, 99 }, { -1, -1 } },
	{ { 0, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
	{ { 1, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
	{ { 1, 0 }, { 2, 0 }, { -1, -1 }, { -1, -1 } },
	{ { 3, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
	{ { 1, 2 }, { 2, 2 }, { 4, 2 }, { 4, 3 } },
	{ { 5, 0 }, { 5, 1 }, { -1, -1 }, { -1, -1 } },
	{ { 6, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
	{ { 5, 0 }, { 5, 1 }, { 7, 0 }, { -1, -1 } },
	{ { 8, 0 }, { 8, 1 }, { -1, -1 }, { -1, -1 } },
	{ { 9, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
	{ { 5, 2 }, { 5, 3 }, { 7, 2 }, { 10, 2 } },
	{ { 11, 0 }, { 11, 1 }, { -1, -1 }, { -1, -1 } },
	{ { 12, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
	{ { 13, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
	{ { 11, 0 }, { 11, 1 }, { 14, 0 }, { -1, -1 } },
	{ { 15, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
	{ { 16, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
	{ { 11, 0 }, { 11, 1 }, { 14, 0 }, { 17, 0 } },
	{ { 18, 0 }, { 18, 1 }, { -1, -1 }, { -1, -1 } },
	{ { 18, 0 }, { 18, 1 }, { -1, -1 }, { -1, -1 } },
	{ { 18, 0 }, { 18, 1 }, { -1, -1 }, { -1, -1 } },
	{ { 18, 0 }, { 18, 1 }, { -1, -1 }, { -1, -1 } },
	{ { 18, 0 }, { 18, 1 }, { -1, -1 }, { -1, -1 } },
	{ { 18, 0 }, { 18, 1 }, { -1, -1 }, { -1, -1 } },
	{ { 18, 0 }, { 18, 1 }, { -1, -1 }, { -1, -1 } },
	{ { 18, 0 }, { 18, 1 }, { -1, -1 }, { -1, -1 } },
	{ { 18, 0 }, { 18, 1 }, { -1, -1 }, { -1, -1 } },
	{ { 19, 0 }, { 22, 0 }, { 22, 1 }, { 25, 0 } }
};

and use this array like this,

for (l = 0; l < RPN_NUM: l++)
	for (i = 0; i < RPN_LOOP_NUM; i++)
		for (j = 0; j < INPUT_WDMA_WRA_NUM; j++) {
			if (fd_rdma_en[i][j][0] != -1) {
				uloop = fd_rdma_en[i][j][0] + l * RPN_LOOP_NUM;
				uch = fd_rdma_en[i][j][1];
				...
			}

> +
> +static const unsigned int attr_wdma_en[ATTR_LOOP_NUM][OUTPUT_WDMA_WRA_NUM] = {
> +	{ 1, 0, 1, 0 }, { 1, 0, 1, 0 }, { 1, 0, 0, 0 }, { 1, 1, 1, 1 },
> +	{ 1, 1, 1, 1 }, { 1, 0, 1, 0 }, { 1, 1, 0, 0 }, { 1, 0, 1, 0 },
> +	{ 1, 1, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 },
> +	{ 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 1, 0, 0 },
> +	{ 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 },
> +	{ 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 },
> +	{ 1, 0, 0, 0 }, { 1, 0, 0, 0 }
> +};
> +
> +static const unsigned int
> +	attr_ker_rdma_size[ATTR_LOOP_NUM][KERNEL_RDMA_RA_NUM] = {
> +		{ 240, 240 }, { 1168, 1168 }, { 272, 272 }, { 2320, 2320 },
> +		{ 2080, 2080 }, { 9232, 9232 }, { 3104, 3104 }, { 9232, 9232 },
> +		{ 4128, 4128 }, { 1040, 1040 }, { 4624, 4624 }, { 4624, 4624 },
> +		{ 1552, 1552 }, { 4624, 4624 }, { 4624, 4624 }, { 4128, 4128 },
> +		{ 9232, 9232 }, { 272, 272 },	{ 9232, 9232 }, { 2320, 2320 },
> +		{ 144, 144 }, { 9232, 9232 }, { 272, 272 }, { 9232, 9232 },
> +		{ 2320, 2320 }, { 144, 144 }
> +	};

This array contain pair of identical value, so reduce this array to

static const unsigned int attr_ker_rdma_size[ATTR_LOOP_NUM] = {
	240, 1168, 272, 2320,
	2080, 9232, 3104, 9232,
	4128, 1040, 4624, 4624,
	1552, 4624, 4624, 4128,
	9232, 272, 9232, 2320,
	144, 9232, 272, 9232,
	2320, 144
};

> +
> +static const unsigned int attr_out_stride2_as_in[ATTR_LOOP_NUM] = {
> +	0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
> +	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
> +};
> +
> +static const unsigned int attr_fd_stride[ATTR_LOOP_NUM] = { /* H */
> +	2, 1, 1, 1, 1, 1, 1,
> +	1, 1, 1, 1, 1, 1, 1,
> +	1, 1, 1, 1, 1, 1, 1,
> +	1, 1, 1, 1, 1
> +};

#define ATTR_FD_STRIDE(n)	(n == 0 ? 2 : 1)

> +
> +static const unsigned int attr_fd_maxpool[ATTR_LOOP_NUM] = { /* L */
> +	1, 0, 0, 0, 0, 0,
> +	0, 0, 0, 0, 0, 0,
> +	0, 0, 0, 0, 0, 0,
> +	0, 0, 0, 0, 0, 0,
> +	0, 0
> +};

#define ATTR_FD_MAXPOOL(n)	(n == 0 ? 1 : 0)

> +
> +static const unsigned int attr_out_2size[ATTR_LOOP_NUM] = { /* O */
> +	1, 1, 0, 1, 1, 1, 0,
> +	1, 0, 0, 0, 0, 0, 0,
> +	0, 0, 0, 0, 0, 0, 0,
> +	0, 0, 0, 0, 0
> +};
> +
> +/* [loop][ch][output_index] */
> +static const signed int attr_rdma_en[ATTR_LOOP_NUM][INPUT_WDMA_WRA_NUM][2] = {
> +	{ { 99, 99 }, { 99, 99 }, { 99, 99 }, { -1, -1 } },
> +	{ { 0, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 0, 0 }, { 1, 0 }, { -1, -1 }, { -1, -1 } },
> +	{ { 2, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 0, 2 }, { 1, 2 }, { 3, 2 }, { 3, 3 } },
> +	{ { 4, 0 }, { 4, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 4, 0 }, { 4, 1 }, { 5, 0 }, { -1, -1 } },
> +	{ { 6, 0 }, { 6, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 4, 2 }, { 4, 3 }, { 5, 2 }, { 7, 2 } },
> +	{ { 8, 0 }, { 8, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 9, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 10, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 8, 0 }, { 8, 1 }, { 11, 0 }, { -1, -1 } },
> +	{ { 12, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 13, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 8, 0 }, { 8, 1 }, { 11, 0 }, { 14, 0 } },
> +	{ { 15, 0 }, { 15, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 16, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 15, 0 }, { 15, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 18, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 19, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 15, 0 }, { 15, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 21, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 15, 0 }, { 15, 1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 23, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } },
> +	{ { 24, 0 }, { -1, -1 }, { -1, -1 }, { -1, -1 } }
> +};
> +
> +static const unsigned int attr_wdma_size[ATTR_LOOP_NUM][OUTPUT_WDMA_WRA_NUM] = {
> +	{ 16384, 0, 4096, 0 },
> +	{ 16384, 0, 4096, 0 },
> +	{ 16384, 0, 0, 0 },
> +	{ 16384, 16384, 4096, 4096 },
> +	{ 8192, 8192, 2048, 2048 },
> +	{ 8192, 0, 2048, 0 },
> +	{ 8192, 8192, 0, 0 },
> +	{ 8192, 0, 2048, 0 },
> +	{ 2048, 2048, 0, 0 },
> +	{ 2048, 0, 0, 0 },
> +	{ 2048, 0, 0, 0 },
> +	{ 2048, 0, 0, 0 },
> +	{ 2048, 0, 0, 0 },
> +	{ 2048, 0, 0, 0 },
> +	{ 2048, 0, 0, 0 },
> +	{ 2048, 2048, 0, 0 },
> +	{ 2048, 0, 0, 0 },
> +	{ 0, 0, 0, 0 },
> +	{ 2048, 0, 0, 0 },
> +	{ 1024, 0, 0, 0 },
> +	{ 0, 0, 0, 0 },
> +	{ 2048, 0, 0, 0 },
> +	{ 0, 0, 0, 0 },
> +	{ 2048, 0, 0, 0 },
> +	{ 1024, 0, 0, 0 },
> +	{ 0, 0, 0, 0 }
> +};
> +
> +static const unsigned int fld_step_align_size[FLD_STEP_NUM][FLD_MAX_FRAME] = {
> +	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6528 },
> +	{ 1536, 1280, 1280, 1280, 1280, 1280, 1280, 1280, 1280, 1280, 1280,
> +	  1280, 1280, 1280, 1280 },
> +	{ 5376, 5376, 5376, 5376, 5376, 5376, 5376, 5376, 5376, 5376, 5376,
> +	  5376, 5376, 5376, 5376 },
> +	{ 307200, 307200, 307200, 307200, 307200, 307200, 307200, 307200,
> +	  307200, 307200, 307200, 307200, 307200, 307200, 307200 },
> +	{ 8064, 8064, 8064, 8064, 8064, 8064, 8064, 8064, 8064, 8064, 8064,
> +	  8064, 8064, 8064, 8064 },
> +	{ 8064, 8064, 8064, 8064, 8064, 8064, 8064, 8064, 8064, 8064, 8064,
> +	  8064, 8064, 8064, 8064 }
> +};

Use inline function to reduce this array:

static inline unsigned int fld_step_align_size(int step_num, int frame)
{
	switch (step_num) {
	case FLD_STEP_BLINK:
		return frame == (FLD_MAX_FRAME - 1) ? 6528 : 0;
	case FLD_STEP_CV:
		return frame == 0 ? 1536 : 1280;
	case FLD_STEP_FP:
		return 5376;
	case FLD_STEP_LEAF:
		return 307200;
	case FLD_STEP_KM02:
	case FLD_STEP_KM13:
		return 8064;
	}

	return 0;
}

> +
> +static const unsigned int fld_face_info_0[FLD_MAX_FRAME] = {
> +	0x440, 0x44C, 0x458, 0x464, 0x470, 0x47C, 0x488, 0x494, 0x4A4,
> +	0x4B0, 0x4BC, 0x4C8, 0x4D4, 0x4E0, 0x4EC
> +};
> +
> +static const unsigned int fld_face_info_1[FLD_MAX_FRAME] = {
> +	0x444, 0x450, 0x45C, 0x468, 0x474, 0x480, 0x48C, 0x498, 0x4A8,
> +	0x4B4, 0x4C0, 0x4CC, 0x4D8, 0x4E4, 0x4F0
> +};
> +
> +static const unsigned int fld_face_info_2[FLD_MAX_FRAME] = {
> +	0x448, 0x454, 0x460, 0x46C, 0x478, 0x484, 0x490, 0x4A0, 0x4AC,
> +	0x4B8, 0x4C4, 0x4D0, 0x4DC, 0x4E8, 0x4F4
> +};

Use below macro to reduce fld_face_info_1[] and fld_face_info_2[]:

#define FLD_FACE_INFO(m, n)	(fld_face_info_0[n] + 4 * m)

> +
> +struct aie_static_info_element {
> +	u32 fd_wdma_size[OUTPUT_WDMA_WRA_NUM];
> +	u32 out_xsize_plus_1;
> +	u32 out_height;
> +	u32 out_ysize_plus_1_stride2;
> +	u32 out_stride;
> +	u32 out_stride_stride2;
> +	u32 out_width;
> +	u32 img_width;
> +	u32 img_height;
> +	u32 stride2_out_width;
> +	u32 stride2_out_height;
> +	u32 out_xsize_plus_1_stride2;
> +	u32 input_xsize_plus_1;
> +};
> +
> +struct aie_static_info {
> +	struct aie_static_info_element inf_elm[FD_LOOP_NUM];
> +};
> +
> +enum aie_state {
> +	STATE_NA,
> +	STATE_INIT,
> +	STATE_OPEN
> +};
> +
> +/* Different features of AIE 3.1 */
> +/* FDMODE: Face Detection Mode */
> +/* ATTRIBUTEMODE: Detect gender and race */
> +/* FLDMODE: Detect the location of the facial features */
> +enum aie_mode {
> +	FDMODE,
> +	ATTRIBUTEMODE,
> +	FLDMODE
> +};
> +
> +enum aie_format {
> +	FMT_NA,
> +	FMT_YUV_2P,
> +	FMT_YVU_2P,
> +	FMT_YUYV,
> +	FMT_YVYU,
> +	FMT_UYVY,
> +	FMT_VYUY,
> +	FMT_MONO,
> +	/* AIE 3.X */
> +	FMT_YUV420_2P,
> +	FMT_YUV420_1P
> +};
> +
> +enum aie_input_degree {
> +	DEGREE_0,
> +	DEGREE_90,
> +	DEGREE_270,
> +	DEGREE_180
> +};
> +
> +/* align v4l2 user space interface */
> +struct fd_ret {
> +	u16 anchor_x0[MAX_FACE_NUM];
> +	u16 anchor_x1[MAX_FACE_NUM];
> +	u16 anchor_y0[MAX_FACE_NUM];
> +	u16 anchor_y1[MAX_FACE_NUM];
> +	s16 rop_landmark_score0[MAX_FACE_NUM];
> +	s16 rop_landmark_score1[MAX_FACE_NUM];
> +	s16 rop_landmark_score2[MAX_FACE_NUM];
> +	s16 anchor_score[MAX_FACE_NUM];
> +	s16 rip_landmark_score0[MAX_FACE_NUM];
> +	s16 rip_landmark_score1[MAX_FACE_NUM];
> +	s16 rip_landmark_score2[MAX_FACE_NUM];
> +	s16 rip_landmark_score3[MAX_FACE_NUM];
> +	s16 rip_landmark_score4[MAX_FACE_NUM];
> +	s16 rip_landmark_score5[MAX_FACE_NUM];
> +	s16 rip_landmark_score6[MAX_FACE_NUM];
> +	u16 face_result_index[MAX_FACE_NUM];
> +	u16 anchor_index[MAX_FACE_NUM];
> +	u32 fd_partial_result;
> +};
> +
> +struct fd_result {
> +	u16 fd_pyramid0_num;
> +	u16 fd_pyramid1_num;
> +	u16 fd_pyramid2_num;
> +	u16 fd_total_num;
> +	struct fd_ret pyramid0_result;
> +	struct fd_ret pyramid1_result;
> +	struct fd_ret pyramid2_result;
> +};
> +
> +struct race_result {
> +	signed short result[4][64];
> +};
> +
> +struct merged_race_result {
> +	signed short result[4];
> +};
> +
> +struct merged_gender_result {
> +	signed short result[2];
> +};
> +
> +struct merged_age_result {
> +	signed short result[2];
> +};
> +
> +struct merged_is_indian_result {
> +	signed short result[2];
> +};
> +
> +struct attr_result {
> +	s16 gender_ret[2][64];
> +	s16 race_ret[4][64];
> +	s16 merged_age_ret[2];
> +	s16 merged_gender_ret[2];
> +	s16 merged_is_indian_ret[2];
> +	s16 merged_race_ret[4];
> +};
> +
> +/* AIE 3.X */
> +struct fld_landmark {
> +	u16 x;
> +	u16 y;
> +};
> +
> +struct fld_result {
> +	struct fld_landmark fld_landmark[FLD_CUR_LANDMARK];
> +	u16 fld_out_rip;
> +	u16 fld_out_rop;
> +	u16 confidence;
> +	s16 blinkscore;
> +};
> +
> +struct aie_roi {

This has been defined in mtk_aie_v4l2_controls.h

> +	u32 x1;
> +	u32 y1;
> +	u32 x2;
> +	u32 y2;
> +};
> +
> +struct aie_padding {

Ditto.

> +	u32 left;
> +	u32 right;
> +	u32 down;
> +	u32 up;
> +};
> +
> +/* AIE 3.X */
> +struct fld_crop_rip_rop {

Ditto.

> +	u32 fld_in_crop_x1;
> +	u32 fld_in_crop_y1;
> +	u32 fld_in_crop_x2;
> +	u32 fld_in_crop_y2;
> +	u32 fld_in_rip;
> +	u32 fld_in_rop;
> +};
> +
> +/* Align v4l2 user space interface. */
> +/* This cannot modify unless v4l2 user space is modified synchronously */
> +/* Otherwise AIE won't work */
> +struct aie_enq_info {
> +	unsigned int sel_mode;
> +	unsigned int src_img_fmt;
> +	unsigned int src_img_width;
> +	unsigned int src_img_height;
> +	unsigned int src_img_stride;
> +	unsigned int pyramid_base_width;
> +	unsigned int pyramid_base_height;
> +	unsigned int number_of_pyramid;
> +	unsigned int rotate_degree;
> +	int en_roi;
> +	struct aie_roi src_roi;
> +	int en_padding;
> +	struct aie_padding src_padding;
> +	unsigned int freq_level;
> +
> +	/* AIE 3.X */
> +	unsigned int fld_face_num;
> +	struct fld_crop_rip_rop fld_input[FLD_MAX_FRAME];
> +	u32 src_img_addr;
> +	u32 src_img_addr_uv;
> +	u32 fd_version;

fd_version is useless, so drop it.

> +	u32 attr_version;

attr_version is useless, so drop it.

> +	u32 pose_version;

pose_version is useless, so drop it.

> +	struct fd_result fd_out;
> +	struct attr_result attr_out;
> +
> +	/* AIE 3.X */
> +	struct fld_result fld_out[FLD_MAX_FRAME];
> +	u32 irq_status;

In mtk_aie_job_ready(),

plane_vaddr = vb2_plane_vaddr(&dst_buf->vb2_buf, 0);
fd->aie_cfg = (struct aie_enq_info *)plane_vaddr;

In mtk_aie_frame_done_worker(),

aie_get_fd_result(fd, fd->aie_cfg);

That means you copy the result data into destination video plane.
And the result data is struct aie_enq_info {} but it does not expose to user space interface.
So move this structure to uapi header file.

> +};
> +
> +struct aie_reg_cfg {
> +	u32 rs_adr;

This is identical to fd->base_para->fd_rs_cfg_pa, so this is redundant.
Directly use fd->base_para->fd_rs_cfg_pa and drop this.

> +	u32 yuv2rgb_adr;
> +	u32 fd_adr;
> +	u32 fd_pose_adr;
> +	u32 fd_mode;
> +	u32 hw_result;
> +	u32 hw_result1;
> +	u32 reserved;
> +};
> +
> +struct aie_hw_rect {
> +	u16 width;
> +	u16 height;
> +};
> +
> +struct aie_para {
> +	void *fd_fd_cfg_va;
> +	void *fd_rs_cfg_va;
> +	void *fd_yuv2rgb_cfg_va;
> +
> +	void *attr_fd_cfg_va[MAX_ENQUE_FRAME_NUM];
> +	void *attr_yuv2rgb_cfg_va[MAX_ENQUE_FRAME_NUM];
> +
> +	void *rs_pym_rst_va[PYM_NUM][COLOR_NUM];
> +
> +	dma_addr_t fd_fd_cfg_pa;
> +	dma_addr_t fd_rs_cfg_pa;
> +	dma_addr_t fd_yuv2rgb_cfg_pa;
> +
> +	dma_addr_t attr_fd_cfg_pa[MAX_ENQUE_FRAME_NUM];
> +	dma_addr_t attr_yuv2rgb_cfg_pa[MAX_ENQUE_FRAME_NUM];
> +
> +	dma_addr_t rs_pym_rst_pa[PYM_NUM][COLOR_NUM];
> +
> +	u32 sel_mode;
> +	u32 src_img_fmt;
> +	u32 rotate_degree;
> +	s16 rpn_anchor_thrd;
> +	u16 number_of_pyramid;
> +	u32 src_img_addr;
> +	u32 src_img_addr_uv;
> +
> +	struct aie_hw_rect max_img_rect;
> +	struct aie_hw_rect img_rect;
> +	struct aie_hw_rect crop_rect;
> +	struct aie_hw_rect pyramid_rect;
> +	struct aie_hw_rect max_pyramid_rect;
> +};
> +
> +struct aie_attr_para {
> +	u32 w_idx;
> +	u32 r_idx;

I think this driver support software to queue many frame setting, so use read/write index to control the queue.
But why only ATTRIBUTE mode need read/write index?
FD mode and FLD mode also need read/write index to control queue, doesn't it?

> +	u32 sel_mode[MAX_ENQUE_FRAME_NUM];
> +	u16 img_width[MAX_ENQUE_FRAME_NUM];
> +	u16 img_height[MAX_ENQUE_FRAME_NUM];
> +	u16 crop_width[MAX_ENQUE_FRAME_NUM];
> +	u16 crop_height[MAX_ENQUE_FRAME_NUM];
> +	u32 src_img_fmt[MAX_ENQUE_FRAME_NUM];
> +	u32 rotate_degree[MAX_ENQUE_FRAME_NUM];
> +	u32 src_img_addr[MAX_ENQUE_FRAME_NUM];
> +	u32 src_img_addr_uv[MAX_ENQUE_FRAME_NUM];
> +};
> +
> +struct aie_fd_dma_para {
> +	void *fd_out_hw_va[FD_LOOP_NUM][OUTPUT_WDMA_WRA_NUM];
> +	void *fd_kernel_va[FD_LOOP_NUM][KERNEL_RDMA_RA_NUM];
> +	void *attr_out_hw_va[ATTR_LOOP_NUM][OUTPUT_WDMA_WRA_NUM];
> +	void *attr_kernel_va[ATTR_LOOP_NUM][KERNEL_RDMA_RA_NUM];
> +
> +	void *age_out_hw_va[MAX_ENQUE_FRAME_NUM];
> +	void *gender_out_hw_va[MAX_ENQUE_FRAME_NUM];
> +	void *is_indian_out_hw_va[MAX_ENQUE_FRAME_NUM];
> +	void *race_out_hw_va[MAX_ENQUE_FRAME_NUM];
> +
> +	dma_addr_t fd_out_hw_pa[FD_LOOP_NUM][OUTPUT_WDMA_WRA_NUM];
> +	dma_addr_t fd_kernel_pa[FD_LOOP_NUM][KERNEL_RDMA_RA_NUM];
> +	dma_addr_t attr_out_hw_pa[ATTR_LOOP_NUM][OUTPUT_WDMA_WRA_NUM];
> +	dma_addr_t attr_kernel_pa[ATTR_LOOP_NUM][KERNEL_RDMA_RA_NUM];
> +
> +	dma_addr_t age_out_hw_pa[MAX_ENQUE_FRAME_NUM];
> +	dma_addr_t gender_out_hw_pa[MAX_ENQUE_FRAME_NUM];
> +	dma_addr_t is_indian_out_hw_pa[MAX_ENQUE_FRAME_NUM];
> +	dma_addr_t race_out_hw_pa[MAX_ENQUE_FRAME_NUM];
> +};
> +
> +/* AIE 3.X */
> +struct aie_fd_fld_para {
> +	void *fld_step_va[FLD_STEP_NUM][FLD_MAX_FRAME];
> +	void *fld_output_va[FLD_MAX_FRAME];
> +	dma_addr_t fld_step_pa[FLD_STEP_NUM][FLD_MAX_FRAME];
> +	dma_addr_t fld_output_pa[FLD_MAX_FRAME];
> +};
> +
> +struct imem_buf_info {
> +	void *va;
> +	dma_addr_t pa;
> +	unsigned int size;
> +	unsigned int reserved;
> +};
> +
> +struct fd_buffer {
> +	/* used by DMA HW */
> +	u32 dma_addr;
> +};
> +
> +struct aie_clocks {
> +	struct clk_bulk_data *clks;
> +	unsigned int clk_num;
> +};
> +
> +struct mtk_aie_req_work {
> +	struct work_struct work;
> +	struct mtk_aie_dev *fd_dev;
> +};
> +
> +struct mtk_aie_variant {
> +	unsigned int y2r_cfg_size;
> +	unsigned int rs_cfg_size;
> +	unsigned int fd_cfg_size;
> +};
> +
> +struct mtk_aie_dev {
> +	struct device *dev;
> +	struct mtk_aie_ctx *ctx;
> +	struct v4l2_m2m_dev *m2m_dev;
> +	struct device *larb;
> +	struct aie_para *base_para;
> +	struct aie_attr_para *attr_para;
> +	struct aie_fd_dma_para *dma_para;
> +
> +	/* AIE 3.X */
> +	struct aie_fd_fld_para *fld_para;
> +
> +	struct aie_enq_info *aie_cfg;
> +	struct workqueue_struct *frame_done_wq;
> +	void __iomem *fd_base;
> +	const struct mtk_aie_variant *variant;
> +
> +	/* Input Buffer Pointer */
> +	struct imem_buf_info rs_cfg_data;
> +	struct imem_buf_info fd_cfg_data;
> +	struct imem_buf_info yuv2rgb_cfg_data;
> +	/* HW Output Buffer Pointer */
> +	struct imem_buf_info rs_output_hw;
> +	struct imem_buf_info fd_dma_hw;
> +	struct imem_buf_info fd_dma_result_hw;
> +	struct imem_buf_info fd_kernel_hw;
> +	struct imem_buf_info fd_attr_dma_hw;
> +	struct aie_static_info st_info;
> +
> +	struct aie_reg_cfg reg_cfg;
> +
> +	/* AIE 3.X */
> +	/* fld fw buffer */
> +	struct media_device mdev;
> +	struct video_device vfd;
> +	struct aie_clocks aie_clk;
> +	struct v4l2_device v4l2_dev;
> +
> +	/* Lock for V4L2 operations */
> +	struct mutex vfd_lock;
> +	/* Lock for device operations */
> +	struct mutex dev_lock;
> +	/* Lock for performance optimization */
> +	struct mutex fd_lock;
> +	struct imem_buf_info fd_fld_step_data;
> +	struct imem_buf_info fd_fld_out_hw;
> +
> +	int irq;
> +	struct completion fd_job_finished;
> +	struct delayed_work job_timeout_work;
> +
> +	/* DRAM Buffer Size */
> +	unsigned int fd_rs_cfg_size;
> +	unsigned int fd_fd_cfg_size;
> +	unsigned int fd_yuv2rgb_cfg_size;
> +	unsigned int attr_fd_cfg_size;
> +	unsigned int attr_yuv2rgb_cfg_size;
> +
> +	/* HW Output Buffer Size */
> +	unsigned int rs_pym_out_size[PYM_NUM];
> +	unsigned int fd_dma_max_size;
> +	unsigned int fd_dma_rst_max_size;
> +	unsigned int fd_fd_kernel_size;
> +	unsigned int fd_attr_kernel_size;
> +	unsigned int fd_attr_dma_max_size;
> +	unsigned int fd_attr_dma_rst_max_size;
> +	// AIE 3.X
> +	/* fld size */
> +	unsigned int fld_step_size;
> +	unsigned int fld_out_size;
> +
> +	wait_queue_head_t flushing_waitq;
> +	atomic_t num_composing;
> +	struct mtk_aie_req_work req_work;
> +	unsigned int fd_state;
> +	unsigned int fd_mem_size;
> +	u32 fd_stream_count;
> +};
> +
> +struct mtk_aie_ctx {
> +	struct mtk_aie_dev *fd_dev;
> +	struct device *dev;
> +	struct v4l2_fh fh;
> +	struct v4l2_ctrl_handler hdl;
> +	struct v4l2_pix_format_mplane src_fmt;
> +	struct v4l2_meta_format dst_fmt;
> +	struct v4l2_ctrl_aie_init user_init;
> +	struct v4l2_ctrl_aie_param user_param;
> +};
> +
> +void aie_reset(struct mtk_aie_dev *fd);
> +int aie_init(struct mtk_aie_dev *fd, struct v4l2_ctrl_aie_init *user_init);
> +void aie_uninit(struct mtk_aie_dev *fd);
> +void aie_prepare(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg);
> +void aie_execute(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg);
> +void aie_irqhandle(struct mtk_aie_dev *fd);
> +void aie_get_fd_result(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg);
> +void aie_get_attr_result(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg);
> +void aie_get_fld_result(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg);
> +#endif /*__MTK_AIE_H__*/
> diff --git a/drivers/media/platform/mediatek/aie/mtk_aie_53.c b/drivers/media/platform/mediatek/aie/mtk_aie_53.c

Why use 53 as file name? This file seems doing things about v4l2 control.
Maybe replace 53 with a proper name.

> new file mode 100644
> index 000000000000..2a94b10e523d
> --- /dev/null
> +++ b/drivers/media/platform/mediatek/aie/mtk_aie_53.c
> @@ -0,0 +1,1300 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2020 MediaTek Inc.
> + * Author: Fish Wu <fish.wu@xxxxxxxxxxxx>
> + */
> +
> +#include <linux/clk.h>
> +#include <linux/pm_runtime.h>
> +#include <linux/mtk_aie_v4l2_controls.h>
> +#include <media/v4l2-event.h>
> +#include <media/v4l2-ioctl.h>
> +#include <media/v4l2-mem2mem.h>
> +#include <media/videobuf2-dma-contig.h>
> +#include "mtk_aie.h"
> +
> +#define V4L2_CID_MTK_AIE_MAX 2
> +#define Y2R_CFG_SIZE 34
> +#define RS_CFG_SIZE 30
> +#define FD_CFG_SIZE 56
> +
> +static const struct mtk_aie_variant aie_31_drvdata = {
> +	.y2r_cfg_size = Y2R_CFG_SIZE,
> +	.rs_cfg_size = RS_CFG_SIZE,
> +	.fd_cfg_size = FD_CFG_SIZE,
> +};
> +
> +static const struct of_device_id mtk_aie_of_ids[] = {
> +	{
> +		.compatible = "mediatek,mt8188-aie",
> +		.data = &aie_31_drvdata,
> +	},
> +	{ /* end of list */ },
> +};
> +MODULE_DEVICE_TABLE(of, mtk_aie_of_ids);
> +
> +static const struct v4l2_pix_format_mplane mtk_aie_img_fmts[] = {
> +	{
> +		.pixelformat = V4L2_PIX_FMT_NV16M,
> +		.num_planes = 2,
> +	},
> +	{
> +		.pixelformat = V4L2_PIX_FMT_NV61M,
> +		.num_planes = 2,
> +	},
> +	{
> +		.pixelformat = V4L2_PIX_FMT_YUYV,
> +		.num_planes = 1,
> +	},
> +	{
> +		.pixelformat = V4L2_PIX_FMT_YVYU,
> +		.num_planes = 1,
> +	},
> +	{
> +		.pixelformat = V4L2_PIX_FMT_UYVY,
> +		.num_planes = 1,
> +	},
> +	{
> +		.pixelformat = V4L2_PIX_FMT_VYUY,
> +		.num_planes = 1,
> +	},
> +	{
> +		.pixelformat = V4L2_PIX_FMT_GREY,
> +		.num_planes = 1,
> +	},
> +	{
> +		.pixelformat = V4L2_PIX_FMT_NV12M,
> +		.num_planes = 2,
> +	},
> +	{
> +		.pixelformat = V4L2_PIX_FMT_NV12,
> +		.num_planes = 1,
> +	},
> +};
> +
> +#define NUM_FORMATS ARRAY_SIZE(mtk_aie_img_fmts)
> +
> +static inline struct mtk_aie_ctx *fh_to_ctx(struct v4l2_fh *fh)
> +{
> +	return container_of(fh, struct mtk_aie_ctx, fh);
> +}
> +
> +static inline struct mtk_aie_ctx *ctrl_to_ctx(const struct v4l2_ctrl *ctrl)
> +{
> +	return container_of(ctrl->handler, struct mtk_aie_ctx, hdl);
> +}
> +
> +static void mtk_aie_hw_job_finish(struct mtk_aie_dev *fd,
> +				  enum vb2_buffer_state vb_state)
> +{
> +	struct vb2_v4l2_buffer *src_vbuf, *dst_vbuf;
> +	struct mtk_aie_ctx *ctx;
> +
> +	pm_runtime_put(fd->dev);
> +	ctx = v4l2_m2m_get_curr_priv(fd->m2m_dev);
> +	if (!ctx) {
> +		dev_err(fd->dev, "Failed to do v4l2_m2m_get_curr_priv!\n");
> +	} else {
> +		src_vbuf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
> +		dst_vbuf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
> +		if (src_vbuf && dst_vbuf)
> +			v4l2_m2m_buf_copy_metadata(src_vbuf, dst_vbuf, true);
> +		if (src_vbuf)
> +			v4l2_m2m_buf_done(src_vbuf, vb_state);
> +		if (dst_vbuf)
> +			v4l2_m2m_buf_done(dst_vbuf, vb_state);
> +		if (src_vbuf && dst_vbuf)
> +			v4l2_m2m_job_finish(fd->m2m_dev, ctx->fh.m2m_ctx);
> +	}
> +	complete_all(&fd->fd_job_finished);
> +}
> +
> +static int mtk_aie_hw_job_exec(struct mtk_aie_dev *fd)
> +{
> +	pm_runtime_get_sync(fd->dev);
> +
> +	reinit_completion(&fd->fd_job_finished);
> +	schedule_delayed_work(&fd->job_timeout_work,
> +			      msecs_to_jiffies(MTK_FD_HW_TIMEOUT_IN_MSEC));
> +
> +	return 0;
> +}
> +
> +static int mtk_aie_vb2_buf_out_validate(struct vb2_buffer *vb)
> +{
> +	struct vb2_v4l2_buffer *v4l2_buf = to_vb2_v4l2_buffer(vb);
> +
> +	if (v4l2_buf->field == V4L2_FIELD_ANY)
> +		v4l2_buf->field = V4L2_FIELD_NONE;
> +	if (v4l2_buf->field != V4L2_FIELD_NONE)
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
> +static int mtk_aie_vb2_buf_prepare(struct vb2_buffer *vb)
> +{
> +	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
> +	struct vb2_queue *vq = vb->vb2_queue;
> +	struct mtk_aie_ctx *ctx = vb2_get_drv_priv(vq);
> +	struct v4l2_pix_format_mplane *pixfmt;
> +	struct device *dev = ctx->dev;
> +
> +	switch (vq->type) {
> +	case V4L2_BUF_TYPE_META_CAPTURE:
> +		if (vb2_plane_size(vb, 0) < ctx->dst_fmt.buffersize) {
> +			dev_err(dev, "meta size %lu is too small\n", vb2_plane_size(vb, 0));
> +			return -EINVAL;
> +		}
> +		vb2_set_plane_payload(vb, 0, ctx->dst_fmt.buffersize);
> +		break;
> +	case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
> +		pixfmt = &ctx->src_fmt;
> +
> +		if (vbuf->field == V4L2_FIELD_ANY)
> +			vbuf->field = V4L2_FIELD_NONE;
> +
> +		if (vb->num_planes > 2 || vbuf->field != V4L2_FIELD_NONE) {
> +			dev_dbg(dev, "plane %d or field %d not supported\n",
> +				vb->num_planes, vbuf->field);
> +			return -EINVAL;
> +		}
> +
> +		if (vb2_plane_size(vb, 0) < pixfmt->plane_fmt[0].sizeimage) {
> +			dev_dbg(dev, "plane 0 %lu is too small than %x\n",
> +				vb2_plane_size(vb, 0), pixfmt->plane_fmt[0].sizeimage);
> +			return -EINVAL;
> +		}
> +		vb2_set_plane_payload(vb, 0, pixfmt->plane_fmt[0].sizeimage);
> +
> +		if (pixfmt->num_planes == 2 &&
> +		    vb2_plane_size(vb, 1) < pixfmt->plane_fmt[1].sizeimage) {
> +			dev_dbg(dev, "plane 1 %lu is too small than %x\n",
> +				vb2_plane_size(vb, 1), pixfmt->plane_fmt[1].sizeimage);
> +			return -EINVAL;
> +		}
> +		vb2_set_plane_payload(vb, 1, pixfmt->plane_fmt[1].sizeimage);
> +		break;
> +	}
> +
> +	return 0;
> +}
> +
> +static void mtk_aie_vb2_buf_queue(struct vb2_buffer *vb)
> +{
> +	struct mtk_aie_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
> +	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
> +
> +	v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vbuf);
> +}
> +
> +static int mtk_aie_vb2_queue_setup(struct vb2_queue *vq,
> +				   unsigned int *num_buffers,
> +				   unsigned int *num_planes,
> +				   unsigned int sizes[],
> +				   struct device *alloc_devs[])
> +{
> +	struct mtk_aie_ctx *ctx = vb2_get_drv_priv(vq);
> +	struct device *dev = ctx->dev;
> +	unsigned int size[2];
> +	unsigned int plane;
> +
> +	switch (vq->type) {
> +	case V4L2_BUF_TYPE_META_CAPTURE:
> +		size[0] = ctx->dst_fmt.buffersize;
> +		size[1] = 0;
> +		break;
> +	case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
> +		size[0] = ctx->src_fmt.plane_fmt[0].sizeimage;
> +		size[1] = ctx->src_fmt.plane_fmt[1].sizeimage;
> +		break;
> +	default:
> +		size[0] = 0;
> +		size[1] = 0;
> +	}
> +
> +	dev_dbg(dev, "vq type = %d, size[0] = %d, size[1] = %d\n",
> +		vq->type, size[0], size[1]);
> +
> +	if (*num_planes > 2)
> +		return -EINVAL;
> +
> +	*num_buffers = clamp_val(*num_buffers, 1, VB2_MAX_FRAME);
> +
> +	if (*num_planes == 0) {
> +		if (vq->type == V4L2_BUF_TYPE_META_CAPTURE) {
> +			sizes[0] = ctx->dst_fmt.buffersize;
> +			*num_planes = 1;
> +			return 0;
> +		}
> +
> +		*num_planes = ctx->src_fmt.num_planes;
> +		if (*num_planes > 2)
> +			return -EINVAL;
> +		for (plane = 0; plane < *num_planes; plane++)
> +			sizes[plane] = ctx->src_fmt.plane_fmt[plane].sizeimage;
> +
> +		return 0;
> +	}
> +
> +	return 0;
> +}
> +
> +static int mtk_aie_vb2_start_streaming(struct vb2_queue *vq, unsigned int count)
> +{
> +	struct mtk_aie_ctx *ctx = vb2_get_drv_priv(vq);
> +	struct mtk_aie_dev *fd;
> +
> +	if (!ctx)
> +		return -EINVAL;
> +
> +	fd = ctx->fd_dev;
> +	if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE && (++fd->fd_stream_count == 1))
> +		return aie_init(ctx->fd_dev, &ctx->fd_dev->ctx->user_init);
> +
> +	return 0;
> +}
> +
> +static void mtk_aie_job_timeout_work(struct work_struct *work)
> +{
> +	struct mtk_aie_dev *fd =
> +		container_of(work, struct mtk_aie_dev, job_timeout_work.work);
> +
> +	dev_err(fd->dev, "FD Job timeout!");
> +
> +	dev_dbg(fd->dev, "%s result result1: %x, %x, %x", __func__,
> +		readl(fd->fd_base + AIE_RESULT_0_REG),
> +		readl(fd->fd_base + AIE_RESULT_1_REG),
> +		readl(fd->fd_base + AIE_DMA_CTL_REG));
> +
> +	fd->aie_cfg->irq_status = readl(fd->fd_base + AIE_INT_EN_REG);
> +
> +	if (fd->aie_cfg->sel_mode == ATTRIBUTEMODE) {
> +		dev_dbg(fd->dev, "w_idx = %d, r_idx = %d\n",
> +			fd->attr_para->w_idx, fd->attr_para->r_idx);
> +	}
> +
> +	aie_irqhandle(fd);
> +	aie_reset(fd);
> +	atomic_dec(&fd->num_composing);
> +	mtk_aie_hw_job_finish(fd, VB2_BUF_STATE_ERROR);
> +	wake_up(&fd->flushing_waitq);
> +}
> +
> +static void mtk_aie_vb2_stop_streaming(struct vb2_queue *vq)
> +{
> +	struct mtk_aie_ctx *ctx = vb2_get_drv_priv(vq);
> +	struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> +	struct v4l2_m2m_queue_ctx *queue_ctx;
> +	struct mtk_aie_dev *fd = ctx->fd_dev;
> +	struct vb2_v4l2_buffer *vb = NULL;
> +	int ret;
> +
> +	/* Waiting Job Finish */
> +	ret = wait_for_completion_timeout(&fd->fd_job_finished, msecs_to_jiffies(1000));
> +	if (!ret)
> +		dev_err(fd->dev, "Wait job finish timeout\n");
> +
> +	if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
> +		fd->fd_stream_count--;
> +		if (fd->fd_stream_count > 0)
> +			dev_dbg(fd->dev, "Stop: fd_stream_count = %d\n", fd->fd_stream_count);
> +		else
> +			aie_uninit(fd);
> +	}
> +
> +	queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ? &m2m_ctx->out_q_ctx :
> +		&m2m_ctx->cap_q_ctx;
> +	while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
> +		v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
> +}
> +
> +static void mtk_aie_vb2_request_complete(struct vb2_buffer *vb)
> +{
> +	struct mtk_aie_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
> +
> +	v4l2_ctrl_request_complete(vb->req_obj.req, &ctx->hdl);
> +}
> +
> +static int mtk_aie_querycap(struct file *file, void *fh,
> +			    struct v4l2_capability *cap)
> +{
> +	struct mtk_aie_dev *fd = video_drvdata(file);
> +	struct device *dev = fd->dev;
> +
> +	strscpy(cap->driver, dev_driver_string(dev), sizeof(cap->driver));
> +	strscpy(cap->card, dev_driver_string(dev), sizeof(cap->card));
> +
> +	cap->device_caps = V4L2_CAP_VIDEO_OUTPUT_MPLANE |
> +			   V4L2_CAP_STREAMING | V4L2_CAP_META_CAPTURE;
> +	cap->capabilities = V4L2_CAP_DEVICE_CAPS | cap->device_caps;
> +
> +	return 0;
> +}
> +
> +static int mtk_aie_enum_fmt_out_mp(struct file *file, void *fh,
> +				   struct v4l2_fmtdesc *f)
> +{
> +	if (f->index >= NUM_FORMATS)
> +		return -EINVAL;
> +
> +	f->pixelformat = mtk_aie_img_fmts[f->index].pixelformat;
> +	return 0;
> +}
> +
> +static void mtk_aie_fill_pixfmt_mp(struct v4l2_pix_format_mplane *dfmt,
> +				   const struct v4l2_pix_format_mplane *sfmt)
> +{
> +	dfmt->field = V4L2_FIELD_NONE;
> +	dfmt->colorspace = V4L2_COLORSPACE_BT2020;
> +	dfmt->num_planes = sfmt->num_planes;
> +	dfmt->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
> +	dfmt->quantization = V4L2_QUANTIZATION_DEFAULT;
> +	dfmt->xfer_func = V4L2_MAP_XFER_FUNC_DEFAULT(dfmt->colorspace);
> +	dfmt->pixelformat = sfmt->pixelformat;
> +
> +	/* Keep user setting as possible */
> +	dfmt->width = clamp(dfmt->width, MTK_FD_OUTPUT_MIN_WIDTH,
> +			    MTK_FD_OUTPUT_MAX_WIDTH);
> +	dfmt->height = clamp(dfmt->height, MTK_FD_OUTPUT_MIN_HEIGHT,
> +			     MTK_FD_OUTPUT_MAX_HEIGHT);
> +
> +	dfmt->plane_fmt[0].bytesperline = ALIGN(dfmt->width, 16);
> +	dfmt->plane_fmt[1].bytesperline = ALIGN(dfmt->width, 16);
> +
> +	dfmt->plane_fmt[0].sizeimage = dfmt->height * dfmt->plane_fmt[0].bytesperline;
> +	dfmt->plane_fmt[1].sizeimage = dfmt->height * dfmt->plane_fmt[1].bytesperline;
> +	if (sfmt->num_planes == 2 && sfmt->pixelformat == V4L2_PIX_FMT_NV12M) {
> +		dfmt->plane_fmt[1].sizeimage /= 2;
> +	} else if (sfmt->pixelformat == V4L2_PIX_FMT_NV12) {
> +		dfmt->plane_fmt[0].sizeimage *= 3;
> +		dfmt->plane_fmt[0].sizeimage /= 2;
> +	}
> +}
> +
> +static const struct v4l2_pix_format_mplane *mtk_aie_find_fmt(u32 format)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < NUM_FORMATS; i++) {
> +		if (mtk_aie_img_fmts[i].pixelformat == format)
> +			return &mtk_aie_img_fmts[i];
> +	}
> +
> +	return NULL;
> +}
> +
> +static int mtk_aie_try_fmt_out_mp(struct file *file, void *fh,
> +				  struct v4l2_format *f)
> +{
> +	struct v4l2_pix_format_mplane *pix_mp = &f->fmt.pix_mp;
> +	const struct v4l2_pix_format_mplane *fmt;
> +
> +	fmt = mtk_aie_find_fmt(pix_mp->pixelformat);
> +	if (!fmt)
> +		fmt = &mtk_aie_img_fmts[0]; /* Get default img fmt */
> +
> +	mtk_aie_fill_pixfmt_mp(pix_mp, fmt);
> +	return 0;
> +}
> +
> +static int mtk_aie_g_fmt_out_mp(struct file *file, void *fh,
> +				struct v4l2_format *f)
> +{
> +	struct mtk_aie_ctx *ctx = fh_to_ctx(fh);
> +
> +	f->fmt.pix_mp = ctx->src_fmt;
> +
> +	return 0;
> +}
> +
> +static int mtk_aie_s_fmt_out_mp(struct file *file, void *fh,
> +				struct v4l2_format *f)
> +{
> +	struct mtk_aie_ctx *ctx = fh_to_ctx(fh);
> +	struct vb2_queue *vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type);
> +	const struct v4l2_pix_format_mplane *fmt;
> +	struct mtk_aie_dev *fd = ctx->fd_dev;
> +
> +	if (!vq) {
> +		dev_err(fd->dev, "%s vq is NULL!\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	/* Change not allowed if queue is streaming. */
> +	if (vb2_is_streaming(vq))
> +		return -EBUSY;
> +
> +	fmt = mtk_aie_find_fmt(f->fmt.pix_mp.pixelformat);
> +	if (!fmt)
> +		fmt = &mtk_aie_img_fmts[0]; /* Get default img fmt */
> +	else if (&fd->ctx->fh != file->private_data)
> +		return -EBUSY;
> +
> +	fd->ctx = ctx;
> +	mtk_aie_fill_pixfmt_mp(&f->fmt.pix_mp, fmt);
> +	ctx->src_fmt = f->fmt.pix_mp;
> +
> +	return 0;
> +}
> +
> +static int mtk_aie_enum_fmt_meta_cap(struct file *file, void *fh,
> +				     struct v4l2_fmtdesc *f)
> +{
> +	if (f->index)
> +		return -EINVAL;
> +
> +	strscpy(f->description, "Face detection result", sizeof(f->description));
> +	f->pixelformat = V4L2_META_FMT_MTFD_RESULT;
> +	f->flags = 0;
> +
> +	return 0;
> +}
> +
> +static int mtk_aie_g_fmt_meta_cap(struct file *file, void *fh,
> +				  struct v4l2_format *f)
> +{
> +	f->fmt.meta.dataformat = V4L2_META_FMT_MTFD_RESULT;
> +	f->fmt.meta.buffersize = sizeof(struct aie_enq_info);
> +
> +	return 0;
> +}
> +
> +static const struct vb2_ops mtk_aie_vb2_ops = {
> +	.queue_setup = mtk_aie_vb2_queue_setup,
> +	.buf_out_validate = mtk_aie_vb2_buf_out_validate,
> +	.buf_prepare = mtk_aie_vb2_buf_prepare,
> +	.buf_queue = mtk_aie_vb2_buf_queue,
> +	.start_streaming = mtk_aie_vb2_start_streaming,
> +	.stop_streaming = mtk_aie_vb2_stop_streaming,
> +	.wait_prepare = vb2_ops_wait_prepare,
> +	.wait_finish = vb2_ops_wait_finish,
> +	.buf_request_complete = mtk_aie_vb2_request_complete,
> +};
> +
> +static const struct v4l2_ioctl_ops mtk_aie_v4l2_video_out_ioctl_ops = {
> +	.vidioc_querycap = mtk_aie_querycap,
> +	.vidioc_enum_fmt_vid_out = mtk_aie_enum_fmt_out_mp,
> +	.vidioc_g_fmt_vid_out_mplane = mtk_aie_g_fmt_out_mp,
> +	.vidioc_s_fmt_vid_out_mplane = mtk_aie_s_fmt_out_mp,
> +	.vidioc_try_fmt_vid_out_mplane = mtk_aie_try_fmt_out_mp,
> +	.vidioc_enum_fmt_meta_cap = mtk_aie_enum_fmt_meta_cap,
> +	.vidioc_g_fmt_meta_cap = mtk_aie_g_fmt_meta_cap,
> +	.vidioc_s_fmt_meta_cap = mtk_aie_g_fmt_meta_cap,
> +	.vidioc_try_fmt_meta_cap = mtk_aie_g_fmt_meta_cap,
> +	.vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs,
> +	.vidioc_create_bufs = v4l2_m2m_ioctl_create_bufs,
> +	.vidioc_expbuf = v4l2_m2m_ioctl_expbuf,
> +	.vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf,
> +	.vidioc_querybuf = v4l2_m2m_ioctl_querybuf,
> +	.vidioc_qbuf = v4l2_m2m_ioctl_qbuf,
> +	.vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf,
> +	.vidioc_streamon = v4l2_m2m_ioctl_streamon,
> +	.vidioc_streamoff = v4l2_m2m_ioctl_streamoff,
> +	.vidioc_subscribe_event = v4l2_ctrl_subscribe_event,
> +	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
> +};
> +
> +static int mtk_aie_queue_init(void *priv, struct vb2_queue *src_vq,
> +			      struct vb2_queue *dst_vq)
> +{
> +	struct mtk_aie_ctx *ctx = priv;
> +	int ret;
> +
> +	src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
> +	src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
> +	src_vq->supports_requests = true;
> +	src_vq->drv_priv = ctx;
> +	src_vq->ops = &mtk_aie_vb2_ops;
> +	src_vq->mem_ops = &vb2_dma_contig_memops;
> +	src_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
> +	src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> +	src_vq->lock = &ctx->fd_dev->vfd_lock;
> +	src_vq->dev = ctx->fd_dev->v4l2_dev.dev;
> +
> +	ret = vb2_queue_init(src_vq);
> +	if (ret)
> +		return ret;
> +
> +	dst_vq->type = V4L2_BUF_TYPE_META_CAPTURE;
> +	dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
> +	dst_vq->drv_priv = ctx;
> +	dst_vq->ops = &mtk_aie_vb2_ops;
> +	dst_vq->mem_ops = &vb2_dma_contig_memops;
> +	dst_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
> +	dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> +	dst_vq->lock = &ctx->fd_dev->vfd_lock;
> +	dst_vq->dev = ctx->fd_dev->v4l2_dev.dev;
> +
> +	return vb2_queue_init(dst_vq);
> +}
> +
> +static int mtk_aie_s_ctrl(struct v4l2_ctrl *ctrl)
> +{
> +	struct mtk_aie_ctx *ctx = ctrl_to_ctx(ctrl);
> +	struct v4l2_ctrl_aie_param *p_aie_param;
> +	struct v4l2_ctrl_aie_init *p_aie_init;
> +
> +	if (!ctx)
> +		return -EINVAL;
> +
> +	switch (ctrl->id) {
> +	case V4L2_CID_MTK_AIE_INIT:
> +		p_aie_init = ctrl->p_new.p;
> +		memcpy(&ctx->user_init, p_aie_init, sizeof(struct v4l2_ctrl_aie_init));
> +		break;
> +	case V4L2_CID_MTK_AIE_PARAM:
> +		p_aie_param = ctrl->p_new.p;
> +		memcpy(&ctx->user_param, p_aie_param, sizeof(struct v4l2_ctrl_aie_param));
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static const struct v4l2_ctrl_ops aie_ctrl_ops = {
> +	.s_ctrl = mtk_aie_s_ctrl,
> +};
> +
> +static void mtk_aie_ctrl_type_op_init(const struct v4l2_ctrl *ctrl,
> +				      u32 from_idx, union v4l2_ctrl_ptr ptr)
> +{
> +	struct v4l2_ctrl_aie_param *p_aie_param;
> +	struct v4l2_ctrl_aie_init *p_aie_init;
> +
> +	switch (ctrl->id) {
> +	case V4L2_CID_MTK_AIE_INIT:
> +		p_aie_init = ptr.p;
> +		memset(p_aie_init, 0, sizeof(struct v4l2_ctrl_aie_init));
> +		break;
> +
> +	case V4L2_CID_MTK_AIE_PARAM:
> +		p_aie_param = ptr.p;
> +		memset(p_aie_param, 0, sizeof(struct v4l2_ctrl_aie_param));
> +		break;
> +
> +	default:
> +		break;
> +	}
> +}
> +
> +static int mtk_aie_ctrl_type_op_validate(const struct v4l2_ctrl *ctrl,
> +					 union v4l2_ctrl_ptr ptr)
> +{
> +	struct mtk_aie_ctx *ctx = ctrl_to_ctx(ctrl);
> +	struct v4l2_ctrl_aie_param *p_aie_param;
> +	struct v4l2_ctrl_aie_init *p_aie_init;
> +	struct mtk_aie_dev *fd;
> +
> +	if (!ctx)
> +		return -EINVAL;
> +
> +	fd = ctx->fd_dev;
> +
> +	switch (ctrl->id) {
> +	case V4L2_CID_MTK_AIE_PARAM:
> +		p_aie_param = ptr.p;
> +
> +		switch (p_aie_param->fd_mode) {
> +		case FDMODE:
> +		case ATTRIBUTEMODE:
> +		case FLDMODE:
> +			break;
> +		default:
> +			dev_err(ctx->dev, "Requested invalied mode: %d\n", p_aie_param->fd_mode);
> +			return -EINVAL;
> +		}
> +
> +		switch (p_aie_param->src_img_fmt) {
> +		case FMT_YUV_2P:
> +		case FMT_YVU_2P:
> +		case FMT_YUYV:
> +		case FMT_YVYU:
> +		case FMT_UYVY:
> +		case FMT_VYUY:
> +		case FMT_MONO:
> +		case FMT_YUV420_2P:
> +		case FMT_YUV420_1P:
> +			break;
> +		default:
> +			dev_err(ctx->dev, "Requested invalied fmt: %d\n", p_aie_param->src_img_fmt);
> +			return -EINVAL;
> +		}
> +
> +		if (p_aie_param->src_img_width > fd->base_para->max_img_rect.width ||
> +		    p_aie_param->src_img_height > fd->base_para->max_img_rect.height ||
> +		    p_aie_param->src_img_width == 0 || p_aie_param->src_img_height == 0) {
> +			dev_err(fd->dev, "Requested invalied Src_WD: %d Src_HT: %d\n",
> +				p_aie_param->src_img_width,
> +				p_aie_param->src_img_height);
> +
> +			dev_err(fd->dev, "Requested invalied MAX_Src_WD: %d MAX_Src_HT: %d\n",
> +				fd->base_para->max_img_rect.width,
> +				fd->base_para->max_img_rect.height);
> +
> +			return -EINVAL;
> +		}
> +
> +		if (p_aie_param->pyramid_base_width > fd->base_para->max_pyramid_rect.width ||
> +		    p_aie_param->pyramid_base_height > fd->base_para->max_pyramid_rect.height ||
> +		    p_aie_param->number_of_pyramid > 3 || p_aie_param->number_of_pyramid <= 0) {
> +			dev_err(fd->dev, "Requested invalied base w: %d h: %d num: %d\n",
> +				p_aie_param->pyramid_base_width, p_aie_param->pyramid_base_height,
> +				p_aie_param->number_of_pyramid);
> +
> +			dev_err(fd->dev, "Requested invalied max w: %d h: %d\n",
> +				fd->base_para->max_pyramid_rect.width,
> +				fd->base_para->max_pyramid_rect.height);
> +
> +			return -EINVAL;
> +		}
> +
> +		break;
> +
> +	case V4L2_CID_MTK_AIE_INIT:
> +		p_aie_init = ptr.p;
> +		if (!p_aie_init->max_img_width || !p_aie_init->max_img_height ||
> +		    !p_aie_init->pyramid_width || !p_aie_init->pyramid_height) {
> +			dev_err(fd->dev,
> +				"Requested invalied max_w: %d max_h: %d, p_w: %d p_h: %d\n",
> +				p_aie_init->max_img_width, p_aie_init->max_img_height,
> +				p_aie_init->pyramid_width, p_aie_init->pyramid_height);
> +
> +			return -EINVAL;
> +		}
> +
> +		break;
> +
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static const struct v4l2_ctrl_type_ops aie_ctrl_type_ops = {
> +	.equal = v4l2_ctrl_type_op_equal,
> +	.init = mtk_aie_ctrl_type_op_init,
> +	.log = v4l2_ctrl_type_op_log,
> +	.validate = mtk_aie_ctrl_type_op_validate,
> +};
> +
> +static struct v4l2_ctrl_config mtk_aie_controls[] = {
> +	{
> +		.ops = &aie_ctrl_ops,
> +		.type_ops = &aie_ctrl_type_ops,
> +		.id = V4L2_CID_MTK_AIE_INIT,
> +		.name = "FD detection init",
> +		.type = V4L2_CTRL_TYPE_AIE_INIT,
> +		.elem_size = sizeof(struct v4l2_ctrl_aie_init),
> +	}, {
> +		.ops = &aie_ctrl_ops,
> +		.type_ops = &aie_ctrl_type_ops,
> +		.id = V4L2_CID_MTK_AIE_PARAM,
> +		.name = "FD detection param",
> +		.type = V4L2_CTRL_TYPE_AIE_PARAM,
> +		.elem_size = sizeof(struct v4l2_ctrl_aie_param),
> +	},
> +};
> +
> +static int mtk_aie_ctrls_setup(struct mtk_aie_ctx *ctx)
> +{
> +	struct v4l2_ctrl_handler *hdl = &ctx->hdl;
> +	int i;
> +
> +	v4l2_ctrl_handler_init(hdl, V4L2_CID_MTK_AIE_MAX);
> +	if (hdl->error)
> +		return hdl->error;
> +
> +	for (i = 0; i < ARRAY_SIZE(mtk_aie_controls); i++) {
> +		v4l2_ctrl_new_custom(hdl, &mtk_aie_controls[i], ctx);
> +		if (hdl->error) {
> +			v4l2_ctrl_handler_free(hdl);
> +			dev_err(ctx->dev, "Failed to register controls: %d", i);
> +			return hdl->error;
> +		}
> +	}
> +
> +	ctx->fh.ctrl_handler = &ctx->hdl;
> +	v4l2_ctrl_handler_setup(hdl);
> +
> +	return 0;
> +}
> +
> +static void init_ctx_fmt(struct mtk_aie_ctx *ctx)
> +{
> +	struct v4l2_pix_format_mplane *src_fmt = &ctx->src_fmt;
> +	struct v4l2_meta_format *dst_fmt = &ctx->dst_fmt;
> +
> +	/* Initialize M2M source fmt */
> +	src_fmt->width = MTK_FD_OUTPUT_MAX_WIDTH;
> +	src_fmt->height = MTK_FD_OUTPUT_MAX_HEIGHT;
> +	mtk_aie_fill_pixfmt_mp(src_fmt, &mtk_aie_img_fmts[0]);
> +
> +	/* Initialize M2M destination fmt */
> +	dst_fmt->buffersize = sizeof(struct aie_enq_info);
> +	dst_fmt->dataformat = V4L2_META_FMT_MTFD_RESULT;
> +}
> +
> +/*
> + * V4L2 file operations.
> + */
> +static int mtk_vfd_open(struct file *filp)
> +{
> +	struct video_device *vdev = video_devdata(filp);
> +	struct mtk_aie_dev *fd = video_drvdata(filp);
> +	struct mtk_aie_ctx *ctx;
> +	int ret;
> +
> +	mutex_lock(&fd->dev_lock);
> +
> +	if (fd->fd_state & STATE_OPEN) {
> +		dev_err(fd->dev, "VFD is already open, Only one instance is supported\n");
> +		ret =  -EBUSY;
> +		goto err_unlock;
> +	}
> +
> +	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
> +	if (!ctx) {
> +		ret =  -ENOMEM;
> +		goto err_unlock;
> +	}
> +
> +	ctx->fd_dev = fd;
> +	ctx->dev = fd->dev;
> +	fd->ctx = ctx;
> +
> +	v4l2_fh_init(&ctx->fh, vdev);
> +	filp->private_data = &ctx->fh;
> +
> +	init_ctx_fmt(ctx);
> +
> +	ret = mtk_aie_ctrls_setup(ctx);
> +	if (ret) {
> +		dev_err(ctx->dev, "Failed to set up controls: %d\n", ret);
> +		goto err_fh_exit;
> +	}
> +	ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(fd->m2m_dev, ctx, &mtk_aie_queue_init);
> +	if (IS_ERR(ctx->fh.m2m_ctx)) {
> +		ret = PTR_ERR(ctx->fh.m2m_ctx);
> +		goto err_free_ctrl_handler;
> +	}
> +	v4l2_fh_add(&ctx->fh);
> +	fd->fd_state |= STATE_OPEN;
> +
> +	mutex_unlock(&fd->dev_lock);
> +
> +	return 0;
> +err_free_ctrl_handler:
> +	v4l2_ctrl_handler_free(&ctx->hdl);
> +err_fh_exit:
> +	v4l2_fh_exit(&ctx->fh);
> +	kfree(ctx);
> +err_unlock:
> +	mutex_unlock(&fd->dev_lock);
> +
> +	return ret;
> +}
> +
> +static int mtk_vfd_release(struct file *filp)
> +{
> +	struct mtk_aie_ctx *ctx = container_of(filp->private_data, struct mtk_aie_ctx, fh);
> +	struct mtk_aie_dev *fd = video_drvdata(filp);
> +
> +	mutex_lock(&fd->dev_lock);
> +
> +	fd->fd_state &= ~STATE_OPEN;
> +
> +	v4l2_m2m_ctx_release(ctx->fh.m2m_ctx);
> +	v4l2_ctrl_handler_free(&ctx->hdl);
> +	v4l2_fh_del(&ctx->fh);
> +	v4l2_fh_exit(&ctx->fh);
> +
> +	kfree(ctx);
> +
> +	mutex_unlock(&fd->dev_lock);
> +
> +	return 0;
> +}
> +
> +static __poll_t mtk_vfd_fop_poll(struct file *file, poll_table *wait)
> +{
> +	int ret;
> +
> +	struct mtk_aie_ctx *ctx = container_of(file->private_data, struct mtk_aie_ctx, fh);
> +	struct mtk_aie_dev *fd = ctx->fd_dev;
> +
> +	if (fd->fd_state & STATE_INIT) {
> +		/* Waiting Job Finsh */
> +		ret = wait_for_completion_timeout(&fd->fd_job_finished, msecs_to_jiffies(1000));
> +		if (!ret) {
> +			dev_err(ctx->dev, "Wait job finish timeout from poll\n");
> +			return EPOLLERR;
> +		}
> +	}
> +
> +	return v4l2_m2m_fop_poll(file, wait);
> +}
> +
> +static const struct v4l2_file_operations fd_video_fops = {
> +	.owner = THIS_MODULE,
> +	.open = mtk_vfd_open,
> +	.release = mtk_vfd_release,
> +	.poll = mtk_vfd_fop_poll,
> +	.unlocked_ioctl = video_ioctl2,
> +	.mmap = v4l2_m2m_fop_mmap,
> +};
> +
> +static int mtk_aie_job_ready(void *priv)
> +{
> +	struct vb2_v4l2_buffer *src_buf, *dst_buf;
> +	struct mtk_aie_ctx *ctx = priv;
> +	struct mtk_aie_dev *fd = ctx->fd_dev;
> +	struct fd_buffer src_img[2] = {};
> +	void *plane_vaddr;
> +
> +	if (!ctx->fh.m2m_ctx) {
> +		dev_err(fd->dev, "Memory-to-memory context is NULL\n");
> +		return -1;
> +	}
> +
> +	if (!(fd->fd_state & STATE_OPEN)) {
> +		dev_err(fd->dev, "Job ready with device closed\n");
> +		return -1;
> +	}
> +
> +	mutex_lock(&fd->fd_lock);
> +
> +	src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
> +	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
> +
> +	if (!src_buf || !dst_buf) {
> +		dev_err(fd->dev, "src or dst buf is NULL\n");
> +		mutex_unlock(&fd->fd_lock);
> +		return -1;
> +	}
> +
> +	if (!(fd->fd_state & STATE_INIT)) {
> +		dev_err(fd->dev, "%s Wrong fd state: %d\n", __func__, fd->fd_state);
> +		mutex_unlock(&fd->fd_lock);
> +		return -1;
> +	}
> +
> +	plane_vaddr = vb2_plane_vaddr(&dst_buf->vb2_buf, 0);
> +	if (!plane_vaddr) {
> +		dev_err(fd->dev, "Failed to get plane virtual address\n");
> +		mutex_unlock(&fd->fd_lock);
> +		return -1;
> +	}
> +
> +	v4l2_ctrl_request_setup(src_buf->vb2_buf.req_obj.req, &ctx->hdl);
> +
> +	fd->aie_cfg = (struct aie_enq_info *)plane_vaddr;
> +	fd->aie_cfg->fld_face_num = ctx->user_param.fld_face_num;
> +
> +	memset(fd->aie_cfg, 0, sizeof(struct aie_enq_info));
> +	memcpy(fd->aie_cfg, &ctx->user_param, sizeof(struct v4l2_ctrl_aie_param));
> +	memcpy(fd->aie_cfg->fld_input, ctx->user_param.fld_input,
> +	       FLD_MAX_FRAME * sizeof(struct fld_crop_rip_rop));
> +
> +	src_img[0].dma_addr = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
> +
> +	if (ctx->src_fmt.num_planes == 2) {
> +		src_img[1].dma_addr =
> +			vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 1);
> +	}
> +
> +	if ((fd->aie_cfg->sel_mode == FDMODE || fd->aie_cfg->sel_mode == ATTRIBUTEMODE) &&
> +	    fd->aie_cfg->src_img_fmt == FMT_YUV420_1P) {
> +		src_img[1].dma_addr = src_img[0].dma_addr + ctx->user_param.src_img_stride *
> +			ctx->user_param.src_img_height;
> +	}
> +
> +	fd->aie_cfg->src_img_addr = src_img[0].dma_addr;
> +	fd->aie_cfg->src_img_addr_uv = src_img[1].dma_addr;
> +
> +	aie_prepare(fd, fd->aie_cfg);
> +
> +	mutex_unlock(&fd->fd_lock);
> +
> +	if (src_buf) {
> +		/* Complete request controls if any */
> +		v4l2_ctrl_request_complete(src_buf->vb2_buf.req_obj.req, &ctx->hdl);
> +	}
> +
> +	return 0;
> +}
> +
> +static void mtk_aie_device_run(void *priv)
> +{
> +	struct mtk_aie_ctx *ctx = priv;
> +	struct mtk_aie_dev *fd = ctx->fd_dev;
> +	int ret;
> +
> +	ret = mtk_aie_job_ready(priv);
> +	if (ret == -1) {
> +		dev_err(fd->dev, "Failed to run job ready\n");
> +		return;
> +	}
> +
> +	atomic_inc(&fd->num_composing);
> +	mtk_aie_hw_job_exec(fd);
> +	aie_execute(fd, fd->aie_cfg);
> +}
> +
> +static struct v4l2_m2m_ops fd_m2m_ops = {
> +	.device_run = mtk_aie_device_run,
> +};
> +
> +static const struct media_device_ops fd_m2m_media_ops = {
> +	.req_validate = vb2_request_validate,
> +	.req_queue = v4l2_m2m_request_queue,
> +};
> +
> +static int mtk_aie_video_device_register(struct mtk_aie_dev *fd)
> +{
> +	struct v4l2_m2m_dev *m2m_dev = fd->m2m_dev;
> +	struct video_device *vfd = &fd->vfd;
> +	struct device *dev = fd->dev;
> +	int ret;
> +
> +	vfd->fops = &fd_video_fops;
> +	vfd->release = video_device_release_empty;
> +	vfd->lock = &fd->vfd_lock;
> +	vfd->v4l2_dev = &fd->v4l2_dev;
> +	vfd->vfl_dir = VFL_DIR_M2M;
> +	vfd->device_caps = V4L2_CAP_STREAMING | V4L2_CAP_VIDEO_OUTPUT_MPLANE |
> +			   V4L2_CAP_META_CAPTURE;
> +	vfd->ioctl_ops = &mtk_aie_v4l2_video_out_ioctl_ops;
> +
> +	strscpy(vfd->name, dev_driver_string(dev), sizeof(vfd->name));
> +
> +	video_set_drvdata(vfd, fd);
> +
> +	ret = video_register_device(vfd, VFL_TYPE_VIDEO, 0);
> +	if (ret) {
> +		dev_err(dev, "Failed to register video device\n");
> +		return ret;
> +	}
> +
> +	ret = v4l2_m2m_register_media_controller(m2m_dev, vfd, MEDIA_ENT_F_PROC_VIDEO_STATISTICS);
> +	if (ret) {
> +		dev_err(dev, "Failed to init mem2mem media controller\n");
> +		video_unregister_device(vfd);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static int mtk_aie_dev_v4l2_init(struct mtk_aie_dev *fd)
> +{
> +	struct media_device *mdev = &fd->mdev;
> +	struct device *dev = fd->dev;
> +	int ret;
> +
> +	ret = v4l2_device_register(dev, &fd->v4l2_dev);
> +	if (ret) {
> +		dev_err(dev, "Failed to register v4l2 device\n");
> +		return ret;
> +	}
> +
> +	fd->m2m_dev = v4l2_m2m_init(&fd_m2m_ops);
> +	if (IS_ERR(fd->m2m_dev)) {
> +		dev_err(dev, "Failed to init mem2mem device\n");
> +		ret = PTR_ERR(fd->m2m_dev);
> +		goto err_unreg_v4l2_dev;
> +	}
> +
> +	mdev->dev = dev;
> +	strscpy(mdev->model, dev_driver_string(dev), sizeof(mdev->model));
> +	media_device_init(mdev);
> +	mdev->ops = &fd_m2m_media_ops;
> +	fd->v4l2_dev.mdev = mdev;
> +
> +	ret = mtk_aie_video_device_register(fd);
> +	if (ret)
> +		goto err_cleanup_mdev;
> +
> +	ret = media_device_register(mdev);
> +	if (ret) {
> +		dev_err(dev, "Failed to register mem2mem media device\n");
> +		goto err_unreg_vdev;
> +	}
> +	return 0;
> +
> +err_unreg_vdev:
> +	v4l2_m2m_unregister_media_controller(fd->m2m_dev);
> +	video_unregister_device(&fd->vfd);
> +err_cleanup_mdev:
> +	media_device_cleanup(mdev);
> +	v4l2_m2m_release(fd->m2m_dev);
> +err_unreg_v4l2_dev:
> +	v4l2_device_unregister(&fd->v4l2_dev);
> +	return ret;
> +}
> +
> +static void mtk_aie_video_device_unregister(struct mtk_aie_dev *fd)
> +{
> +	v4l2_m2m_unregister_media_controller(fd->m2m_dev);
> +	video_unregister_device(&fd->vfd);
> +	media_device_cleanup(&fd->mdev);
> +	v4l2_m2m_release(fd->m2m_dev);
> +	v4l2_device_unregister(&fd->v4l2_dev);
> +}
> +
> +static void mtk_aie_frame_done_worker(struct work_struct *work)
> +{
> +	struct mtk_aie_req_work *req_work = (struct mtk_aie_req_work *)work;
> +	struct mtk_aie_dev *fd = (struct mtk_aie_dev *)req_work->fd_dev;
> +
> +	if (fd->reg_cfg.fd_mode == FDMODE) {
> +		fd->reg_cfg.hw_result = readl(fd->fd_base + AIE_RESULT_0_REG);
> +		fd->reg_cfg.hw_result1 = readl(fd->fd_base + AIE_RESULT_1_REG);
> +	}
> +
> +	mutex_lock(&fd->fd_lock);
> +
> +	switch (fd->aie_cfg->sel_mode) {
> +	case FDMODE:
> +		aie_get_fd_result(fd, fd->aie_cfg);
> +		break;
> +	case ATTRIBUTEMODE:
> +		aie_get_attr_result(fd, fd->aie_cfg);
> +		break;
> +	case FLDMODE:
> +		aie_get_fld_result(fd, fd->aie_cfg);
> +		break;
> +	default:
> +		dev_dbg(fd->dev, "Wrong sel_mode\n");
> +		break;
> +	}
> +
> +	mutex_unlock(&fd->fd_lock);
> +
> +	if (!cancel_delayed_work(&fd->job_timeout_work))
> +		return;
> +
> +	atomic_dec(&fd->num_composing);
> +	mtk_aie_hw_job_finish(fd, VB2_BUF_STATE_DONE);
> +	wake_up(&fd->flushing_waitq);
> +}
> +
> +static int mtk_aie_resource_init(struct mtk_aie_dev *fd)
> +{
> +	mutex_init(&fd->vfd_lock);
> +	mutex_init(&fd->dev_lock);
> +	mutex_init(&fd->fd_lock);
> +
> +	init_completion(&fd->fd_job_finished);
> +	complete_all(&fd->fd_job_finished);
> +	INIT_DELAYED_WORK(&fd->job_timeout_work, mtk_aie_job_timeout_work);
> +	init_waitqueue_head(&fd->flushing_waitq);
> +	atomic_set(&fd->num_composing, 0);
> +	fd->fd_stream_count = 0;
> +
> +	fd->frame_done_wq = alloc_ordered_workqueue(dev_name(fd->dev),
> +						    WQ_HIGHPRI | WQ_FREEZABLE);
> +	if (!fd->frame_done_wq) {
> +		dev_err(fd->dev, "failed to alloc frame_done workqueue\n");
> +		mutex_destroy(&fd->vfd_lock);
> +		mutex_destroy(&fd->dev_lock);
> +		mutex_destroy(&fd->fd_lock);
> +		return -ENOMEM;
> +	}
> +
> +	INIT_WORK(&fd->req_work.work, mtk_aie_frame_done_worker);
> +	fd->req_work.fd_dev = fd;
> +
> +	return 0;
> +}
> +
> +static void mtk_aie_resource_free(struct platform_device *pdev)
> +{
> +	struct mtk_aie_dev *fd = dev_get_drvdata(&pdev->dev);
> +
> +	if (fd->frame_done_wq)
> +		destroy_workqueue(fd->frame_done_wq);
> +	fd->frame_done_wq = NULL;
> +	mutex_destroy(&fd->vfd_lock);
> +	mutex_destroy(&fd->dev_lock);
> +	mutex_destroy(&fd->fd_lock);
> +}
> +
> +static irqreturn_t mtk_aie_irq(int irq, void *data)
> +{
> +	struct mtk_aie_dev *fd = (struct mtk_aie_dev *)data;
> +
> +	aie_irqhandle(fd);
> +
> +	queue_work(fd->frame_done_wq, &fd->req_work.work);
> +
> +	return IRQ_HANDLED;
> +}
> +
> +static int mtk_aie_probe(struct platform_device *pdev)
> +{
> +	struct mtk_aie_dev *fd;
> +	struct device *dev = &pdev->dev;
> +	const struct mtk_aie_variant *driver_data = NULL;
> +	const struct of_device_id *match = NULL;
> +	int irq;
> +	int ret;
> +
> +	static struct clk_bulk_data aie_clks[] = {
> +		{ .id = "img_ipe" },
> +		{ .id = "ipe_fdvt" },
> +		{ .id = "ipe_top" },
> +		{ .id = "ipe_smi_larb12" },
> +	};
> +
> +	fd = devm_kzalloc(&pdev->dev, sizeof(*fd), GFP_KERNEL);
> +	if (!fd)
> +		return -ENOMEM;
> +
> +	match = of_match_node(mtk_aie_of_ids, dev->of_node);
> +	if (match)
> +		driver_data = (const struct mtk_aie_variant *)match->data;
> +
> +	fd->variant = driver_data;
> +	if (!fd->variant)
> +		return -ENODEV;
> +
> +	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(34));
> +	if (ret)
> +		return dev_err_probe(dev, ret, "Cannot set Coherent DMA mask\n");
> +
> +	dev_set_drvdata(dev, fd);
> +	fd->dev = dev;
> +
> +	irq = platform_get_irq(pdev, 0);
> +	if (irq < 0)
> +		return dev_err_probe(dev, irq, "Failed to get IRQ\n");
> +
> +	ret = devm_request_irq(dev, irq, mtk_aie_irq, IRQF_SHARED,
> +			       dev_driver_string(dev), fd);
> +	if (ret)
> +		return dev_err_probe(dev, ret, "Failed to request irq\n");
> +
> +	fd->irq = irq;
> +	fd->fd_base = devm_platform_ioremap_resource(pdev, 0);
> +	if (IS_ERR(fd->fd_base))
> +		return dev_err_probe(dev, -EINVAL, "Failed to get fd reg base\n");
> +
> +	fd->aie_clk.clk_num = ARRAY_SIZE(aie_clks);
> +	fd->aie_clk.clks = aie_clks;
> +	ret = devm_clk_bulk_get(&pdev->dev, fd->aie_clk.clk_num, fd->aie_clk.clks);
> +	if (ret)
> +		return dev_err_probe(dev, ret, "Failed to get raw clock\n");
> +
> +	ret = mtk_aie_resource_init(fd);
> +	if (ret) {
> +		mtk_aie_resource_free(pdev);
> +		return ret;
> +	}
> +	pm_runtime_enable(dev);
> +	ret = mtk_aie_dev_v4l2_init(fd);
> +	if (ret) {
> +		pm_runtime_disable(&pdev->dev);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static void mtk_aie_remove(struct platform_device *pdev)
> +{
> +	struct mtk_aie_dev *fd = dev_get_drvdata(&pdev->dev);
> +
> +	mtk_aie_video_device_unregister(fd);
> +	pm_runtime_disable(&pdev->dev);
> +	mtk_aie_resource_free(pdev);
> +}
> +
> +static int __maybe_unused mtk_aie_suspend(struct device *dev)
> +{
> +	struct mtk_aie_dev *fd = dev_get_drvdata(dev);
> +	int ret, num;
> +
> +	if (pm_runtime_suspended(dev))
> +		return 0;
> +
> +	num = atomic_read(&fd->num_composing);

In wait_event_timeout(), it would read this, so this is redundant. Drop it.

> +
> +	ret = wait_event_timeout(fd->flushing_waitq,
> +				 !(num = atomic_read(&fd->num_composing)),
> +				 msecs_to_jiffies(MTK_FD_HW_TIMEOUT_IN_MSEC));
> +	if (!ret && num) {
> +		dev_dbg(dev, "%s: flushing aie job timeout num %d\n",
> +			__func__, num);
> +
> +		return -EBUSY;
> +	}
> +
> +	ret = pm_runtime_force_suspend(dev);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
> +}
> +
> +static int __maybe_unused mtk_aie_resume(struct device *dev)
> +{
> +	int ret;
> +
> +	if (pm_runtime_suspended(dev)) {
> +		dev_dbg(dev, "%s: pm_runtime_suspended is true, no action\n", __func__);
> +		return 0;
> +	}
> +
> +	ret = pm_runtime_force_resume(dev);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
> +}
> +
> +static int __maybe_unused mtk_aie_runtime_suspend(struct device *dev)
> +{
> +	struct mtk_aie_dev *fd = dev_get_drvdata(dev);
> +
> +	clk_bulk_disable_unprepare(fd->aie_clk.clk_num, fd->aie_clk.clks);
> +
> +	return 0;
> +}
> +
> +static int __maybe_unused mtk_aie_runtime_resume(struct device *dev)
> +{
> +	struct mtk_aie_dev *fd = dev_get_drvdata(dev);
> +	int ret;
> +
> +	ret = clk_bulk_prepare_enable(fd->aie_clk.clk_num, fd->aie_clk.clks);
> +	if (ret) {
> +		dev_err(dev, "Failed to enable clock: %d\n", ret);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static const struct dev_pm_ops mtk_aie_pm_ops = {
> +	SET_SYSTEM_SLEEP_PM_OPS(mtk_aie_suspend, mtk_aie_resume)
> +	SET_RUNTIME_PM_OPS(mtk_aie_runtime_suspend, mtk_aie_runtime_resume, NULL)
> +};
> +
> +static struct platform_driver mtk_aie_driver = {
> +	.probe = mtk_aie_probe,
> +	.remove = mtk_aie_remove,
> +	.driver = {
> +		.name = "mtk-aie-5.3",
> +		.of_match_table = mtk_aie_of_ids,
> +		.pm = pm_ptr(&mtk_aie_pm_ops),
> +	}
> +};
> +
> +module_platform_driver(mtk_aie_driver);
> +MODULE_AUTHOR("Bo Kong <bo.kong@xxxxxxxxxxxx>");
> +MODULE_LICENSE("GPL");
> +MODULE_DESCRIPTION("MediaTek AIE driver");
> diff --git a/drivers/media/platform/mediatek/aie/mtk_aie_drv.c b/drivers/media/platform/mediatek/aie/mtk_aie_drv.c
> new file mode 100644
> index 000000000000..d1221e442ea3
> --- /dev/null
> +++ b/drivers/media/platform/mediatek/aie/mtk_aie_drv.c
> @@ -0,0 +1,2309 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2020 MediaTek Inc.
> + * Author: Fish Wu <fish.wu@xxxxxxxxxxxx>
> + */
> +
> +#include <linux/firmware.h>
> +#include <linux/mtk_aie_v4l2_controls.h>
> +#include "mtk_aie.h"
> +
> +static u32 aie_cmb_u16(u16 low, u16 high)
> +{
> +	return ((u32)high << 16) | low;
> +}
> +
> +static u32 aie_cmb_stride(u16 low, u16 high)
> +{
> +	return ((u32)high << 16) | (low & 0x000F);
> +}
> +
> +static inline u16 dif_x(const struct aie_enq_info *aie_cfg)
> +{
> +	return (u16)(aie_cfg->src_roi.x2 - aie_cfg->src_roi.x1);
> +}
> +
> +static inline u16 dif_y(const struct aie_enq_info *aie_cfg)
> +{
> +	return (u16)(aie_cfg->src_roi.y2 - aie_cfg->src_roi.y1);
> +}
> +
> +static inline void set_cmb_cfg(u32 *tbl, u16 index, u16 stride)
> +{
> +	tbl[index] = aie_cmb_u16(tbl[index], stride);
> +}
> +
> +static inline void set_cmbst_cfg(u32 *tbl, u16 index, u16 stride)
> +{
> +	tbl[index] = aie_cmb_stride(tbl[index], stride);
> +}
> +
> +static int aie_imem_alloc(struct mtk_aie_dev *fd, u32 size,
> +			  struct imem_buf_info *bufinfo)
> +{
> +	struct device *dev = fd->dev;
> +	void *va;
> +	dma_addr_t dma_handle;
> +
> +	if (size == 0) {
> +		dev_dbg(fd->dev, "%s: size(%d)\n", __func__, size);
> +		return -EINVAL;
> +	}
> +
> +	fd->fd_mem_size += size;
> +
> +	va = dma_alloc_coherent(dev, size, &dma_handle, GFP_KERNEL);
> +	if (!va || dma_handle == 0)
> +		return -ENOMEM;
> +
> +	bufinfo->va = va;
> +	bufinfo->pa = dma_handle;
> +	bufinfo->size = size;
> +
> +	dev_dbg(fd->dev, "%s: vAddr(0x%p) size(%d)\n", __func__, va,  size);
> +
> +	return 0;
> +}
> +
> +static void aie_imem_free(struct mtk_aie_dev *fd, struct imem_buf_info *bufinfo)
> +{
> +	dev_dbg(fd->dev, "%s: vAddr(0x%p) size(%d)\n", __func__, bufinfo->va, bufinfo->size);
> +
> +	if (bufinfo->va)
> +		dma_free_coherent(fd->dev, bufinfo->size, bufinfo->va, bufinfo->pa);
> +}
> +
> +static void aie_update_table(struct mtk_aie_dev *fd, u16 pym_width,
> +			     u16 pym_height, bool binit)
> +{
> +	int i;
> +	struct aie_static_info *pstv = &fd->st_info;
> +
> +	pstv->inf_elm[PYM2_START_LOOP].img_width = pym_width / 4;
> +	pstv->inf_elm[PYM2_START_LOOP].img_height = pym_height / 4;
> +
> +	pstv->inf_elm[PYM1_START_LOOP].img_width = pym_width / 2;
> +	pstv->inf_elm[PYM1_START_LOOP].img_height = pym_height / 2;
> +
> +	pstv->inf_elm[PYM0_START_LOOP].img_width = pym_width;
> +	pstv->inf_elm[PYM0_START_LOOP].img_height = pym_height;
> +
> +	for (i = 0; i < FD_LOOP_NUM; i++) {
> +		if (i != PYM2_START_LOOP && i != PYM1_START_LOOP && i != PYM0_START_LOOP) {
> +			if (fd_out_stride2_in[i] == 1) {
> +				pstv->inf_elm[i].img_width =
> +					pstv->inf_elm[i - 1].stride2_out_width;

Replace pstv->inf_elm[i - 1].stride2_out_width with

((pstv->inf_elm[i - 1].out_width - 1) / 2 + 1) * out_2size[i];

So you don't need stride2_out_width any more. Drop stride2_out_width.

> +				pstv->inf_elm[i].img_height =
> +					pstv->inf_elm[i - 1].stride2_out_height;

Replace pstv->inf_elm[i - 1].stride2_out_height with

((pstv->inf_elm[i].out_height - 1) / 2 + 1) * out_2size[i];

So you don't need stride2_out_height any more. Drop stride2_out_height.

> +			} else {
> +				pstv->inf_elm[i].img_width = pstv->inf_elm[i - 1].out_width;
> +				pstv->inf_elm[i].img_height = pstv->inf_elm[i - 1].out_height;
> +			}
> +		}
> +
> +		if (fd_maxpool[i] == 1 && fd_stride[i] == 1) {

When fd_maxpool[i] == 1, it imply that fd_stride[i] == 1, so you just need to check

if (fd_maxpool[i] == 1)

> +			pstv->inf_elm[i].out_width =
> +				(pstv->inf_elm[i].img_width - 1) / (2 * fd_maxpool[i]) + 1;

This could be reduced to

pstv->inf_elm[i].out_width = pstv->inf_elm[i].img_width - 1) / 2 + 1;

> +			pstv->inf_elm[i].out_height =
> +				(pstv->inf_elm[i].img_height - 1) / (2 * fd_maxpool[i]) + 1;
> +		} else {
> +			pstv->inf_elm[i].out_width = (pstv->inf_elm[i].img_width - 1) /
> +					(fd_stride[i] + 2 * fd_maxpool[i]) + 1;

In the else part, fd_maxpool[i] = 0. So this could be reduced to

pstv->inf_elm[i].out_width = (pstv->inf_elm[i].img_width - 1) /
			     fd_stride[i] + 1;

> +			pstv->inf_elm[i].out_height = (pstv->inf_elm[i].img_height - 1) /
> +					(fd_stride[i] + 2 * fd_maxpool[i]) + 1;
> +		}
> +
> +		pstv->inf_elm[i].stride2_out_width =
> +			((pstv->inf_elm[i].out_width - 1) / 2 + 1) * out_2size[i];
> +		pstv->inf_elm[i].stride2_out_height =
> +			((pstv->inf_elm[i].out_height - 1) / 2 + 1) * out_2size[i];
> +
> +		if (outlayer[i] == 1) {
> +			pstv->inf_elm[i].out_xsize_plus_1 =
> +				pstv->inf_elm[i].out_width * out_ch_pack[i] * 2;
> +			pstv->inf_elm[i].out_stride =
> +				round_up(pstv->inf_elm[i].out_xsize_plus_1 * ANCHOR_EN_NUM, 16);
> +			pstv->inf_elm[i].out_xsize_plus_1_stride2 =
> +				((pstv->inf_elm[i].out_width - 1) / 2 + 1) *
> +				out_ch_pack[i] * 2 * out_2size[i];
> +		} else {
> +			pstv->inf_elm[i].out_xsize_plus_1 =
> +				pstv->inf_elm[i].out_width * out_ch_pack[i];
> +			pstv->inf_elm[i].out_stride =
> +				round_up(pstv->inf_elm[i].out_xsize_plus_1, 16);
> +			pstv->inf_elm[i].out_xsize_plus_1_stride2 =
> +				((pstv->inf_elm[i].out_width - 1) / 2 + 1) *
> +				out_ch_pack[i] * out_2size[i];
> +		}
> +
> +		pstv->inf_elm[i].out_stride_stride2 =
> +				round_up(pstv->inf_elm[i].out_xsize_plus_1_stride2, 16);
> +
> +		if (out_2size[i] == 1)
> +			pstv->inf_elm[i].out_ysize_plus_1_stride2 =
> +				(pstv->inf_elm[i].out_height - 1) / 2 + 1;
> +		else
> +			pstv->inf_elm[i].out_ysize_plus_1_stride2 = pstv->inf_elm[i].out_height;
> +
> +		if (binit) {
> +			if (fd_wdma_en[i][0]) {

fd_wdma_en[i][0] is always 1, so this checking is redundant. Drop it.

> +				if (i == RPN2_LOOP_NUM || i == RPN1_LOOP_NUM || i == RPN0_LOOP_NUM)
> +					pstv->inf_elm[i].fd_wdma_size[0] = RESULT_SIZE;
> +				else
> +					pstv->inf_elm[i].fd_wdma_size[0] =
> +						pstv->inf_elm[i].out_height *
> +						pstv->inf_elm[i].out_stride;
> +			}
> +
> +			if (outlayer[i] == 1) {
> +				if (fd_wdma_en[i][1])
> +					pstv->inf_elm[i].fd_wdma_size[1] =
> +						pstv->inf_elm[i].fd_wdma_size[0];
> +				if (fd_wdma_en[i][2])
> +					pstv->inf_elm[i].fd_wdma_size[2] =
> +						pstv->inf_elm[i].fd_wdma_size[0];
> +				if (fd_wdma_en[i][3])
> +					pstv->inf_elm[i].fd_wdma_size[3] =
> +						pstv->inf_elm[i].fd_wdma_size[0];
> +			} else if (i == RPN2_LOOP_NUM || i == RPN1_LOOP_NUM || i == RPN0_LOOP_NUM) {
> +				pstv->inf_elm[i].fd_wdma_size[0] = RESULT_SIZE;

This is already done before. So just do nothing here.

> +			} else {
> +				if (fd_wdma_en[i][1])
> +					pstv->inf_elm[i].fd_wdma_size[1] =
> +						pstv->inf_elm[i].out_height *
> +						pstv->inf_elm[i].out_stride;
> +				if (fd_wdma_en[i][2])
> +					pstv->inf_elm[i].fd_wdma_size[2] =
> +						pstv->inf_elm[i].out_ysize_plus_1_stride2 *
> +						pstv->inf_elm[i].out_stride_stride2;
> +				if (fd_wdma_en[i][3])
> +					pstv->inf_elm[i].fd_wdma_size[3] =
> +						pstv->inf_elm[i].out_ysize_plus_1_stride2 *
> +						pstv->inf_elm[i].out_stride_stride2;
> +				}
> +		}
> +
> +		if (in_ch_pack[i] == 1)
> +			pstv->inf_elm[i].input_xsize_plus_1 =
> +				round_up(pstv->inf_elm[i].img_width, 8);
> +		else
> +			pstv->inf_elm[i].input_xsize_plus_1 =
> +				pstv->inf_elm[i].img_width * in_ch_pack[i];
> +	}
> +}
> +
> +static void aie_update_buf_params(struct mtk_aie_dev *fd, u16 max_img_width,
> +				  u16 max_img_height)
> +{
> +	struct aie_static_info *pstv = &fd->st_info;
> +	u8 i, j;
> +
> +	fd->base_para->max_img_rect.width = max_img_width;
> +	fd->base_para->max_img_rect.height = max_img_height;
> +	fd->fd_dma_max_size = 0;
> +	fd->fd_dma_rst_max_size = 0;
> +	fd->fd_fd_kernel_size = 0;
> +	fd->fd_attr_kernel_size = 0;
> +	fd->fd_attr_dma_max_size = 0;
> +	fd->fd_attr_dma_rst_max_size = 0;
> +
> +	/* FDMODE Dram Buffer Size */
> +	fd->fd_rs_cfg_size = 4 * fd->variant->rs_cfg_size * 2;
> +	fd->fd_fd_cfg_size = 4 * fd->variant->fd_cfg_size * FD_LOOP_NUM;
> +	fd->fd_yuv2rgb_cfg_size = 4 * fd->variant->y2r_cfg_size;
> +
> +	/* ATTRMODE Dram Buffer Size */
> +	fd->attr_fd_cfg_size = 4 * fd->variant->fd_cfg_size * ATTR_LOOP_NUM;
> +	fd->attr_yuv2rgb_cfg_size = 4 * fd->variant->y2r_cfg_size;
> +
> +	/* HW Output Buffer Size */
> +	fd->rs_pym_out_size[0] = fd->base_para->max_pyramid_rect.width *
> +				 fd->base_para->max_pyramid_rect.height;
> +	fd->rs_pym_out_size[1] = fd->rs_pym_out_size[0] / 2;
> +	fd->rs_pym_out_size[2] = fd->rs_pym_out_size[0] / 4;
> +
> +	/* FDMODE Dram Buffer Size */
> +	for (i = 0; i < FD_LOOP_NUM; i++) {
> +		for (j = 0; j < OUTPUT_WDMA_WRA_NUM; j++) {
> +			if (fd_wdma_en[i][j]) {
> +				if ((i == RPN2_LOOP_NUM || i == RPN1_LOOP_NUM ||
> +				     i == RPN0_LOOP_NUM) && j == 0)
> +					fd->fd_dma_rst_max_size += pstv->inf_elm[i].fd_wdma_size[j];
> +				else
> +					fd->fd_dma_max_size += pstv->inf_elm[i].fd_wdma_size[j];
> +			}
> +		}
> +	}
> +
> +	for (i = 0; i < FD_LOOP_NUM; i++) {
> +		for (j = 0; j < KERNEL_RDMA_RA_NUM; j++) {
> +			if (fd_ker_rdma_size[i][j])
> +				fd->fd_fd_kernel_size += fd_ker_rdma_size[i][j];
> +		}
> +	}
> +
> +	/* ATTRMODE Dram Buffer Size */
> +	for (i = 0; i < ATTR_LOOP_NUM; i++) {
> +		for (j = 0; j < OUTPUT_WDMA_WRA_NUM; j++) {
> +			if (attr_wdma_en[i][j]) {
> +				if ((i == AGE_OUT_RGS || i == GENDER_OUT_RGS ||
> +				     i == INDIAN_OUT_RGS || i == RACE_OUT_RGS) && j == 0)
> +					fd->fd_attr_dma_rst_max_size +=
> +						ATTR_OUT_SIZE * MAX_ENQUE_FRAME_NUM;
> +				else
> +					fd->fd_attr_dma_max_size += attr_wdma_size[i][j];
> +			}
> +		}
> +	}
> +
> +	for (i = 0; i < ATTR_LOOP_NUM; i++) {
> +		for (j = 0; j < KERNEL_RDMA_RA_NUM; j++)
> +			fd->fd_attr_kernel_size += attr_ker_rdma_size[i][j];
> +	}
> +
> +	/* FD Pose secure result output buffer: result size * 3 loops */
> +	fd->fd_dma_rst_max_size += RESULT_SIZE * 3;
> +
> +	/* FLD size */
> +	fd->fld_step_size = 0;
> +	for (i = 0; i < FLD_STEP_NUM; i++)
> +		for (j = 0; j < FLD_MAX_FRAME; j++)
> +			fd->fld_step_size += fld_step_align_size[i][j];
> +
> +	fd->fld_out_size = FLD_OUTPUT_SIZE * FLD_MAX_FRAME;
> +}
> +
> +static int aie_alloc_dram_buf(struct mtk_aie_dev *fd)
> +{
> +	u32 alloc_size;
> +	int ret;
> +	u8 i;
> +
> +	/* RS DRAM */
> +	alloc_size = fd->fd_rs_cfg_size;
> +	dev_dbg(fd->dev, "RS CFG:");
> +	ret = aie_imem_alloc(fd, alloc_size, &fd->rs_cfg_data);
> +	if (ret)
> +		goto dma_alloc_fail;
> +	/* FD MODE */
> +	fd->base_para->fd_rs_cfg_pa = fd->rs_cfg_data.pa;
> +	fd->base_para->fd_rs_cfg_va = fd->rs_cfg_data.va;
> +
> +	/* FD DRAM */
> +	alloc_size = fd->fd_fd_cfg_size + fd->attr_fd_cfg_size * MAX_ENQUE_FRAME_NUM;
> +	dev_dbg(fd->dev, "FD CFG:");
> +	ret = aie_imem_alloc(fd, alloc_size, &fd->fd_cfg_data);
> +	if (ret)
> +		goto dma_alloc_fail;
> +	/* FD MODE */
> +	fd->base_para->fd_fd_cfg_pa = fd->fd_cfg_data.pa;
> +	fd->base_para->fd_fd_cfg_va = fd->fd_cfg_data.va;
> +	/* ATTR MODE */
> +	fd->base_para->attr_fd_cfg_pa[0] =
> +		fd->base_para->fd_fd_cfg_pa + fd->fd_fd_cfg_size;
> +	fd->base_para->attr_fd_cfg_va[0] =
> +		fd->base_para->fd_fd_cfg_va + fd->fd_fd_cfg_size;
> +
> +	for (i = 1; i < MAX_ENQUE_FRAME_NUM; i++) {
> +		fd->base_para->attr_fd_cfg_pa[i] =
> +			fd->base_para->attr_fd_cfg_pa[i - 1] + fd->attr_fd_cfg_size;
> +		fd->base_para->attr_fd_cfg_va[i] =
> +			fd->base_para->attr_fd_cfg_va[i - 1] + fd->attr_fd_cfg_size;
> +	}
> +
> +	/* YUV2RGB DRAM */
> +	alloc_size = fd->fd_yuv2rgb_cfg_size +
> +		     fd->attr_yuv2rgb_cfg_size * MAX_ENQUE_FRAME_NUM;
> +	dev_dbg(fd->dev, "YUV2RGB CFG:");
> +	ret = aie_imem_alloc(fd, alloc_size, &fd->yuv2rgb_cfg_data);
> +	if (ret)
> +		goto dma_alloc_fail;
> +	/* FD MODE */
> +	fd->base_para->fd_yuv2rgb_cfg_pa = fd->yuv2rgb_cfg_data.pa;
> +	fd->base_para->fd_yuv2rgb_cfg_va = fd->yuv2rgb_cfg_data.va;
> +
> +	/* ATTR MODE */
> +	fd->base_para->attr_yuv2rgb_cfg_pa[0] =
> +		fd->base_para->fd_yuv2rgb_cfg_pa + fd->fd_yuv2rgb_cfg_size;
> +	fd->base_para->attr_yuv2rgb_cfg_va[0] =
> +		fd->base_para->fd_yuv2rgb_cfg_va + fd->fd_yuv2rgb_cfg_size;
> +
> +	for (i = 1; i < MAX_ENQUE_FRAME_NUM; i++) {
> +		fd->base_para->attr_yuv2rgb_cfg_pa[i] =
> +			fd->base_para->attr_yuv2rgb_cfg_pa[i - 1] + fd->attr_yuv2rgb_cfg_size;
> +		fd->base_para->attr_yuv2rgb_cfg_va[i] =
> +			fd->base_para->attr_yuv2rgb_cfg_va[i - 1] + fd->attr_yuv2rgb_cfg_size;
> +	}
> +
> +	return 0;
> +dma_alloc_fail:
> +	aie_imem_free(fd, &fd->fd_cfg_data);
> +	aie_imem_free(fd, &fd->rs_cfg_data);
> +
> +	return ret;
> +}
> +
> +static int aie_alloc_output_buf(struct mtk_aie_dev *fd)
> +{
> +	u32 alloc_size = 0, pa_off = 0, va_off = 0;
> +	int i, j;
> +	int ret;
> +
> +	for (i = 0; i < PYM_NUM; i++)
> +		alloc_size += fd->rs_pym_out_size[i] * 3;
> +	dev_dbg(fd->dev, "RS OUT:");
> +	ret = aie_imem_alloc(fd, alloc_size, &fd->rs_output_hw);
> +	if (ret)
> +		return ret;
> +
> +	for (i = 0; i < PYM_NUM; i++) {
> +		for (j = 0; j < COLOR_NUM; j++) {
> +			fd->base_para->rs_pym_rst_pa[i][j] = fd->rs_output_hw.pa + pa_off;
> +			pa_off += fd->rs_pym_out_size[i];
> +
> +			fd->base_para->rs_pym_rst_va[i][j] = fd->rs_output_hw.va + va_off;
> +			va_off += fd->rs_pym_out_size[i];
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static void aie_alloc_normal(struct mtk_aie_dev *fd, int start, int end)
> +{
> +	struct aie_static_info *pstv = &fd->st_info;
> +	int i, j, pi, pj;
> +
> +	if (start <= 0 || end <= start || end >= FD_LOOP_NUM) {
> +		dev_err(fd->dev, "%s: start = %d, end = %d\n", __func__, start, end);
> +		return;
> +	}
> +
> +	pi = start - 1;
> +	pj = 0;
> +	for (i = start; i < end + 1; i++) {
> +		for (j = 0; j < OUTPUT_WDMA_WRA_NUM; j++) {
> +			if (fd_wdma_en[i][j]) {
> +				fd->dma_para->fd_out_hw_pa[i][j] =
> +					fd->dma_para->fd_out_hw_pa[pi][pj] +
> +					pstv->inf_elm[pi].fd_wdma_size[pj];
> +				pi = i;
> +				pj = j;
> +			}
> +		}
> +	}
> +}
> +
> +static int aie_alloc_fddma_buf(struct mtk_aie_dev *fd)
> +{
> +	u32 alloc_size;
> +	int ret;
> +
> +	alloc_size = fd->fd_dma_max_size;
> +	dev_dbg(fd->dev, "FD DMA:");
> +	ret = aie_imem_alloc(fd, alloc_size, &fd->fd_dma_hw);
> +	if (ret)
> +		goto dma_alloc_fail;
> +
> +	alloc_size = fd->fd_fd_kernel_size + fd->fd_attr_kernel_size;
> +	dev_dbg(fd->dev, "FD KERNEL:");
> +	ret = aie_imem_alloc(fd, alloc_size, &fd->fd_kernel_hw);
> +	if (ret)
> +		goto dma_alloc_fail;
> +
> +	alloc_size = fd->fd_attr_dma_max_size;
> +	dev_dbg(fd->dev, "ATTR DMA:");
> +	ret = aie_imem_alloc(fd, alloc_size, &fd->fd_attr_dma_hw);
> +	if (ret)
> +		goto dma_alloc_fail;
> +
> +	alloc_size = fd->fd_dma_rst_max_size + fd->fd_attr_dma_rst_max_size;
> +	dev_dbg(fd->dev, "RESULT DMA:");
> +	ret = aie_imem_alloc(fd, alloc_size, &fd->fd_dma_result_hw);
> +	if (ret)
> +		goto dma_alloc_fail;
> +
> +	return 0;
> +
> +dma_alloc_fail:
> +	aie_imem_free(fd, &fd->fd_attr_dma_hw);
> +	aie_imem_free(fd, &fd->fd_kernel_hw);
> +	aie_imem_free(fd, &fd->fd_dma_hw);
> +
> +	return ret;
> +}
> +
> +static int aie_alloc_fld_buf(struct mtk_aie_dev *fd)
> +{
> +	u32 alloc_size;
> +	int ret;
> +
> +	alloc_size = fd->fld_step_size;
> +	dev_dbg(fd->dev, "FLD STEP:");
> +	ret = aie_imem_alloc(fd, alloc_size, &fd->fd_fld_step_data);
> +	if (ret)
> +		return ret;
> +
> +	alloc_size = fd->fld_out_size;
> +	dev_dbg(fd->dev, "FLD OUT:");
> +	ret = aie_imem_alloc(fd, alloc_size, &fd->fd_fld_out_hw);
> +	if (ret)
> +		goto fld_step;
> +
> +	return 0;
> +fld_step:
> +	aie_imem_free(fd, &fd->fd_fld_step_data);
> +
> +	return ret;
> +}
> +
> +static void aie_calculate_pa(struct mtk_aie_dev *fd, struct aie_static_info *pstv, int i)
> +{
> +	fd->dma_para->fd_out_hw_pa[i][0] = fd->dma_para->fd_out_hw_pa[i - 1][1] +
> +					   pstv->inf_elm[i - 1].fd_wdma_size[1];
> +	fd->dma_para->fd_out_hw_pa[i][1] = fd->dma_para->fd_out_hw_pa[i][0] +
> +					   pstv->inf_elm[i].out_xsize_plus_1;
> +	fd->dma_para->fd_out_hw_pa[i + 1][0] = fd->dma_para->fd_out_hw_pa[i][0] +
> +					       2 * pstv->inf_elm[i + 1].out_xsize_plus_1;
> +	fd->dma_para->fd_out_hw_pa[i + 1][1] = fd->dma_para->fd_out_hw_pa[i][0] +
> +					       3 * pstv->inf_elm[i + 1].out_xsize_plus_1;
> +	fd->dma_para->fd_out_hw_pa[i + 2][0] = fd->dma_para->fd_out_hw_pa[i][0] +
> +					       4 * pstv->inf_elm[i + 2].out_xsize_plus_1;
> +	fd->dma_para->fd_out_hw_pa[i + 3][0] = fd->dma_para->fd_out_hw_pa[i][0] +
> +					       pstv->inf_elm[i].fd_wdma_size[0] +
> +					       pstv->inf_elm[i].fd_wdma_size[1] +
> +					       pstv->inf_elm[i + 1].fd_wdma_size[0] +
> +					       pstv->inf_elm[i + 1].fd_wdma_size[1] +
> +					       pstv->inf_elm[i + 2].fd_wdma_size[0];
> +	fd->dma_para->fd_out_hw_pa[i + 3][1] = fd->dma_para->fd_out_hw_pa[i + 3][0] +
> +					       pstv->inf_elm[i + 3].fd_wdma_size[0] +
> +					       pstv->inf_elm[i + 3].fd_wdma_size[2] +
> +					       pstv->inf_elm[i + 4].fd_wdma_size[0] +
> +					       pstv->inf_elm[i + 4].fd_wdma_size[2] +
> +					       pstv->inf_elm[i + 5].fd_wdma_size[0];
> +	fd->dma_para->fd_out_hw_pa[i + 3][2] = fd->dma_para->fd_out_hw_pa[i + 3][0] +
> +					       pstv->inf_elm[i + 3].out_xsize_plus_1;
> +	fd->dma_para->fd_out_hw_pa[i + 3][3] = fd->dma_para->fd_out_hw_pa[i + 3][1] +
> +					       pstv->inf_elm[i + 3].out_xsize_plus_1;
> +	fd->dma_para->fd_out_hw_pa[i + 4][0] = fd->dma_para->fd_out_hw_pa[i + 3][0] +
> +					       2 * pstv->inf_elm[i + 4].out_xsize_plus_1;
> +	fd->dma_para->fd_out_hw_pa[i + 4][1] = fd->dma_para->fd_out_hw_pa[i + 3][1] +
> +					       2 * pstv->inf_elm[i + 4].out_xsize_plus_1;
> +	fd->dma_para->fd_out_hw_pa[i + 4][2] = fd->dma_para->fd_out_hw_pa[i + 3][0] +
> +					       3 * pstv->inf_elm[i + 4].out_xsize_plus_1;
> +	fd->dma_para->fd_out_hw_pa[i + 4][3] = fd->dma_para->fd_out_hw_pa[i + 3][1] +
> +					       3 * pstv->inf_elm[i + 4].out_xsize_plus_1;
> +	fd->dma_para->fd_out_hw_pa[i + 5][0] = fd->dma_para->fd_out_hw_pa[i + 3][0] +
> +					       4 * pstv->inf_elm[i + 5].out_xsize_plus_1;
> +	fd->dma_para->fd_out_hw_pa[i + 5][1] = fd->dma_para->fd_out_hw_pa[i + 3][1] +
> +					       4 * pstv->inf_elm[i + 5].out_xsize_plus_1;
> +	fd->dma_para->fd_out_hw_pa[i + 6][0] = fd->dma_para->fd_out_hw_pa[i + 3][1] +
> +					       pstv->inf_elm[i + 3].fd_wdma_size[1] +
> +					       pstv->inf_elm[i + 3].fd_wdma_size[3] +
> +					       pstv->inf_elm[i + 4].fd_wdma_size[1] +
> +					       pstv->inf_elm[i + 4].fd_wdma_size[3] +
> +					       pstv->inf_elm[i + 5].fd_wdma_size[1];
> +	fd->dma_para->fd_out_hw_pa[i + 6][1] = fd->dma_para->fd_out_hw_pa[i + 6][0] +
> +					       pstv->inf_elm[i + 6].out_xsize_plus_1;
> +	fd->dma_para->fd_out_hw_pa[i + 7][0] = fd->dma_para->fd_out_hw_pa[i + 6][0] +
> +					       2 * pstv->inf_elm[i + 7].out_xsize_plus_1;
> +	fd->dma_para->fd_out_hw_pa[i + 7][1] = fd->dma_para->fd_out_hw_pa[i + 6][0] +
> +					       3 * pstv->inf_elm[i + 7].out_xsize_plus_1;
> +	fd->dma_para->fd_out_hw_pa[i + 8][0] = fd->dma_para->fd_out_hw_pa[i + 6][0] +
> +					       4 * pstv->inf_elm[i + 8].out_xsize_plus_1;
> +}
> +
> +static void aie_arrange_fddma_buf(struct mtk_aie_dev *fd)
> +{
> +	struct aie_static_info *pstv = &fd->st_info;
> +	dma_addr_t current_pa;
> +	void *current_va;
> +	u8 i, j;
> +
> +	/* 0 ~ 18 */
> +	fd->dma_para->fd_out_hw_pa[0][0] = fd->fd_dma_hw.pa;
> +	aie_alloc_normal(fd, 1, 18);
> +
> +	/* 19 ~ 27 */
> +	aie_calculate_pa(fd, pstv, 19);
> +
> +	/* 29 ~ 47 */
> +	fd->dma_para->fd_out_hw_pa[29][0] = fd->dma_para->fd_out_hw_pa[25][0] +
> +					    pstv->inf_elm[25].fd_wdma_size[0] +
> +					    pstv->inf_elm[25].fd_wdma_size[1] +
> +					    pstv->inf_elm[26].fd_wdma_size[0] +
> +					    pstv->inf_elm[26].fd_wdma_size[1] +
> +					    pstv->inf_elm[27].fd_wdma_size[0];
> +	aie_alloc_normal(fd, 30, 47);
> +
> +	/* 48 ~ 56 */
> +	aie_calculate_pa(fd, pstv, 48);
> +
> +	/* 58 ~ 76 */
> +	fd->dma_para->fd_out_hw_pa[58][0] = fd->dma_para->fd_out_hw_pa[54][0] +
> +					    pstv->inf_elm[54].fd_wdma_size[0] +
> +					    pstv->inf_elm[54].fd_wdma_size[1] +
> +					    pstv->inf_elm[55].fd_wdma_size[0] +
> +					    pstv->inf_elm[55].fd_wdma_size[1] +
> +					    pstv->inf_elm[56].fd_wdma_size[0];
> +	aie_alloc_normal(fd, 59, 76);
> +
> +	/* 77 ~ 85 */
> +	aie_calculate_pa(fd, pstv, 77);
> +
> +	/* VA : except 28, 57, 86 */
> +	/* 0 ~ 86 */
> +	fd->dma_para->fd_out_hw_va[0][0] = fd->fd_dma_hw.va;
> +	for (i = 1; i < FD_LOOP_NUM; i++) {
> +		if (i == RPN2_LOOP_NUM || i == RPN1_LOOP_NUM || i == RPN0_LOOP_NUM)
> +			continue;
> +		for (j = 0; j < 4; j++) {
> +			if (fd_wdma_en[i][j]) {
> +				fd->dma_para->fd_out_hw_va[i][j] = fd->fd_dma_hw.va +
> +					fd->dma_para->fd_out_hw_pa[i][j] - fd->fd_dma_hw.pa;
> +			}
> +		}
> +	}
> +
> +	current_pa = fd->dma_para->fd_out_hw_pa[83][0] +
> +		     pstv->inf_elm[83].fd_wdma_size[0] +
> +		     pstv->inf_elm[83].fd_wdma_size[1] +
> +		     pstv->inf_elm[84].fd_wdma_size[0] +
> +		     pstv->inf_elm[84].fd_wdma_size[1] +
> +		     pstv->inf_elm[85].fd_wdma_size[0];
> +	current_va = fd->dma_para->fd_out_hw_va[83][0] +
> +		     pstv->inf_elm[83].fd_wdma_size[0] +
> +		     pstv->inf_elm[83].fd_wdma_size[1] +
> +		     pstv->inf_elm[84].fd_wdma_size[0] +
> +		     pstv->inf_elm[84].fd_wdma_size[1] +
> +		     pstv->inf_elm[85].fd_wdma_size[0];
> +
> +	dev_dbg(fd->dev, "%s: current VA = %p\n", __func__, current_va);
> +}
> +
> +static void aie_arrange_kernel_buf(struct mtk_aie_dev *fd)
> +{
> +	void *current_va;
> +	dma_addr_t current_pa;
> +	u8 i, j;
> +
> +	current_pa = fd->fd_kernel_hw.pa;
> +	current_va = fd->fd_kernel_hw.va;
> +
> +	for (i = 0; i < FD_LOOP_NUM; i++) {
> +		for (j = 0; j < KERNEL_RDMA_RA_NUM; j++) {
> +			if (fd_ker_rdma_size[i][j]) {
> +				fd->dma_para->fd_kernel_pa[i][j] = current_pa;
> +				fd->dma_para->fd_kernel_va[i][j] = current_va;
> +				current_pa += fd_ker_rdma_size[i][j];
> +				current_va += fd_ker_rdma_size[i][j];
> +			}
> +		}
> +	}
> +
> +	for (i = 0; i < ATTR_LOOP_NUM; i++) {
> +		for (j = 0; j < KERNEL_RDMA_RA_NUM; j++) {
> +			fd->dma_para->attr_kernel_pa[i][j] = current_pa;
> +			fd->dma_para->attr_kernel_va[i][j] = current_va;
> +			current_pa += attr_ker_rdma_size[i][j];
> +			current_va += attr_ker_rdma_size[i][j];
> +		}
> +	}
> +
> +	dev_dbg(fd->dev, "%s: current VA = %p\n", __func__, current_va);
> +}
> +
> +static void aie_arrange_attrdma_buf(struct mtk_aie_dev *fd)
> +{
> +	void *current_va;
> +	dma_addr_t current_pa;
> +	u8 i, j;
> +
> +	current_pa = fd->fd_attr_dma_hw.pa;
> +	current_va = fd->fd_attr_dma_hw.va;
> +
> +	/* attribute mode */
> +	for (i = 0; i < ATTR_LOOP_NUM; i++) {
> +		for (j = 0; j < OUTPUT_WDMA_WRA_NUM; j++) {
> +			if (attr_wdma_en[i][j]) {
> +				fd->dma_para->attr_out_hw_pa[i][j] = current_pa;
> +				fd->dma_para->attr_out_hw_va[i][j] = current_va;
> +				current_pa += attr_wdma_size[i][j];
> +				current_va += attr_wdma_size[i][j];
> +			}
> +		}
> +	}
> +
> +	dev_dbg(fd->dev, "%s: current VA = %p\n", __func__, current_va);
> +}
> +
> +static void aie_arrange_result_dma_buf(struct mtk_aie_dev *fd)
> +{
> +	struct aie_static_info *pstv = &fd->st_info;
> +	dma_addr_t currentresult_pa;
> +	void *currentresult_va;
> +	u8 i;
> +
> +	currentresult_pa = fd->fd_dma_result_hw.pa;
> +	currentresult_va = fd->fd_dma_result_hw.va;
> +
> +	fd->dma_para->fd_out_hw_pa[RPN2_LOOP_NUM][0] = currentresult_pa;
> +	fd->dma_para->fd_out_hw_va[RPN2_LOOP_NUM][0] = currentresult_va;
> +	currentresult_pa += pstv->inf_elm[RPN2_LOOP_NUM].fd_wdma_size[0];
> +	currentresult_va += pstv->inf_elm[RPN2_LOOP_NUM].fd_wdma_size[0];
> +	fd->dma_para->fd_out_hw_pa[RPN1_LOOP_NUM][0] = currentresult_pa;
> +	fd->dma_para->fd_out_hw_va[RPN1_LOOP_NUM][0] = currentresult_va;
> +	currentresult_pa += pstv->inf_elm[RPN1_LOOP_NUM].fd_wdma_size[0];
> +	currentresult_va += pstv->inf_elm[RPN1_LOOP_NUM].fd_wdma_size[0];
> +	fd->dma_para->fd_out_hw_pa[RPN0_LOOP_NUM][0] = currentresult_pa;
> +	fd->dma_para->fd_out_hw_va[RPN0_LOOP_NUM][0] = currentresult_va;
> +	currentresult_pa += pstv->inf_elm[RPN0_LOOP_NUM].fd_wdma_size[0];
> +	currentresult_va += pstv->inf_elm[RPN0_LOOP_NUM].fd_wdma_size[0];
> +
> +	fd->dma_para->attr_out_hw_pa[AGE_OUT_RGS][0] = currentresult_pa;
> +	fd->dma_para->attr_out_hw_va[AGE_OUT_RGS][0] = currentresult_va;
> +	currentresult_pa += ATTR_OUT_SIZE * MAX_ENQUE_FRAME_NUM;
> +	currentresult_va += ATTR_OUT_SIZE * MAX_ENQUE_FRAME_NUM;
> +	fd->dma_para->attr_out_hw_pa[GENDER_OUT_RGS][0] = currentresult_pa;
> +	fd->dma_para->attr_out_hw_va[GENDER_OUT_RGS][0] = currentresult_va;
> +	currentresult_pa += ATTR_OUT_SIZE * MAX_ENQUE_FRAME_NUM;
> +	currentresult_va += ATTR_OUT_SIZE * MAX_ENQUE_FRAME_NUM;
> +	fd->dma_para->attr_out_hw_pa[INDIAN_OUT_RGS][0] = currentresult_pa;
> +	fd->dma_para->attr_out_hw_va[INDIAN_OUT_RGS][0] = currentresult_va;
> +	currentresult_pa += ATTR_OUT_SIZE * MAX_ENQUE_FRAME_NUM;
> +	currentresult_va += ATTR_OUT_SIZE * MAX_ENQUE_FRAME_NUM;
> +	fd->dma_para->attr_out_hw_pa[RACE_OUT_RGS][0] = currentresult_pa;
> +	fd->dma_para->attr_out_hw_va[RACE_OUT_RGS][0] = currentresult_va;
> +	currentresult_pa += ATTR_OUT_SIZE * MAX_ENQUE_FRAME_NUM;
> +	currentresult_va += ATTR_OUT_SIZE * MAX_ENQUE_FRAME_NUM;
> +
> +	/* need to prepare 10 buffers to store 10 times result */
> +	fd->dma_para->age_out_hw_pa[0] = fd->dma_para->attr_out_hw_pa[AGE_OUT_RGS][0];
> +	fd->dma_para->age_out_hw_va[0] = fd->dma_para->attr_out_hw_va[AGE_OUT_RGS][0];
> +	fd->dma_para->gender_out_hw_pa[0] = fd->dma_para->attr_out_hw_pa[GENDER_OUT_RGS][0];
> +	fd->dma_para->gender_out_hw_va[0] = fd->dma_para->attr_out_hw_va[GENDER_OUT_RGS][0];
> +	fd->dma_para->is_indian_out_hw_pa[0] =
> +		fd->dma_para->attr_out_hw_pa[INDIAN_OUT_RGS][0];
> +	fd->dma_para->is_indian_out_hw_va[0] =
> +		fd->dma_para->attr_out_hw_va[INDIAN_OUT_RGS][0];
> +	fd->dma_para->race_out_hw_pa[0] = fd->dma_para->attr_out_hw_pa[RACE_OUT_RGS][0];
> +	fd->dma_para->race_out_hw_va[0] = fd->dma_para->attr_out_hw_va[RACE_OUT_RGS][0];
> +
> +	for (i = 1; i < MAX_ENQUE_FRAME_NUM; i++) {
> +		fd->dma_para->age_out_hw_pa[i] =
> +			fd->dma_para->age_out_hw_pa[i - 1] + ATTR_OUT_SIZE;
> +		fd->dma_para->age_out_hw_va[i] =
> +			fd->dma_para->age_out_hw_va[i - 1] + ATTR_OUT_SIZE;
> +		fd->dma_para->gender_out_hw_pa[i] =
> +			fd->dma_para->gender_out_hw_pa[i - 1] + ATTR_OUT_SIZE;
> +		fd->dma_para->gender_out_hw_va[i] =
> +			fd->dma_para->gender_out_hw_va[i - 1] + ATTR_OUT_SIZE;
> +		fd->dma_para->is_indian_out_hw_pa[i] =
> +			fd->dma_para->is_indian_out_hw_pa[i - 1] + ATTR_OUT_SIZE;
> +		fd->dma_para->is_indian_out_hw_va[i] =
> +			fd->dma_para->is_indian_out_hw_va[i - 1] + ATTR_OUT_SIZE;
> +		fd->dma_para->race_out_hw_pa[i] =
> +			fd->dma_para->race_out_hw_pa[i - 1] + ATTR_OUT_SIZE;
> +		fd->dma_para->race_out_hw_va[i] =
> +			fd->dma_para->race_out_hw_va[i - 1] + ATTR_OUT_SIZE;
> +	}
> +
> +	memset(fd->fd_dma_result_hw.va, 0, fd->fd_dma_result_hw.size);
> +
> +	dev_dbg(fd->dev, "%s: current VA = %p\n", __func__, currentresult_va);
> +}
> +
> +static void aie_arrange_fld_buf(struct mtk_aie_dev *fd)
> +{
> +	unsigned int offset = 0;
> +	u8 i, j;
> +
> +	for (i = 0; i < FLD_STEP_NUM; i++) {
> +		for (j = 0; j < FLD_MAX_FRAME; j++) {
> +			fd->fld_para->fld_step_va[i][j] = fd->fd_fld_step_data.va + offset;
> +			fd->fld_para->fld_step_pa[i][j] = fd->fd_fld_step_data.pa + offset;
> +			offset += fld_step_align_size[i][j];
> +		}
> +	}
> +
> +	for (i = 0, offset = 0; i < FLD_MAX_FRAME; i++) {
> +		fd->fld_para->fld_output_va[i] = fd->fd_fld_out_hw.va + offset;
> +		fd->fld_para->fld_output_pa[i] = fd->fd_fld_out_hw.pa + offset;
> +		offset += FLD_OUTPUT_SIZE;
> +	}
> +}
> +
> +static void aie_update_fddma_buf(struct mtk_aie_dev *fd)
> +{
> +	struct aie_static_info *pstv = &fd->st_info;
> +	u8 i, j;
> +
> +	/* 19 ~ 27 */
> +	aie_calculate_pa(fd, pstv, 19);
> +
> +	/* 48 ~ 56 */
> +	aie_calculate_pa(fd, pstv, 48);
> +
> +	/* 77 ~ 85 */
> +	aie_calculate_pa(fd, pstv, 77);
> +
> +	/* VA : except 28, 57, 86 */
> +	/* 0 ~ 86 */
> +	fd->dma_para->fd_out_hw_va[0][0] = fd->fd_dma_hw.va;
> +	for (i = 1; i < FD_LOOP_NUM; i++) {
> +		if (i == RPN2_LOOP_NUM || i == RPN1_LOOP_NUM || i == RPN0_LOOP_NUM)
> +			continue;
> +		for (j = 0; j < 4; j++) {
> +			if (fd_wdma_en[i][j]) {
> +				fd->dma_para->fd_out_hw_va[i][j] = fd->fd_dma_hw.va +
> +					fd->dma_para->fd_out_hw_pa[i][j] - fd->fd_dma_hw.pa;
> +			}
> +		}
> +	}
> +}
> +
> +static void aie_free_dram_buf(struct mtk_aie_dev *fd)
> +{
> +	aie_imem_free(fd, &fd->rs_cfg_data);
> +	aie_imem_free(fd, &fd->fd_cfg_data);
> +	aie_imem_free(fd, &fd->yuv2rgb_cfg_data);
> +}
> +
> +static void aie_free_output_buf(struct mtk_aie_dev *fd)
> +{
> +	aie_imem_free(fd, &fd->rs_output_hw);
> +}
> +
> +static void aie_free_fddma_buf(struct mtk_aie_dev *fd)
> +{
> +	aie_imem_free(fd, &fd->fd_dma_hw);
> +	aie_imem_free(fd, &fd->fd_kernel_hw);
> +	aie_imem_free(fd, &fd->fd_attr_dma_hw);
> +	aie_imem_free(fd, &fd->fd_dma_result_hw);
> +}
> +
> +static void aie_free_fld_buf(struct mtk_aie_dev *fd)
> +{
> +	aie_imem_free(fd, &fd->fd_fld_step_data);
> +	aie_imem_free(fd, &fd->fd_fld_out_hw);
> +}
> +
> +static int aie_copy_fw(struct mtk_aie_dev *fd, const char *name, void *buf,
> +		       unsigned int size)
> +{
> +	int ret;
> +	const struct firmware *fw = NULL;
> +
> +	ret = request_firmware(&fw, name, fd->dev);
> +	if (ret == 0) {
> +		if (size >= fw->size)
> +			memcpy(buf, fw->data, fw->size);
> +		else
> +			ret = -EINVAL;
> +	}
> +
> +	release_firmware(fw);
> +
> +	return ret;
> +}
> +
> +static int aie_load_fw(struct mtk_aie_dev *fd)
> +{
> +	char name[128] = {};
> +	u8 i, j;
> +	int ret;
> +
> +	ret = sprintf(name, "aie_mp_fw31/config/aie_fd_fd_config.bin");
> +	if (ret < 0)
> +		return ret;
> +
> +	ret = aie_copy_fw(fd,
> +			  name,
> +			  fd->base_para->fd_fd_cfg_va,
> +			  fd->fd_fd_cfg_size);
> +	if (ret)
> +		return ret;
> +
> +	ret = sprintf(name, "aie_mp_fw31/config/aie_fd_rs_config.bin");
> +	if (ret < 0)
> +		return ret;
> +
> +	ret = aie_copy_fw(fd,
> +			  name,
> +			  fd->base_para->fd_rs_cfg_va,
> +			  fd->fd_rs_cfg_size);
> +	if (ret)
> +		return ret;
> +
> +	ret = sprintf(name, "aie_mp_fw31/config/aie_fd_yuv2rgb_config.bin");
> +	if (ret < 0)
> +		return ret;
> +
> +	ret = aie_copy_fw(fd,
> +			  name,
> +			  fd->base_para->fd_yuv2rgb_cfg_va,
> +			  fd->fd_yuv2rgb_cfg_size);
> +	if (ret)
> +		return ret;
> +
> +	ret = sprintf(name, "aie_mp_fw31/config/aie_attr_fd_config.bin");
> +	if (ret < 0)
> +		return ret;
> +
> +	ret = aie_copy_fw(fd,
> +			  name,
> +			  fd->base_para->attr_fd_cfg_va[0],
> +			  fd->attr_fd_cfg_size);
> +	if (ret)
> +		return ret;
> +
> +	ret = sprintf(name, "aie_mp_fw31/config/aie_attr_yuv2rgb_config.bin");
> +	if (ret < 0)
> +		return ret;
> +
> +	ret = aie_copy_fw(fd,
> +			  name,
> +			  fd->base_para->attr_yuv2rgb_cfg_va[0],
> +			  fd->attr_yuv2rgb_cfg_size);
> +	if (ret)
> +		return ret;
> +
> +	for (i = 1; i < MAX_ENQUE_FRAME_NUM; i++) {
> +		memcpy(fd->base_para->attr_fd_cfg_va[i],
> +		       fd->base_para->attr_fd_cfg_va[0], fd->attr_fd_cfg_size);
> +		memcpy(fd->base_para->attr_yuv2rgb_cfg_va[i],
> +		       fd->base_para->attr_yuv2rgb_cfg_va[0],
> +		       fd->attr_yuv2rgb_cfg_size);
> +	}
> +
> +	for (i = 0; i < FD_LOOP_NUM; i++) {
> +		for (j = 0; j < KERNEL_RDMA_RA_NUM; j++) {
> +			if (fd_ker_rdma_size[i][j]) {
> +				ret = sprintf(name,
> +					      "aie_mp_fw31/kernel/aie_fd_kernel_bias_loop%02d_%d.bin"
> +					       , i, j);
> +				if (ret < 0)
> +					return ret;
> +
> +				ret = aie_copy_fw(fd, name,
> +						  fd->dma_para->fd_kernel_va[i][j],
> +						  fd_ker_rdma_size[i][j]);
> +				if (ret)
> +					return ret;
> +			}
> +		}
> +	}
> +
> +	for (i = 0; i < ATTR_LOOP_NUM; i++) {
> +		for (j = 0; j < KERNEL_RDMA_RA_NUM; j++) {
> +			ret = sprintf(name,
> +				      "aie_mp_fw31/kernel/aie_attr_kernel_bias_loop%02d_%d.bin"
> +				       , i, j);
> +			if (ret < 0)
> +				return ret;
> +
> +			ret = aie_copy_fw(fd, name,
> +					  fd->dma_para->attr_kernel_va[i][j],
> +					  attr_ker_rdma_size[i][j]);
> +			if (ret)
> +				return ret;
> +		}
> +	}
> +
> +	ret = sprintf(name, "aie_mp_fw31/config/aie_fld_blink_weight_forest14.bin");
> +
> +	if (ret < 0)
> +		return ret;
> +
> +	ret = aie_copy_fw(fd, name,
> +			  fd->fld_para->fld_step_va[FLD_STEP_BLINK][14],
> +			  fld_step_align_size[FLD_STEP_BLINK][14]);
> +	if (ret)
> +		return ret;
> +
> +	for (j = 0; j < FLD_MAX_FRAME; j++) {
> +		ret = sprintf(name,
> +			      "aie_mp_fw31/config/aie_fld_cv_forest%02d_iom3.bin", j);
> +		if (ret < 0)
> +			return ret;
> +		ret = aie_copy_fw(fd, name,
> +				  fd->fld_para->fld_step_va[FLD_STEP_CV][j],
> +				  fld_step_align_size[FLD_STEP_CV][j]);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	for (j = 0; j < FLD_MAX_FRAME; j++) {
> +		ret = sprintf(name,
> +			      "aie_mp_fw31/config/aie_fld_fp_forest%02d_om45.bin", j);
> +		if (ret < 0)
> +			return ret;
> +
> +		ret = aie_copy_fw(fd, name,
> +				  fd->fld_para->fld_step_va[FLD_STEP_FP][j],
> +				  fld_step_align_size[FLD_STEP_FP][j]);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	for (j = 0; j < FLD_MAX_FRAME; j++) {
> +		ret = sprintf(name,
> +			      "aie_mp_fw31/config/aie_fld_leafnode_forest%02d.bin", j);
> +		if (ret < 0)
> +			return ret;
> +
> +		ret = aie_copy_fw(fd, name,
> +				  fd->fld_para->fld_step_va[FLD_STEP_LEAF][j],
> +				  fld_step_align_size[FLD_STEP_LEAF][j]);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	for (j = 0; j < FLD_MAX_FRAME; j++) {
> +		ret = sprintf(name,
> +			      "aie_mp_fw31/config/aie_fld_tree_forest%02d_km02.bin", j);
> +		if (ret < 0)
> +			return ret;
> +		ret = aie_copy_fw(fd, name,
> +				  fd->fld_para->fld_step_va[FLD_STEP_KM02][j],
> +				  fld_step_align_size[FLD_STEP_KM02][j]);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	for (j = 0; j < FLD_MAX_FRAME; j++) {
> +		ret = sprintf(name,
> +			      "aie_mp_fw31/config/aie_fld_tree_forest%02d_km13.bin", j);
> +		if (ret < 0)
> +			return ret;
> +		ret = aie_copy_fw(fd, name,
> +				  fd->fld_para->fld_step_va[FLD_STEP_KM13][j],
> +				  fld_step_align_size[FLD_STEP_KM13][j]);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static int aie_update_cfg(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg)
> +{
> +	int crop_width;
> +	int crop_height;
> +
> +	if (aie_cfg->en_roi) {
> +		crop_width = dif_x(aie_cfg) + 1;
> +		crop_height = dif_y(aie_cfg) + 1;
> +	} else {
> +		crop_width = aie_cfg->src_img_width;
> +		crop_height = aie_cfg->src_img_height;
> +	}
> +
> +	if (crop_width == 0 || crop_height == 0) {
> +		dev_err(fd->dev, "Invalid crop size 0x0\n");
> +		return -EINVAL;
> +	}
> +
> +	if (aie_cfg->en_padding) {
> +		crop_width += aie_cfg->src_padding.right + aie_cfg->src_padding.left;
> +		crop_height += aie_cfg->src_padding.up + aie_cfg->src_padding.down;
> +	}
> +
> +	if (aie_cfg->sel_mode == FDMODE) {
> +		fd->base_para->sel_mode = aie_cfg->sel_mode;
> +		fd->base_para->crop_rect.width = crop_width;
> +		fd->base_para->crop_rect.height = crop_height;
> +		fd->base_para->src_img_addr = aie_cfg->src_img_addr;
> +		fd->base_para->src_img_addr_uv = aie_cfg->src_img_addr_uv;
> +		fd->base_para->img_rect.width = aie_cfg->src_img_width;
> +		fd->base_para->img_rect.height = aie_cfg->src_img_height;
> +		fd->base_para->src_img_fmt = aie_cfg->src_img_fmt;
> +		fd->base_para->rotate_degree = aie_cfg->rotate_degree;
> +	} else if (aie_cfg->sel_mode == ATTRIBUTEMODE) {
> +		fd->attr_para->sel_mode[fd->attr_para->w_idx] = aie_cfg->sel_mode;
> +		fd->attr_para->crop_width[fd->attr_para->w_idx] = crop_width;
> +		fd->attr_para->crop_height[fd->attr_para->w_idx] = crop_height;
> +		fd->attr_para->src_img_addr[fd->attr_para->w_idx] = aie_cfg->src_img_addr;
> +		fd->attr_para->src_img_addr_uv[fd->attr_para->w_idx] = aie_cfg->src_img_addr_uv;
> +		fd->attr_para->img_width[fd->attr_para->w_idx] = aie_cfg->src_img_width;
> +		fd->attr_para->img_height[fd->attr_para->w_idx] = aie_cfg->src_img_height;
> +		fd->attr_para->src_img_fmt[fd->attr_para->w_idx] = aie_cfg->src_img_fmt;
> +		fd->attr_para->rotate_degree[fd->attr_para->w_idx] = aie_cfg->rotate_degree;
> +	}
> +
> +	return 0;
> +}
> +
> +static int aie_config_y2r(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg,
> +			  int mode)
> +{
> +	u32 img_addr;
> +	u32 img_addr_UV;
> +	u32 img_off;
> +	u32 img_off_uv;
> +	u32 *yuv2rgb_cfg;
> +	u32 srcbuf, srcbuf_UV;
> +	u16 xmag_0, ymag_0;
> +	u16 pym0_out_w;
> +	u16 pym0_out_h;
> +	u16 stride_pym0_out_w;
> +	u16 sr_crp_w;
> +	u16 sr_crp_h;
> +	u16 y1_stride;
> +
> +	if (!aie_cfg->en_roi) {
> +		img_off = 0;
> +		img_off_uv = 0;
> +	} else {
> +		if (aie_cfg->src_img_fmt == FMT_MONO || aie_cfg->src_img_fmt == FMT_YUV_2P ||
> +		    aie_cfg->src_img_fmt == FMT_YVU_2P) {
> +			y1_stride = aie_cfg->src_img_stride * aie_cfg->src_roi.y1;
> +			img_off = y1_stride + aie_cfg->src_roi.x1;
> +			img_off_uv = y1_stride + aie_cfg->src_roi.x1;
> +		} else if (aie_cfg->src_img_fmt == FMT_YUV420_2P ||
> +			   aie_cfg->src_img_fmt == FMT_YUV420_1P) {
> +			y1_stride = aie_cfg->src_img_stride * aie_cfg->src_roi.y1;
> +			img_off = y1_stride + aie_cfg->src_roi.x1;
> +			img_off_uv = y1_stride / 2 + aie_cfg->src_roi.x1;
> +		} else if (aie_cfg->src_img_fmt == FMT_YUYV ||
> +			   aie_cfg->src_img_fmt == FMT_YVYU ||
> +			   aie_cfg->src_img_fmt == FMT_UYVY ||
> +			   aie_cfg->src_img_fmt == FMT_VYUY) {
> +			y1_stride = aie_cfg->src_img_stride * aie_cfg->src_roi.y1;
> +			img_off = y1_stride + aie_cfg->src_roi.x1 * 2;
> +			img_off_uv = y1_stride + aie_cfg->src_roi.x1 * 2;
> +		} else {
> +			dev_err(fd->dev, "Unsupport input format %d", aie_cfg->src_img_fmt);
> +			return -EINVAL;
> +		}
> +	}
> +
> +	img_addr = aie_cfg->src_img_addr + img_off;
> +	img_addr_UV = aie_cfg->src_img_addr_uv + img_off_uv;
> +
> +	srcbuf = img_addr;
> +	if (aie_cfg->src_img_fmt == FMT_YUV420_2P || aie_cfg->src_img_fmt == FMT_YUV420_1P ||
> +	    aie_cfg->src_img_fmt == FMT_YUV_2P || aie_cfg->src_img_fmt == FMT_YVU_2P)
> +		srcbuf_UV = img_addr_UV;
> +	else
> +		srcbuf_UV = 0;
> +
> +	if (mode == FDMODE) {
> +		sr_crp_w = fd->base_para->crop_rect.width;
> +		sr_crp_h = fd->base_para->crop_rect.height;
> +		yuv2rgb_cfg = (u32 *)fd->base_para->fd_yuv2rgb_cfg_va;
> +		pym0_out_w = fd->base_para->pyramid_rect.width;
> +	}
> +	if (mode == ATTRIBUTEMODE) {
> +		sr_crp_w = fd->attr_para->crop_width[fd->attr_para->w_idx];
> +		sr_crp_h = fd->attr_para->crop_height[fd->attr_para->w_idx];
> +		yuv2rgb_cfg = (u32 *)fd->base_para->attr_yuv2rgb_cfg_va[fd->attr_para->w_idx];
> +		pym0_out_w = ATTR_MODE_PYRAMID_WIDTH;
> +	}
> +
> +	pym0_out_h = pym0_out_w * sr_crp_h / sr_crp_w;
> +
> +	if (pym0_out_w != 0) {
> +		xmag_0 = 512 * sr_crp_w / pym0_out_w;
> +		ymag_0 = xmag_0;
> +	} else {
> +		xmag_0 = 0;
> +		ymag_0 = 0;
> +	}
> +
> +	yuv2rgb_cfg[Y2R_SRC_DST_FORMAT] = (yuv2rgb_cfg[Y2R_SRC_DST_FORMAT] & 0xFFFFFFF8) |
> +					  ((aie_cfg->src_img_fmt) & 0x7);
> +	if (aie_cfg->src_img_fmt == FMT_YUV420_2P || aie_cfg->src_img_fmt == FMT_YUV420_1P) {
> +		/* for match patten */
> +		yuv2rgb_cfg[Y2R_SRC_DST_FORMAT] = (yuv2rgb_cfg[Y2R_SRC_DST_FORMAT] &
> +						  0xFFFFFFF8) | ((0x3) & 0x7);
> +	}
> +	yuv2rgb_cfg[Y2R_IN_W_H] = (yuv2rgb_cfg[Y2R_IN_W_H] & 0xF800F800) |
> +				  ((sr_crp_w << 16) & 0x7FF0000) | (sr_crp_h & 0x7FF);
> +	yuv2rgb_cfg[Y2R_OUT_W_H] = (yuv2rgb_cfg[Y2R_OUT_W_H] & 0xF800F800) |
> +				   ((pym0_out_w << 16) & 0x7FF0000) | (pym0_out_h & 0x7FF);
> +
> +	if (aie_cfg->src_img_fmt == FMT_YUV_2P || aie_cfg->src_img_fmt == FMT_YVU_2P) {
> +		/* 2 plane */
> +		yuv2rgb_cfg[Y2R_RA0_RA1_EN] = (yuv2rgb_cfg[Y2R_RA0_RA1_EN] & 0xFFFFFFEE) | 0x11;
> +		if (aie_cfg->en_roi) {
> +			yuv2rgb_cfg[Y2R_IN_X_Y_SIZE0] = aie_cmb_u16(dif_x(aie_cfg), dif_y(aie_cfg));
> +			yuv2rgb_cfg[Y2R_IN_X_Y_SIZE1] = aie_cmb_u16(dif_x(aie_cfg), dif_y(aie_cfg));
> +		} else {
> +			yuv2rgb_cfg[Y2R_IN_X_Y_SIZE0] = aie_cmb_u16(sr_crp_w - 1, sr_crp_h - 1);
> +			yuv2rgb_cfg[Y2R_IN_X_Y_SIZE1] = aie_cmb_u16(sr_crp_w - 1, sr_crp_h - 1);
> +		}
> +		yuv2rgb_cfg[Y2R_IN_STRIDE0_BUS_SIZE0] =
> +			(yuv2rgb_cfg[Y2R_IN_STRIDE0_BUS_SIZE0] & 0xFFF0) |
> +			((aie_cfg->src_img_stride << 16) & 0xFFFF0000) | 0x1;
> +		yuv2rgb_cfg[Y2R_IN_STRIDE1_BUS_SIZE1] =
> +			(yuv2rgb_cfg[Y2R_IN_STRIDE1_BUS_SIZE1] & 0xFFF0) |
> +			((aie_cfg->src_img_stride << 16) & 0xFFFF0000) | 0x1;
> +	} else if (aie_cfg->src_img_fmt == FMT_MONO) {
> +		yuv2rgb_cfg[Y2R_RA0_RA1_EN] =
> +			(yuv2rgb_cfg[Y2R_RA0_RA1_EN] & 0xFFFFFFEE) | 0x01;
> +		if (aie_cfg->en_roi) {
> +			yuv2rgb_cfg[Y2R_IN_X_Y_SIZE0] = aie_cmb_u16(dif_x(aie_cfg), dif_y(aie_cfg));
> +			yuv2rgb_cfg[Y2R_IN_X_Y_SIZE1] = aie_cmb_u16(dif_x(aie_cfg), dif_y(aie_cfg));
> +		} else {
> +			yuv2rgb_cfg[Y2R_IN_X_Y_SIZE0] = aie_cmb_u16(sr_crp_w - 1, sr_crp_h - 1);
> +			yuv2rgb_cfg[Y2R_IN_X_Y_SIZE1] = aie_cmb_u16(sr_crp_w - 1, sr_crp_h - 1);
> +		}
> +		yuv2rgb_cfg[Y2R_IN_STRIDE0_BUS_SIZE0] =
> +			(yuv2rgb_cfg[Y2R_IN_STRIDE0_BUS_SIZE0] & 0xFFF0) |
> +			((aie_cfg->src_img_stride << 16) & 0xFFFF0000) | 0x0;
> +		yuv2rgb_cfg[Y2R_IN_STRIDE1_BUS_SIZE1] =
> +			(yuv2rgb_cfg[Y2R_IN_STRIDE1_BUS_SIZE1] & 0xFFF0) |
> +			((aie_cfg->src_img_stride << 16) & 0xFFFF0000) | 0x0;
> +	} else if (aie_cfg->src_img_fmt == FMT_YUYV ||
> +		   aie_cfg->src_img_fmt == FMT_YVYU ||
> +		   aie_cfg->src_img_fmt == FMT_UYVY ||
> +		   aie_cfg->src_img_fmt == FMT_VYUY) {
> +		/* 1 plane */
> +		yuv2rgb_cfg[Y2R_RA0_RA1_EN] = (yuv2rgb_cfg[Y2R_RA0_RA1_EN] & 0xFFFFFFEE) | 0x1;
> +		if (aie_cfg->en_roi) {
> +			yuv2rgb_cfg[Y2R_IN_X_Y_SIZE0] = aie_cmb_u16(2 * (dif_x(aie_cfg) + 1) - 1,
> +								    dif_y(aie_cfg));
> +			yuv2rgb_cfg[Y2R_IN_X_Y_SIZE1] = aie_cmb_u16(2 * (dif_x(aie_cfg) + 1) - 1,
> +								    dif_y(aie_cfg));
> +		} else {
> +			yuv2rgb_cfg[Y2R_IN_X_Y_SIZE0] = aie_cmb_u16(2 * sr_crp_w - 1, sr_crp_h - 1);
> +			yuv2rgb_cfg[Y2R_IN_X_Y_SIZE1] = aie_cmb_u16(2 * sr_crp_w - 1, sr_crp_h - 1);
> +		}
> +		yuv2rgb_cfg[Y2R_IN_STRIDE0_BUS_SIZE0] =
> +			(yuv2rgb_cfg[Y2R_IN_STRIDE0_BUS_SIZE0] & 0xFFF0) |
> +			((aie_cfg->src_img_stride << 16) & 0xFFFF0000) | 0x3;
> +		yuv2rgb_cfg[Y2R_IN_STRIDE1_BUS_SIZE1] =
> +			(yuv2rgb_cfg[Y2R_IN_STRIDE1_BUS_SIZE1] & 0xFFF0) |
> +			((aie_cfg->src_img_stride << 16) & 0xFFFF0000) | 0x3;
> +	}
> +
> +	/* AIE3.0 */
> +	if (aie_cfg->src_img_fmt == FMT_YUV420_2P ||
> +	    aie_cfg->src_img_fmt == FMT_YUV420_1P) {
> +		yuv2rgb_cfg[Y2R_RA0_RA1_EN] =
> +			(yuv2rgb_cfg[Y2R_RA0_RA1_EN] & 0xFFFFFFEE) | 0x11;
> +		if (aie_cfg->en_roi) {
> +			yuv2rgb_cfg[Y2R_IN_X_Y_SIZE0] = aie_cmb_u16(dif_x(aie_cfg), dif_y(aie_cfg));
> +			yuv2rgb_cfg[Y2R_IN_X_Y_SIZE1] = aie_cmb_u16(dif_x(aie_cfg),
> +								    dif_y(aie_cfg) / 2);
> +		} else {
> +			yuv2rgb_cfg[Y2R_IN_X_Y_SIZE0] =
> +				aie_cmb_u16(sr_crp_w - 1, sr_crp_h - 1);
> +			yuv2rgb_cfg[Y2R_IN_X_Y_SIZE1] = aie_cmb_u16(sr_crp_w - 1,
> +								    sr_crp_h / 2 - 1);
> +		}
> +		yuv2rgb_cfg[Y2R_IN_STRIDE0_BUS_SIZE0] =
> +			(yuv2rgb_cfg[Y2R_IN_STRIDE0_BUS_SIZE0] & 0xFFF0) |
> +			((aie_cfg->src_img_stride << 16) & 0xFFFF0000) | 0x0;
> +		yuv2rgb_cfg[Y2R_IN_STRIDE1_BUS_SIZE1] =
> +			(yuv2rgb_cfg[Y2R_IN_STRIDE1_BUS_SIZE1] & 0xFFF0) |
> +			((aie_cfg->src_img_stride << 16) & 0xFFFF0000) | 0x0;
> +
> +		yuv2rgb_cfg[Y2R_CO2_FMT_MODE_EN] =
> +			(yuv2rgb_cfg[Y2R_CO2_FMT_MODE_EN] & 0xFFFFFFFE) | 0x01;
> +		if (aie_cfg->en_roi) {
> +			yuv2rgb_cfg[Y2R_CO2_CROP_X] = aie_cmb_u16(0, dif_x(aie_cfg));
> +			yuv2rgb_cfg[Y2R_CO2_CROP_Y] = aie_cmb_u16(0, dif_y(aie_cfg));
> +		} else {
> +			yuv2rgb_cfg[Y2R_CO2_CROP_X] = aie_cmb_u16(0, sr_crp_w - 1);
> +			yuv2rgb_cfg[Y2R_CO2_CROP_Y] = aie_cmb_u16(0, sr_crp_h - 1);
> +		}
> +	} else {
> +		yuv2rgb_cfg[Y2R_CO2_FMT_MODE_EN] =
> +			(yuv2rgb_cfg[Y2R_CO2_FMT_MODE_EN] & 0xFFFFFFFE);
> +
> +		if (aie_cfg->en_roi) {
> +			yuv2rgb_cfg[Y2R_CO2_CROP_X] = aie_cmb_u16(0, dif_x(aie_cfg));
> +			yuv2rgb_cfg[Y2R_CO2_CROP_Y] = aie_cmb_u16(0, dif_y(aie_cfg));
> +		} else {
> +			yuv2rgb_cfg[Y2R_CO2_CROP_X] = aie_cmb_u16(0, sr_crp_w - 1);
> +			yuv2rgb_cfg[Y2R_CO2_CROP_Y] = aie_cmb_u16(0, sr_crp_h - 1);
> +		}
> +	}
> +
> +	stride_pym0_out_w = round_up(pym0_out_w, 8);
> +
> +	yuv2rgb_cfg[Y2R_OUT_X_Y_SIZE0] =
> +		aie_cmb_u16(pym0_out_w - 1, pym0_out_h - 1);
> +	set_cmb_cfg(yuv2rgb_cfg, Y2R_OUT_STRIDE0_BUS_SIZE0, stride_pym0_out_w);
> +	yuv2rgb_cfg[Y2R_OUT_X_Y_SIZE1] =
> +		aie_cmb_u16(pym0_out_w - 1, pym0_out_h - 1);
> +	set_cmb_cfg(yuv2rgb_cfg, Y2R_OUT_STRIDE1_BUS_SIZE1, stride_pym0_out_w);
> +	yuv2rgb_cfg[Y2R_OUT_X_Y_SIZE2] =
> +		aie_cmb_u16(pym0_out_w - 1, pym0_out_h - 1);
> +	set_cmb_cfg(yuv2rgb_cfg, Y2R_OUT_STRIDE2_BUS_SIZE2, stride_pym0_out_w);
> +
> +	if (aie_cfg->en_padding) {
> +		yuv2rgb_cfg[Y2R_PADDING_EN_UP_DOWN] =
> +			1 | ((aie_cfg->src_padding.up << 4) & 0x1FF0) |
> +			((aie_cfg->src_padding.down << 16) & 0x01FF0000);
> +		yuv2rgb_cfg[Y2R_PADDING_RIGHT_LEFT] =
> +			(aie_cfg->src_padding.right & 0x01FF) |
> +			((aie_cfg->src_padding.left << 16) & 0x01FF0000);
> +	} else {
> +		yuv2rgb_cfg[Y2R_PADDING_EN_UP_DOWN] = 0;
> +		yuv2rgb_cfg[Y2R_PADDING_RIGHT_LEFT] = 0;
> +	}
> +
> +	yuv2rgb_cfg[Y2R_IN_0] = srcbuf;
> +	yuv2rgb_cfg[Y2R_IN_1] = srcbuf_UV;
> +
> +	yuv2rgb_cfg[Y2R_OUT_0] = (u32)fd->base_para->rs_pym_rst_pa[0][0];
> +	yuv2rgb_cfg[Y2R_OUT_1] = (u32)fd->base_para->rs_pym_rst_pa[0][1];
> +	yuv2rgb_cfg[Y2R_OUT_2] = (u32)fd->base_para->rs_pym_rst_pa[0][2];
> +
> +	yuv2rgb_cfg[Y2R_X_Y_MAG] = (xmag_0 & 0x3FFF) | ((ymag_0 << 16) & 0x3FFF0000);
> +
> +	if (sr_crp_w >= pym0_out_w) {
> +		/* down scale AIE1.0 by FRZ */
> +		yuv2rgb_cfg[Y2R_RS_SEL_SRZ_EN] =
> +			(yuv2rgb_cfg[Y2R_RS_SEL_SRZ_EN] & 0x00100070);
> +		yuv2rgb_cfg[Y2R_SRZ_HORI_STEP] = 0;
> +		yuv2rgb_cfg[Y2R_SRZ_VERT_STEP] = 0;
> +	} else {
> +		/* SRZ */
> +		/* 0: FDRZ for down scaling */
> +		/* 1: SRZ for up scaling */
> +		yuv2rgb_cfg[Y2R_RS_SEL_SRZ_EN] =
> +			(yuv2rgb_cfg[Y2R_RS_SEL_SRZ_EN] & 0x00100070) | SRZ_BIT;
> +		yuv2rgb_cfg[Y2R_SRZ_HORI_STEP] = ((sr_crp_w - 1) << 15) / (pym0_out_w - 1);
> +		yuv2rgb_cfg[Y2R_SRZ_VERT_STEP] = ((sr_crp_h - 1) << 15) / (pym0_out_h - 1);
> +	}
> +
> +	yuv2rgb_cfg[Y2R_CON_IN_BA_MSB] = (u32)0x02020202;
> +	yuv2rgb_cfg[Y2R_CON_OUT_BA_MSB] = (u32)0x02020202;
> +
> +	return 0;
> +}
> +
> +static int aie_config_rs(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg)
> +{
> +	u32 *rs_cfg;
> +	u32 *rs_tbl[2];
> +	u16 xmag_0, ymag_0;
> +	u16 pym_out_w[3];
> +	u16 pym_out_h[3];
> +	u16 round_w;
> +	u16 sr_crp_w;
> +	u16 sr_crp_h;
> +	int i;
> +
> +	sr_crp_w = fd->base_para->crop_rect.width;
> +	sr_crp_h = fd->base_para->crop_rect.height;
> +
> +	rs_cfg = (u32 *)fd->base_para->fd_rs_cfg_va;
> +
> +	pym_out_w[0] = fd->base_para->pyramid_rect.width;
> +	pym_out_w[1] = pym_out_w[0] >> 1;
> +	pym_out_w[2] = pym_out_w[1] >> 1;
> +
> +	pym_out_h[0] = pym_out_w[0] * sr_crp_h / sr_crp_w;
> +	pym_out_h[1] = pym_out_h[0] >> 1;
> +	pym_out_h[2] = pym_out_h[1] >> 1;
> +
> +	for (i = 0; i < 2; i++) {
> +		rs_tbl[i] = rs_cfg + fd->variant->rs_cfg_size * i;
> +
> +		rs_tbl[i][RS_IN_0] = (u32)fd->base_para->rs_pym_rst_pa[i][0];
> +		rs_tbl[i][RS_IN_1] = (u32)fd->base_para->rs_pym_rst_pa[i][1];
> +		rs_tbl[i][RS_IN_2] = (u32)fd->base_para->rs_pym_rst_pa[i][2];
> +
> +		rs_tbl[i][RS_OUT_0] = (u32)fd->base_para->rs_pym_rst_pa[i + 1][0];
> +		rs_tbl[i][RS_OUT_1] = (u32)fd->base_para->rs_pym_rst_pa[i + 1][1];
> +		rs_tbl[i][RS_OUT_2] = (u32)fd->base_para->rs_pym_rst_pa[i + 1][2];
> +
> +		rs_tbl[i][RS_INPUT_W_H] = (rs_tbl[i][RS_INPUT_W_H] & 0xF800F800) |
> +			(pym_out_h[i] & 0x7FF) | ((pym_out_w[i] << 16) & 0x7FF0000);
> +		rs_tbl[i][RS_OUTPUT_W_H] = (rs_tbl[i][RS_OUTPUT_W_H] & 0xF800F800) |
> +			(pym_out_h[i + 1] & 0x7FF) | ((pym_out_w[i + 1] << 16) & 0x7FF0000);
> +
> +		rs_tbl[i][RS_IN_X_Y_SIZE0] = aie_cmb_u16(pym_out_w[i] - 1, pym_out_h[i] - 1);
> +		rs_tbl[i][RS_IN_X_Y_SIZE1] = aie_cmb_u16(pym_out_w[i] - 1, pym_out_h[i] - 1);
> +		rs_tbl[i][RS_IN_X_Y_SIZE2] = aie_cmb_u16(pym_out_w[i] - 1, pym_out_h[i] - 1);
> +
> +		set_cmb_cfg(rs_tbl[i], RS_IN_STRIDE0, pym_out_w[i]);
> +		set_cmb_cfg(rs_tbl[i], RS_IN_STRIDE1, pym_out_w[i]);
> +		set_cmb_cfg(rs_tbl[i], RS_IN_STRIDE2, pym_out_w[i]);
> +
> +		rs_tbl[i][RS_OUT_X_Y_SIZE0] = aie_cmb_u16(pym_out_w[i + 1] - 1,
> +							  pym_out_h[i + 1] - 1);
> +		rs_tbl[i][RS_OUT_X_Y_SIZE1] = aie_cmb_u16(pym_out_w[i + 1] - 1,
> +							  pym_out_h[i + 1] - 1);
> +		rs_tbl[i][RS_OUT_X_Y_SIZE2] = aie_cmb_u16(pym_out_w[i + 1] - 1,
> +							  pym_out_h[i + 1] - 1);
> +
> +		if (i == 0)
> +			round_w = pym_out_w[i + 1];
> +		else
> +			round_w = round_up(pym_out_w[i + 1], 8);
> +
> +		set_cmb_cfg(rs_tbl[i], RS_OUT_STRIDE0, round_w);
> +		set_cmb_cfg(rs_tbl[i], RS_OUT_STRIDE1, round_w);
> +		set_cmb_cfg(rs_tbl[i], RS_OUT_STRIDE2, round_w);
> +
> +		xmag_0 = 512 * pym_out_w[i] / pym_out_w[i + 1];
> +		ymag_0 = xmag_0;
> +
> +		rs_tbl[i][RS_X_Y_MAG] = (xmag_0 & 0x3FFF) | ((ymag_0 << 16) & 0x3FFF0000);
> +		rs_tbl[i][RS_CON_IN_BA_MSB] = (u32)0x02020202;
> +		rs_tbl[i][RS_CON_OUT_BA_MSB] = (u32)0x02020202;
> +	}
> +
> +	return 0;
> +}
> +
> +static int aie_config_network(struct mtk_aie_dev *fd,
> +			      struct aie_enq_info *aie_cfg)
> +{
> +	struct aie_static_info *pstv = &fd->st_info;
> +	u16 pyramid0_out_w, pyramid0_out_h, pyramid1_out_h, pyramid2_out_h;
> +	u16 out_ysize_plus_1, out_ysize_plus_1_stride2;
> +	u16 input_height, out_height;
> +	u16 conv_width, conv_height;
> +	u16 fd_xsize[4];
> +	u32 *fd_cur_cfg, *fd_cur_set;
> +	u32 sr_crp_w, sr_crp_h;
> +	u32 cal_x, cal_y;
> +	u8 uch, uloop;
> +	u8 i, j;
> +	void *fd_cfg;
> +
> +	sr_crp_w = fd->base_para->crop_rect.width;
> +	sr_crp_h = fd->base_para->crop_rect.height;
> +
> +	pyramid0_out_w = fd->base_para->pyramid_rect.width;
> +	pyramid0_out_h = pyramid0_out_w * sr_crp_h / sr_crp_w;
> +
> +	pyramid1_out_h = pyramid0_out_h / 2;
> +	pyramid2_out_h = pyramid1_out_h / 2;
> +
> +	fd_cfg = fd->base_para->fd_fd_cfg_va;
> +
> +	for (i = 0; i < FD_LOOP_NUM; i++) {
> +		fd_cur_cfg = (u32 *)fd_cfg + fd->variant->fd_cfg_size * i;
> +		fd_cur_cfg[FD_INPUT_ROTATE] = (fd_cur_cfg[FD_INPUT_ROTATE] & 0xFFFF0FFF) |
> +				     ((aie_cfg->rotate_degree << 12) & 0x3000);
> +
> +		if (i == 0)
> +			input_height = pyramid2_out_h;
> +		else if (i == (RPN2_LOOP_NUM + 1))
> +			input_height = pyramid1_out_h;
> +		else if (i == (RPN1_LOOP_NUM + 1))
> +			input_height = pyramid0_out_h;
> +		else
> +			if (fd_out_stride2_in[i] == 0)
> +				input_height = out_height;
> +			else
> +				input_height = (out_height + 1) / 2;
> +
> +		if (fd_maxpool[i] == 1 && fd_stride[i] == 1)

When fd_maxpool[i] == 1, it imply that fd_stride[i] == 1, so you just need to check

if (fd_maxpool[i] == 1)

> +			out_height =
> +				DIV_ROUND_UP(input_height, 2 * fd_maxpool[i]);
> +		else
> +			out_height = DIV_ROUND_UP(input_height, fd_stride[i] + 2 * fd_maxpool[i]);
> +
> +		if (i == RPN0_LOOP_NUM || i == RPN1_LOOP_NUM || i == RPN2_LOOP_NUM) {
> +			conv_width = fd->base_para->img_rect.width;
> +			conv_height = fd->base_para->img_rect.height;
> +			fd_xsize[0] = pstv->inf_elm[i].img_width * 2 * 16 * ANCHOR_EN_NUM - 1;
> +			fd_xsize[3] = pstv->inf_elm[i].img_width * 2 * 32 * ANCHOR_EN_NUM - 1;
> +			fd_xsize[2] = fd_xsize[3];
> +			fd_xsize[1] = fd_xsize[2];
> +		} else {
> +			conv_width = DIV_ROUND_UP(pstv->inf_elm[i].img_width, fd_stride[i]);
> +			conv_height = DIV_ROUND_UP(input_height, fd_stride[i]);
> +
> +			fd_xsize[3] = pstv->inf_elm[i].input_xsize_plus_1 - 1;

In aie_update_table(), is calculated as:

		if (in_ch_pack[i] == 1)
			pstv->inf_elm[i].input_xsize_plus_1 =
				round_up(pstv->inf_elm[i].img_width, 8);
		else
			pstv->inf_elm[i].input_xsize_plus_1 =
				pstv->inf_elm[i].img_width * in_ch_pack[i];

in_ch_pack[i] == 1 imply that (i == RPN0_LOOP_NUM || i == RPN1_LOOP_NUM || i == RPN2_LOOP_NUM),
and when (i == RPN0_LOOP_NUM || i == RPN1_LOOP_NUM || i == RPN2_LOOP_NUM) it never use input_xsize_plus_1,
so only the else part is valid.

			pstv->inf_elm[i].input_xsize_plus_1 =
				pstv->inf_elm[i].img_width * in_ch_pack[i];

And this calculation could be moved here,

fd_xsize[3] = pstv->inf_elm[i].img_width * in_ch_pack[i] - 1;

and input_xsize_plus_1 could be dropped.

> +			fd_xsize[2] = fd_xsize[3];
> +			fd_xsize[1] = fd_xsize[2];
> +			fd_xsize[0] = fd_xsize[1];
> +		}
> +
> +		fd_cur_cfg[FD_CONV_WIDTH_MOD6] = (fd_cur_cfg[FD_CONV_WIDTH_MOD6] & 0xFF8FFFFF) |
> +						 (((conv_width % 6) << 20) & 0x00700000);
> +		fd_cur_cfg[FD_CONV_IMG_W_H] = aie_cmb_u16(conv_height, conv_width);
> +
> +		fd_cur_cfg[FD_IN_IMG_W_H] = aie_cmb_u16(input_height, pstv->inf_elm[i].img_width);
> +		fd_cur_cfg[FD_OUT_IMG_W_H] = aie_cmb_u16(out_height, pstv->inf_elm[i].out_width);
> +
> +		if (fd_rdma_en[i][0][0] != -1) {
> +			for (j = 0; j < 4; j++) {
> +				fd_cur_cfg[FD_IN_X_Y_SIZE0 + 2 * j] =
> +					aie_cmb_u16(fd_xsize[j], input_height - 1);
> +				set_cmbst_cfg(fd_cur_cfg, FD_IN_STRIDE0_BUS_SIZE0 + 2 * j,
> +					      fd_xsize[j] + 1);
> +			}
> +		}
> +
> +		out_ysize_plus_1 = out_height - 1;

I think this variable name should be out_ysize_minus_1.

> +		out_ysize_plus_1_stride2 = (out_height + 1) / 2 - 1;

I think this variable name should be out_ysize_minus_1_stride2

> +
> +		for (j = 0; j < OUTPUT_WDMA_WRA_NUM; j++) {
> +			fd_cur_set = fd_cur_cfg + 2 * j;
> +			if (!fd_wdma_en[i][j])
> +				continue;
> +
> +			if (out_stride_size[i][j] == 1) {
> +				fd_cur_set[FD_OUT_X_Y_SIZE0] =
> +					aie_cmb_u16(pstv->inf_elm[i].out_xsize_plus_1 - 1,
> +						    out_ysize_plus_1);
> +				set_cmbst_cfg(fd_cur_set, FD_OUT_STRIDE0_BUS_SIZE0,
> +					      pstv->inf_elm[i].out_stride);
> +			} else if (out_stride_size[i][j] == 2) {
> +				fd_cur_set[FD_OUT_X_Y_SIZE0] =
> +					aie_cmb_u16(pstv->inf_elm[i].out_xsize_plus_1_stride2 - 1,
> +						    out_ysize_plus_1_stride2);
> +				set_cmbst_cfg(fd_cur_set, FD_OUT_STRIDE0_BUS_SIZE0,
> +					      pstv->inf_elm[i].out_stride_stride2);
> +			}
> +		}
> +
> +		if (i == RPN0_LOOP_NUM || i == RPN1_LOOP_NUM || i == RPN2_LOOP_NUM)
> +			set_cmb_cfg(fd_cur_cfg, FD_RPN_SET, fd->base_para->rpn_anchor_thrd);
> +
> +		if (i == RPN0_LOOP_NUM) {
> +			cal_x = ((sr_crp_w << 10) * 100 /
> +				 (int)fd->base_para->pyramid_rect.width) >> 10;
> +			cal_y = cal_x * 512 / 100;
> +			fd_cur_cfg[FD_IMAGE_COORD] = (fd_cur_cfg[FD_IMAGE_COORD] & 0xF) |
> +						     ((cal_y << 4) & 0x7FFF0);
> +			fd_cur_cfg[FD_IMAGE_COORD_XY_OFST] = 0;
> +			if (aie_cfg->en_roi) {
> +				fd_cur_cfg[FD_IMAGE_COORD_XY_OFST] =
> +					(aie_cfg->src_roi.x1 - aie_cfg->src_padding.left) |
> +					(aie_cfg->src_roi.y1 - aie_cfg->src_padding.up) << 16;
> +			}
> +		} else if (i == RPN1_LOOP_NUM) {
> +			cal_x = ((sr_crp_w << 10) * 100 /
> +				(int)fd->base_para->pyramid_rect.width) >> 10;
> +			cal_y = cal_x * 2 * 512 / 100;
> +			fd_cur_cfg[FD_IMAGE_COORD] = (fd_cur_cfg[FD_IMAGE_COORD] & 0xF) |
> +						     ((cal_y << 4) & 0x7FFF0);
> +			fd_cur_cfg[FD_IMAGE_COORD_XY_OFST] = 0;
> +			if (aie_cfg->en_roi) {
> +				fd_cur_cfg[FD_IMAGE_COORD_XY_OFST] =
> +					(aie_cfg->src_roi.x1 - aie_cfg->src_padding.left) |
> +					(aie_cfg->src_roi.y1 - aie_cfg->src_padding.up) << 16;
> +			}
> +		} else if (i == RPN2_LOOP_NUM) {
> +			cal_x = ((sr_crp_w << 10) * 100 /
> +				(int)fd->base_para->pyramid_rect.width) >> 10;
> +			cal_y = cal_x * 4 * 512 / 100;
> +			fd_cur_cfg[FD_IMAGE_COORD] = (fd_cur_cfg[FD_IMAGE_COORD] & 0xF) |
> +						     ((cal_y << 4) & 0x7FFF0);
> +			fd_cur_cfg[FD_IMAGE_COORD_XY_OFST] = 0;
> +			if (aie_cfg->en_roi) {
> +				fd_cur_cfg[FD_IMAGE_COORD_XY_OFST] =
> +					(aie_cfg->src_roi.x1 - aie_cfg->src_padding.left) |
> +					(aie_cfg->src_roi.y1 - aie_cfg->src_padding.up) << 16;
> +			}
> +		}
> +
> +		/* IN_FM_BASE_ADR */
> +		if (i == 0) {
> +			fd_cur_cfg[FD_IN_0] = (u32)(fd->base_para->rs_pym_rst_pa[2][0]);
> +			fd_cur_cfg[FD_IN_1] = (u32)(fd->base_para->rs_pym_rst_pa[2][1]);
> +			fd_cur_cfg[FD_IN_2] = (u32)(fd->base_para->rs_pym_rst_pa[2][2]);
> +		} else if (i == (RPN2_LOOP_NUM + 1)) {
> +			fd_cur_cfg[FD_IN_0] = (u32)(fd->base_para->rs_pym_rst_pa[1][0]);
> +			fd_cur_cfg[FD_IN_1] = (u32)(fd->base_para->rs_pym_rst_pa[1][1]);
> +			fd_cur_cfg[FD_IN_2] = (u32)(fd->base_para->rs_pym_rst_pa[1][2]);
> +		} else if (i == (RPN1_LOOP_NUM + 1)) {
> +			fd_cur_cfg[FD_IN_0] = (u32)(fd->base_para->rs_pym_rst_pa[0][0]);
> +			fd_cur_cfg[FD_IN_1] = (u32)(fd->base_para->rs_pym_rst_pa[0][1]);
> +			fd_cur_cfg[FD_IN_2] = (u32)(fd->base_para->rs_pym_rst_pa[0][2]);
> +		} else {
> +			for (j = 0; j < INPUT_WDMA_WRA_NUM; j++) {
> +				if (fd_rdma_en[i][j][0] != -1) {
> +					uloop = fd_rdma_en[i][j][0];
> +					uch = fd_rdma_en[i][j][1];
> +					fd_cur_cfg[FD_IN_0 + j] =
> +						(u32)(fd->dma_para->fd_out_hw_pa[uloop][uch]);
> +				}
> +			}
> +		}
> +
> +		/* OUT_FM_BASE_ADR */
> +		for (j = 0; j < OUTPUT_WDMA_WRA_NUM; j++) {
> +			if (fd_wdma_en[i][j])
> +				fd_cur_cfg[FD_OUT_0 + j] =
> +					(u32)(fd->dma_para->fd_out_hw_pa[i][j]);
> +		}
> +
> +		/* KERNEL_BASE_ADR */
> +		for (j = 0; j < KERNEL_RDMA_RA_NUM; j++) {
> +			if (fd_ker_rdma_size[i][j])
> +				fd_cur_cfg[FD_KERNEL_0 + j] =
> +					(u32)(fd->dma_para->fd_kernel_pa[i][j]);
> +		}
> +
> +		fd_cur_cfg[FD_CON_IN_BA_MSB] = (u32)0x02020202;
> +		fd_cur_cfg[FD_CON_OUT_BA_MSB] = (u32)0x02020202;
> +		fd_cur_cfg[FD_CON_KERNEL_BA_MSB] = (u32)0x00000202;
> +	}
> +
> +	return 0;
> +}
> +
> +static int aie_config_attr_network(struct mtk_aie_dev *fd,
> +				   struct aie_enq_info *aie_cfg)
> +{
> +	bool is_regression_loop;
> +	void *fd_cfg;
> +	u32 *fd_cur_cfg;
> +	u16 fd_input_ht, fd_output_ht;
> +	u16 fd_out_y[4];
> +	u8 i, j;
> +	u8 uloop, uch, uidx;
> +	u16 pyramid0_out_w, pyramid0_out_h;
> +	int fd_conv_ht;
> +	u16 sr_crp_w, sr_crp_h;
> +
> +	sr_crp_w = fd->attr_para->crop_width[fd->attr_para->w_idx];
> +	sr_crp_h = fd->attr_para->crop_height[fd->attr_para->w_idx];
> +
> +	pyramid0_out_w = ATTR_MODE_PYRAMID_WIDTH;
> +	pyramid0_out_h = pyramid0_out_w * sr_crp_h / sr_crp_w;
> +
> +	fd_cfg = fd->base_para->attr_fd_cfg_va[fd->attr_para->w_idx];
> +
> +	for (i = 0; i < ATTR_LOOP_NUM; i++) {
> +		fd_cur_cfg = (u32 *)fd_cfg + fd->variant->fd_cfg_size * i;
> +		fd_cur_cfg[FD_INPUT_ROTATE] =
> +			(fd_cur_cfg[FD_INPUT_ROTATE] & 0xFFFF0FFF) |
> +			((aie_cfg->rotate_degree << 12) & 0x3000);
> +		if (i == 0)
> +			fd_input_ht = pyramid0_out_h;
> +		else
> +			if (attr_out_stride2_as_in[i] == 0)
> +				fd_input_ht = fd_output_ht;
> +			else if (attr_out_stride2_as_in[i] == 1)
> +				fd_input_ht = (fd_output_ht + 1) / 2;
> +
> +		fd_output_ht = DIV_ROUND_UP(fd_input_ht, attr_fd_stride[i] +
> +					    2 * attr_fd_maxpool[i]);
> +		fd_conv_ht = DIV_ROUND_UP(fd_input_ht, attr_fd_stride[i]);
> +
> +		fd_cur_cfg[FD_CONV_IMG_W_H] =
> +			(fd_cur_cfg[FD_CONV_IMG_W_H] & 0xFFFF0000) |
> +			(fd_conv_ht & 0xFFFF);
> +		fd_cur_cfg[FD_IN_IMG_W_H] =
> +			(fd_cur_cfg[FD_IN_IMG_W_H] & 0xFFFF0000) |
> +			(fd_input_ht & 0xFFFF);
> +		fd_cur_cfg[FD_OUT_IMG_W_H] =
> +			(fd_cur_cfg[FD_OUT_IMG_W_H] & 0xFFFF0000) |
> +			(fd_output_ht & 0xFFFF);
> +		set_cmb_cfg(fd_cur_cfg, FD_IN_X_Y_SIZE0, fd_input_ht - 1);
> +		set_cmb_cfg(fd_cur_cfg, FD_IN_X_Y_SIZE1, fd_input_ht - 1);
> +		set_cmb_cfg(fd_cur_cfg, FD_IN_X_Y_SIZE2, fd_input_ht - 1);
> +		set_cmb_cfg(fd_cur_cfg, FD_IN_X_Y_SIZE3, fd_input_ht - 1);
> +
> +		is_regression_loop = (i == AGE_OUT_RGS || i == GENDER_OUT_RGS ||
> +					      i == INDIAN_OUT_RGS || i == RACE_OUT_RGS);
> +
> +		if (is_regression_loop) {
> +			fd_out_y[0] = 0;
> +			fd_out_y[1] = 0;
> +			fd_out_y[2] = 0;
> +			fd_out_y[3] = 0;
> +		} else {
> +			fd_out_y[0] = fd_output_ht - 1;
> +			fd_out_y[1] = fd_output_ht - 1;
> +			if (attr_out_2size[i] == 0) {
> +				fd_out_y[2] = fd_output_ht - 1;
> +				fd_out_y[3] = fd_output_ht - 1;
> +			} else {
> +				fd_out_y[2] = (fd_output_ht + 1) / 2 - 1;
> +				fd_out_y[3] = (fd_output_ht + 1) / 2 - 1;
> +			}
> +		}
> +
> +		for (j = 0; j < 4; j++)
> +			set_cmb_cfg(fd_cur_cfg, FD_OUT_X_Y_SIZE0 + 2 * j, fd_out_y[j]);
> +
> +		/* IN_FM_BASE_ADR */
> +		if (i == 0) {
> +			fd_cur_cfg[FD_IN_0] = (u32)(fd->base_para->rs_pym_rst_pa[0][0]);
> +			fd_cur_cfg[FD_IN_1] = (u32)(fd->base_para->rs_pym_rst_pa[0][1]);
> +			fd_cur_cfg[FD_IN_2] = (u32)(fd->base_para->rs_pym_rst_pa[0][2]);
> +		} else {
> +			for (j = 0; j < INPUT_WDMA_WRA_NUM; j++) {
> +				if (attr_rdma_en[i][j][0] != -1) {
> +					uloop = attr_rdma_en[i][j][0];
> +					uch = attr_rdma_en[i][j][1];
> +					fd_cur_cfg[FD_IN_0 + j] =
> +						(u32)(fd->dma_para->attr_out_hw_pa[uloop][uch]);
> +				}
> +			}
> +		}
> +
> +		/* OUT_FM_BASE_ADR */
> +		for (j = 0; j < OUTPUT_WDMA_WRA_NUM; j++) {
> +			if (attr_wdma_en[i][j]) {
> +				uidx = fd->attr_para->w_idx;
> +				if (i == AGE_OUT_RGS && j == 0)
> +					fd_cur_cfg[FD_OUT_0 + j] =
> +						(u32)(fd->dma_para->age_out_hw_pa[uidx]);
> +				else if (i == GENDER_OUT_RGS && j == 0)
> +					fd_cur_cfg[FD_OUT_0 + j] =
> +						(u32)(fd->dma_para->gender_out_hw_pa[uidx]);
> +				else if (i == INDIAN_OUT_RGS && j == 0)
> +					fd_cur_cfg[FD_OUT_0 + j] =
> +						(u32)(fd->dma_para->is_indian_out_hw_pa[uidx]);
> +				else if (i == RACE_OUT_RGS && j == 0)
> +					fd_cur_cfg[FD_OUT_0 + j] =
> +						(u32)(fd->dma_para->race_out_hw_pa[uidx]);
> +				else
> +					fd_cur_cfg[FD_OUT_0 + j] =
> +						(u32)(fd->dma_para->attr_out_hw_pa[i][j]);
> +			}
> +		}
> +
> +		/* KERNEL_BASE_ADR */
> +		for (j = 0; j < KERNEL_RDMA_RA_NUM; j++) {
> +			fd_cur_cfg[FD_KERNEL_0 + j] =
> +				(u32)(fd->dma_para->attr_kernel_pa[i][j]);
> +		}
> +
> +		fd_cur_cfg[FD_CON_IN_BA_MSB] = (u32)0x02020202;
> +		fd_cur_cfg[FD_CON_OUT_BA_MSB] = (u32)0x02020202;
> +		fd_cur_cfg[FD_CON_KERNEL_BA_MSB] = (u32)0x00000202;
> +	}
> +	return 0;
> +}
> +
> +static int aie_config_dram(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg)
> +{
> +	int ret;
> +
> +	ret = aie_config_y2r(fd, aie_cfg, aie_cfg->sel_mode);
> +	if (ret)
> +		return ret;
> +
> +	if (aie_cfg->sel_mode == FDMODE) {
> +		ret = aie_config_rs(fd, aie_cfg);
> +		if (ret)
> +			return ret;
> +
> +		ret = aie_config_network(fd, aie_cfg);
> +		if (ret)
> +			return ret;
> +
> +	} else if (aie_cfg->sel_mode == ATTRIBUTEMODE) {
> +		ret = aie_config_attr_network(fd, aie_cfg);
> +		if (ret)
> +			return ret;
> +	} else {
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +void aie_reset(struct mtk_aie_dev *fd)
> +{
> +	writel(RESET_BIT, fd->fd_base + AIE_START_REG);
> +	writel(0x0, fd->fd_base + AIE_START_REG);
> +}
> +
> +int aie_init(struct mtk_aie_dev *fd, struct v4l2_ctrl_aie_init *user_init)
> +{
> +	int ret;
> +	int i, j;
> +
> +	if (fd->fd_state & STATE_INIT) {
> +		dev_err(fd->dev, "%s fd state: %d\n", __func__, fd->fd_state);
> +		return -EINVAL;
> +	}
> +
> +	fd->fd_state &= ~STATE_INIT;
> +	fd->fd_mem_size = 0;
> +
> +	fd->base_para = kmalloc(sizeof(*fd->base_para), GFP_KERNEL);
> +	if (!fd->base_para)
> +		return -ENOMEM;
> +
> +	fd->attr_para = kmalloc(sizeof(*fd->attr_para), GFP_KERNEL);
> +	if (!fd->attr_para) {
> +		ret = -ENOMEM;
> +		goto kmalloc_fail;
> +	}
> +
> +	fd->dma_para = kmalloc(sizeof(*fd->dma_para), GFP_KERNEL);
> +	if (!fd->dma_para) {
> +		ret = -ENOMEM;
> +		goto kmalloc_fail;
> +	}
> +
> +	fd->fld_para = kmalloc(sizeof(*fd->fld_para), GFP_KERNEL);
> +	if (!fd->fld_para) {
> +		ret = -ENOMEM;
> +		goto kmalloc_fail;
> +	}
> +
> +	fd->base_para->rpn_anchor_thrd =
> +		(signed short)(user_init->feature_threshold & 0x0000FFFF);
> +	fd->base_para->pyramid_rect.width = user_init->pyramid_width;
> +	fd->base_para->pyramid_rect.height = user_init->pyramid_height;
> +	fd->base_para->max_pyramid_rect.width = user_init->pyramid_width;
> +	fd->base_para->max_pyramid_rect.height = user_init->pyramid_height;
> +
> +	fd->base_para->fd_fd_cfg_va = NULL;
> +	fd->base_para->fd_rs_cfg_va = NULL;
> +	fd->base_para->fd_yuv2rgb_cfg_va = NULL;
> +	for (i = 0; i < MAX_ENQUE_FRAME_NUM; i++)
> +		fd->base_para->attr_fd_cfg_va[i] = NULL;
> +	for (i = 0; i < MAX_ENQUE_FRAME_NUM; i++)
> +		fd->base_para->attr_yuv2rgb_cfg_va[i] = NULL;
> +	for (i = 0; i < PYM_NUM; i++)
> +		for (j = 0; j < COLOR_NUM; j++)
> +			fd->base_para->rs_pym_rst_va[i][j] = NULL;
> +
> +	memset(&fd->st_info, 0, sizeof(struct aie_static_info));
> +	aie_update_table(fd, fd->base_para->max_pyramid_rect.width,
> +			 fd->base_para->max_pyramid_rect.height, 1);
> +	aie_update_buf_params(fd, user_init->max_img_width,
> +			      user_init->max_img_height);
> +	ret = aie_alloc_dram_buf(fd);
> +	if (ret)
> +		return -ENOMEM;
> +
> +	ret = aie_alloc_output_buf(fd);
> +	if (ret) {
> +		ret = -ENOMEM;
> +		goto free_all;
> +	}
> +
> +	ret = aie_alloc_fddma_buf(fd);
> +	if (ret) {
> +		ret = -ENOMEM;
> +		goto free_all;
> +	}
> +
> +	ret = aie_alloc_fld_buf(fd);
> +	if (ret) {
> +		ret = -ENOMEM;
> +		goto free_all;
> +	}
> +
> +	aie_arrange_fddma_buf(fd);
> +	aie_arrange_kernel_buf(fd);
> +	aie_arrange_attrdma_buf(fd);
> +	aie_arrange_result_dma_buf(fd);
> +
> +	aie_arrange_fld_buf(fd);
> +
> +	ret = aie_load_fw(fd);
> +	if (ret) {
> +		dev_err(fd->dev, "Failed to load aie fw\n");
> +		goto free_all;
> +	}
> +
> +	fd->attr_para->r_idx = 0;
> +	fd->attr_para->w_idx = 0;
> +
> +	fd->fd_state |= STATE_INIT;
> +
> +	dev_dbg(fd->dev, "%s: fd_mem_size(%d)\n", __func__, fd->fd_mem_size);
> +
> +	return 0;
> +
> +free_all:
> +	aie_free_dram_buf(fd);
> +	aie_free_output_buf(fd);
> +	aie_free_fddma_buf(fd);
> +	aie_free_fld_buf(fd);
> +
> +kmalloc_fail:
> +	kfree(fd->base_para);
> +	kfree(fd->attr_para);
> +	kfree(fd->dma_para);
> +	kfree(fd->fld_para);
> +
> +	dev_err(fd->dev, "Failed to init aie\n");
> +
> +	return ret;
> +}
> +
> +void aie_uninit(struct mtk_aie_dev *fd)
> +{
> +	fd->fd_state &= ~STATE_INIT;
> +	aie_free_dram_buf(fd);
> +	aie_free_output_buf(fd);
> +	aie_free_fddma_buf(fd);
> +	aie_free_fld_buf(fd);
> +
> +	kfree(fd->base_para);
> +	kfree(fd->attr_para);
> +	kfree(fd->dma_para);
> +	kfree(fd->fld_para);
> +}
> +
> +void aie_prepare(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg)
> +{
> +	if (aie_cfg->sel_mode == FLDMODE)
> +		return;
> +
> +	memset(&fd->reg_cfg, 0, sizeof(struct aie_reg_cfg));
> +
> +	if (aie_cfg->pyramid_base_width == 0) {
> +		fd->base_para->pyramid_rect.width = fd->base_para->max_pyramid_rect.width;

fd->base_para->pyramid_rect.width is always equal to fd->base_para->max_pyramid_rect.width,
so this assignment is not necessary.

> +		fd->base_para->pyramid_rect.height = fd->base_para->max_pyramid_rect.height;

fd->base_para->pyramid_rect.height is always equal to fd->base_para->max_pyramid_rect.height,
so this assignment is not necessary.

> +		fd->base_para->number_of_pyramid = 3;
> +	} else {
> +		fd->base_para->pyramid_rect.height =
> +			fd->base_para->max_pyramid_rect.height;
> +		fd->base_para->number_of_pyramid = aie_cfg->number_of_pyramid;
> +		if (aie_cfg->pyramid_base_width !=
> +		    fd->base_para->pyramid_rect.width) {
> +			dev_dbg(fd->dev,
> +				"pre: %d cur: %d num: %d\n",
> +				fd->base_para->pyramid_rect.width,
> +				aie_cfg->pyramid_base_width,
> +				fd->base_para->number_of_pyramid
> +			);
> +			fd->base_para->pyramid_rect.width =
> +				aie_cfg->pyramid_base_width;
> +			aie_update_table(fd, fd->base_para->pyramid_rect.width,
> +					 fd->base_para->pyramid_rect.height, 0);
> +			aie_update_fddma_buf(fd);
> +		}
> +	}
> +
> +	//Init output va array
> +	if (aie_cfg->sel_mode == FDMODE) {
> +		memset(fd->rs_output_hw.va, 0, fd->rs_output_hw.size);
> +		memset(fd->dma_para->fd_out_hw_va[RPN0_LOOP_NUM][0], 0,
> +		       RESULT_SIZE);
> +		memset(fd->dma_para->fd_out_hw_va[RPN1_LOOP_NUM][0], 0,
> +		       RESULT_SIZE);
> +		memset(fd->dma_para->fd_out_hw_va[RPN2_LOOP_NUM][0], 0,
> +		       RESULT_SIZE);
> +	} else if (aie_cfg->sel_mode == ATTRIBUTEMODE) {
> +		memset(fd->base_para->rs_pym_rst_va[0][0], 0,
> +		       fd->rs_pym_out_size[0]);
> +		memset(fd->base_para->rs_pym_rst_va[0][1], 0,
> +		       fd->rs_pym_out_size[0]);
> +		memset(fd->base_para->rs_pym_rst_va[0][2], 0,
> +		       fd->rs_pym_out_size[0]);
> +	}
> +
> +	fd->reg_cfg.fd_mode = aie_cfg->sel_mode;
> +	if (aie_cfg->sel_mode == FDMODE) {
> +		fd->reg_cfg.rs_adr = (u32)fd->base_para->fd_rs_cfg_pa;
> +		fd->reg_cfg.yuv2rgb_adr = (u32)fd->base_para->fd_yuv2rgb_cfg_pa;
> +		fd->reg_cfg.fd_adr = (u32)fd->base_para->fd_fd_cfg_pa +
> +					     fd->variant->fd_cfg_size * 4 *
> +					     FD_LOOP_NUM / 3 *
> +					     (3 - aie_cfg->number_of_pyramid);
> +
> +	} else if (aie_cfg->sel_mode == ATTRIBUTEMODE) {
> +		fd->reg_cfg.yuv2rgb_adr =
> +			(u32)fd->base_para->attr_yuv2rgb_cfg_pa[fd->attr_para->w_idx];
> +		fd->reg_cfg.fd_adr =
> +			(u32)fd->base_para->attr_fd_cfg_pa[fd->attr_para->w_idx];
> +	} else {
> +		dev_err(fd->dev, "Invalid Mode: %d", aie_cfg->sel_mode);
> +	}
> +
> +	aie_update_cfg(fd, aie_cfg);
> +
> +	aie_config_dram(fd, aie_cfg);
> +
> +	if (aie_cfg->sel_mode == ATTRIBUTEMODE)
> +		fd->attr_para->w_idx = (fd->attr_para->w_idx + 1) % MAX_ENQUE_FRAME_NUM;
> +}
> +
> +static void aie_execute_face_detection(struct mtk_aie_dev *fd,
> +				       struct aie_enq_info *aie_cfg)
> +{
> +	unsigned int loop_num, loop_reg_val;
> +
> +	writel(0x0, fd->fd_base + AIE_START_REG);
> +	writel(0x00000111, fd->fd_base + AIE_ENABLE_REG);
> +	loop_num = FD_LOOP_NUM / 3 * (aie_cfg->number_of_pyramid);
> +	loop_reg_val = (loop_num << 8) | (aie_cfg->number_of_pyramid - 1);
> +	writel(loop_reg_val, fd->fd_base + AIE_LOOP_REG);
> +	writel(0x1, fd->fd_base + AIE_INT_EN_REG);
> +	writel(fd->reg_cfg.rs_adr, fd->fd_base + AIE_RS_CON_BASE_ADR_REG);
> +	writel(fd->reg_cfg.fd_adr, fd->fd_base + AIE_FD_CON_BASE_ADR_REG);
> +	writel(fd->reg_cfg.yuv2rgb_adr, fd->fd_base + AIE_YUV2RGB_CON_BASE_ADR_REG);
> +	writel(0x00000002, fd->fd_base + AIE_YUV2RGB_CON_BASE_ADR_MSB);
> +	writel(0x00000002, fd->fd_base + AIE_RS_CON_BASE_ADR_MSB);
> +	writel(0x00000002, fd->fd_base + AIE_FD_CON_BASE_ADR_MSB);
> +	writel(0x1, fd->fd_base + AIE_START_REG);
> +}
> +
> +static void aie_execute_attribute_detection(struct mtk_aie_dev *fd,
> +					    struct aie_enq_info *aie_cfg)
> +{
> +	writel(0x0, fd->fd_base + AIE_START_REG);
> +	writel(0x00000101, fd->fd_base + AIE_ENABLE_REG);
> +	writel(0x00001A00, fd->fd_base + AIE_LOOP_REG);
> +	writel(0x1, fd->fd_base + AIE_INT_EN_REG);
> +	writel(fd->reg_cfg.rs_adr, fd->fd_base + AIE_RS_CON_BASE_ADR_REG);
> +	writel(fd->reg_cfg.fd_adr, fd->fd_base + AIE_FD_CON_BASE_ADR_REG);
> +	writel(fd->reg_cfg.yuv2rgb_adr, fd->fd_base + AIE_YUV2RGB_CON_BASE_ADR_REG);
> +	writel(0x00000002, fd->fd_base + AIE_YUV2RGB_CON_BASE_ADR_MSB);
> +	writel(0x00000002, fd->fd_base + AIE_RS_CON_BASE_ADR_MSB);
> +	writel(0x00000002, fd->fd_base + AIE_FD_CON_BASE_ADR_MSB);
> +	writel(0x1, fd->fd_base + AIE_START_REG);
> +}
> +
> +static void aie_execute_fld_detection(struct mtk_aie_dev *fd,
> +				      struct aie_enq_info *aie_cfg)
> +{
> +	unsigned int i;
> +
> +	writel(0x10, fd->fd_base + AIE_START_REG);
> +	writel(0x00011111, fd->fd_base + AIE_DMA_CTL_REG);
> +	writel(0x01111111, fd->fd_base + FLD_EN);
> +	writel(0x1, fd->fd_base + AIE_INT_EN_REG);
> +	for (i = 0; i < aie_cfg->fld_face_num; i++) {
> +		writel(aie_cfg->src_img_addr, fd->fd_base + FLD_BASE_ADDR_FACE_0 + i * 0x4);
> +		writel(aie_cfg->fld_input[i].fld_in_crop_x1 << 16 |
> +		       aie_cfg->fld_input[i].fld_in_crop_y1,
> +		       fd->fd_base + fld_face_info_0[i]);
> +		writel(aie_cfg->fld_input[i].fld_in_crop_x2 << 16 |
> +		       aie_cfg->fld_input[i].fld_in_crop_y2,
> +		       fd->fd_base + fld_face_info_1[i]);
> +		writel(aie_cfg->fld_input[i].fld_in_rip << 4 |
> +		       aie_cfg->fld_input[i].fld_in_rop,
> +		       fd->fd_base + fld_face_info_2[i]);
> +	}
> +
> +	writel(aie_cfg->fld_face_num << 28 | FLD_FOREST << 16 |
> +	       FLD_POINT, fd->fd_base + FLD_MODEL_PARA1);
> +	writel(13 << 16 | 0xfe9, fd->fd_base + FLD_MODEL_PARA14);
> +	writel(aie_cfg->src_img_width << 16 | aie_cfg->src_img_height,
> +	       fd->fd_base + FLD_SRC_WD_HT);
> +
> +	/*input settings*/
> +	writel(0x007c003f, fd->fd_base + FLD_PL_IN_SIZE_0);
> +	writel(0x0040000f, fd->fd_base + FLD_PL_IN_STRIDE_0);
> +	writel(0x007c003f, fd->fd_base + FLD_PL_IN_SIZE_1);
> +	writel(0x0040000f, fd->fd_base + FLD_PL_IN_STRIDE_1);
> +	writel(0x0016003f, fd->fd_base + FLD_PL_IN_SIZE_2_0);
> +	writel(0x0040000f, fd->fd_base + FLD_PL_IN_STRIDE_2_0);
> +	writel(0x0013003f, fd->fd_base + FLD_PL_IN_SIZE_2_1);
> +	writel(0x0040000f, fd->fd_base + FLD_PL_IN_STRIDE_2_1);
> +	writel(0x0013003f, fd->fd_base + FLD_PL_IN_SIZE_2_2);
> +	writel(0x0040000f, fd->fd_base + FLD_PL_IN_STRIDE_2_2);
> +	writel(0x00a6001f, fd->fd_base + FLD_PL_IN_SIZE_3);
> +	writel(0x0020000f, fd->fd_base + FLD_PL_IN_STRIDE_3);
> +
> +	/*output setting*/
> +	writel((2400 * aie_cfg->fld_face_num - 1) << 16 | 127,
> +	       fd->fd_base + FLD_SH_IN_SIZE_0);
> +	writel(0x0010000f, fd->fd_base + FLD_SH_IN_STRIDE_0);
> +	writel(fd->fld_para->fld_output_pa[0],
> +	       fd->fd_base + FLD_TR_OUT_BASE_ADDR_0);
> +	writel((aie_cfg->fld_face_num - 1) << 16 | 0x6f,
> +	       fd->fd_base + FLD_TR_OUT_SIZE_0);
> +	writel(0x0070000f, fd->fd_base + FLD_TR_OUT_STRIDE_0);
> +	writel(fd->fld_para->fld_output_pa[0],
> +	       fd->fd_base + FLD_PP_OUT_BASE_ADDR_0);
> +	writel((aie_cfg->fld_face_num - 1) << 16 | 0x6f,
> +	       fd->fd_base + FLD_PP_OUT_SIZE_0);
> +	writel(0x0070000f, fd->fd_base + FLD_PP_OUT_STRIDE_0);
> +
> +	/*cv score*/
> +	writel(0x00000001, fd->fd_base + FLD_BS_BIAS);
> +	writel(0x0000b835, fd->fd_base + FLD_CV_FM_RANGE_0);
> +	writel(0xffff5cba, fd->fd_base + FLD_CV_FM_RANGE_1);
> +	writel(0x00005ed5, fd->fd_base + FLD_CV_PM_RANGE_0);
> +	writel(0xffff910d, fd->fd_base + FLD_CV_PM_RANGE_1);
> +	writel(0x0000031e, fd->fd_base + FLD_BS_RANGE_0);
> +	writel(0xfffffcae, fd->fd_base + FLD_BS_RANGE_1);
> +
> +	/* 6 steps */
> +	writel(fd->fld_para->fld_step_pa[FLD_STEP_BLINK][14],
> +	       fd->fd_base + FLD_BS_IN_BASE_ADDR_14);
> +
> +	for (i = 0; i < 15; i++) {
> +		writel(fd->fld_para->fld_step_pa[FLD_STEP_CV][i],
> +		       fd->fd_base + FLD_PL_IN_BASE_ADDR_2_(i));
> +
> +		writel(fd->fld_para->fld_step_pa[FLD_STEP_FP][i],
> +		       fd->fd_base + FLD_PL_IN_BASE_ADDR_3_(i));
> +
> +		writel(fd->fld_para->fld_step_pa[FLD_STEP_LEAF][i],
> +		       fd->fd_base + FLD_SH_IN_BASE_ADDR_(i));
> +
> +		writel(fd->fld_para->fld_step_pa[FLD_STEP_KM02][i],
> +		       fd->fd_base + FLD_PL_IN_BASE_ADDR_0_(i));
> +
> +		writel(fd->fld_para->fld_step_pa[FLD_STEP_KM13][i],
> +		       fd->fd_base + FLD_PL_IN_BASE_ADDR_1_(i));
> +	}
> +
> +	writel(0x22222222, fd->fd_base + FLD_PL_IN_BASE_ADDR_0_0_7_MSB);
> +	writel(0x02222222, fd->fd_base + FLD_PL_IN_BASE_ADDR_0_8_15_MSB);
> +
> +	writel(0x22222222, fd->fd_base + FLD_PL_IN_BASE_ADDR_1_0_7_MSB);
> +	writel(0x02222222, fd->fd_base + FLD_PL_IN_BASE_ADDR_1_8_15_MSB);
> +
> +	writel(0x22222222, fd->fd_base + FLD_PL_IN_BASE_ADDR_2_0_7_MSB);
> +	writel(0x02222222, fd->fd_base + FLD_PL_IN_BASE_ADDR_2_8_15_MSB);
> +
> +	writel(0x22222222, fd->fd_base + FLD_PL_IN_BASE_ADDR_3_0_7_MSB);
> +	writel(0x02222222, fd->fd_base + FLD_PL_IN_BASE_ADDR_3_8_15_MSB);
> +
> +	writel(0x22222222, fd->fd_base + FLD_SH_IN_BASE_ADDR_0_7_MSB);
> +	writel(0x02222222, fd->fd_base + FLD_SH_IN_BASE_ADDR_8_15_MSB);
> +
> +	writel(0x02000000, fd->fd_base + FLD_BS_IN_BASE_ADDR_8_15_MSB);
> +
> +	writel(0x22222222, fd->fd_base + FLD_BASE_ADDR_FACE_0_7_MSB);
> +	writel(0x02222222, fd->fd_base + FLD_BASE_ADDR_FACE_8_14_MSB);
> +	writel(0x00000002, fd->fd_base + FLD_TR_OUT_BASE_ADDR_0_MSB);
> +	writel(0x00000002, fd->fd_base + FLD_PP_OUT_BASE_ADDR_0_MSB);
> +
> +	/* fld mode + trigger start */
> +	writel(0x11, fd->fd_base + AIE_START_REG);
> +}
> +
> +void aie_execute(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg)
> +{

This patch is a little big, and each sel_mode are independent function,
so break this patch to three patches.

1. Add MT8188 AIE driver (support FDMODE only)
2. Add ATTRIBUTEMODE function
3. Add FLDMODE function

> +	if (aie_cfg->sel_mode == FDMODE)
> +		aie_execute_face_detection(fd, aie_cfg);
> +	else if (aie_cfg->sel_mode == ATTRIBUTEMODE)
> +		aie_execute_attribute_detection(fd, aie_cfg);
> +	else if (aie_cfg->sel_mode == FLDMODE)
> +		aie_execute_fld_detection(fd, aie_cfg);
> +	else
> +		return;
> +}
> +
> +void aie_irqhandle(struct mtk_aie_dev *fd)
> +{
> +	writel(0x0, fd->fd_base + AIE_START_REG);
> +
> +	/* interrupt read clear */
> +	readl(fd->fd_base + AIE_INT_REG);
> +}
> +
> +static u16 aie_get_hi16(unsigned int value)
> +{
> +	return (value & 0xFFFF0000) >> 16;
> +}
> +
> +static u16 aie_get_lo16(unsigned int value)
> +{
> +	return value & 0xFFFF;
> +}
> +
> +static signed short aie_refine_s16_value(signed short value)
> +{
> +	s16 result;
> +
> +	if ((value & 0x200) >> 9)
> +		result = (value | 0xFE00);
> +	else
> +		result = value;
> +
> +	return result;
> +}
> +
> +/* return aie_cfg to user space */
> +void aie_get_fd_result(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg)
> +{
> +	u32 fd_result_hw, fd_result_1_hw, fd_total_num;
> +	struct aie_enq_info *tmp_aie_cfg;
> +	void *fd_pym_result[PYM_NUM];
> +	u32 fd_pyramid_num[PYM_NUM];
> +	signed short landmark;
> +	unsigned int *pto12;
> +	struct fd_ret *prst;
> +	unsigned int i, j;
> +
> +	aie_cfg->sel_mode = fd->base_para->sel_mode;
> +	aie_cfg->rotate_degree = fd->base_para->rotate_degree;
> +	aie_cfg->src_img_addr = fd->base_para->src_img_addr;
> +	aie_cfg->src_img_addr_uv = fd->base_para->src_img_addr_uv;
> +	aie_cfg->src_img_width = fd->base_para->img_rect.width;
> +	aie_cfg->src_img_height = fd->base_para->img_rect.height;
> +	aie_cfg->src_img_fmt = fd->base_para->src_img_fmt;
> +
> +	aie_cfg->irq_status = readl(fd->fd_base + AIE_INT_EN_REG);
> +
> +	fd_result_hw = fd->reg_cfg.hw_result;
> +	fd_result_1_hw = fd->reg_cfg.hw_result1;
> +	fd_total_num = fd_result_hw & 0xFFF;
> +	fd_pyramid_num[0] = (fd_result_hw & 0xFFF0000) >> 16;
> +	fd_pyramid_num[1] = fd_result_1_hw & 0xFFF;
> +	fd_pyramid_num[2] = (fd_result_1_hw & 0xFFF0000) >> 16;
> +
> +	if (fd_total_num == 0)
> +		goto nothing_out;
> +
> +	tmp_aie_cfg =  aie_cfg;
> +
> +	tmp_aie_cfg->fd_out.fd_total_num = fd_total_num;
> +	tmp_aie_cfg->fd_out.fd_pyramid0_num = fd_pyramid_num[0];
> +	tmp_aie_cfg->fd_out.fd_pyramid1_num = fd_pyramid_num[1];
> +	tmp_aie_cfg->fd_out.fd_pyramid2_num = fd_pyramid_num[2];
> +
> +	switch (tmp_aie_cfg->number_of_pyramid) {
> +	case 1:
> +		fd_pym_result[2] = fd->dma_para->fd_out_hw_va[RPN0_LOOP_NUM][0];
> +		break;
> +	case 2:
> +		fd_pym_result[1] = fd->dma_para->fd_out_hw_va[RPN0_LOOP_NUM][0];
> +		fd_pym_result[2] = fd->dma_para->fd_out_hw_va[RPN1_LOOP_NUM][0];
> +		break;
> +	case 3:
> +		fd_pym_result[0] = fd->dma_para->fd_out_hw_va[RPN0_LOOP_NUM][0];
> +		fd_pym_result[1] = fd->dma_para->fd_out_hw_va[RPN1_LOOP_NUM][0];
> +		fd_pym_result[2] = fd->dma_para->fd_out_hw_va[RPN2_LOOP_NUM][0];
> +		break;
> +	default:
> +		dev_err(fd->dev, "Wrong number_of_pyramid\n");
> +		goto nothing_out;
> +	}
> +
> +	for (i = 0; i < 3; i++) {
> +		for (j = 0; j < fd_pyramid_num[i]; j++) {
> +			if (i == 0)
> +				prst = &tmp_aie_cfg->fd_out.pyramid0_result;
> +			else if (i == 1)
> +				prst = &tmp_aie_cfg->fd_out.pyramid1_result;
> +			else if (i == 2)
> +				prst = &tmp_aie_cfg->fd_out.pyramid2_result;
> +
> +			pto12 = (unsigned int *)fd_pym_result[i] + 12 * j;
> +
> +			prst->anchor_x0[j] = aie_get_lo16(*(pto12 + 0));
> +			prst->anchor_y0[j] = aie_get_hi16(*(pto12 + 0));
> +			prst->anchor_x1[j] = aie_get_lo16(*(pto12 + 1));
> +			prst->anchor_y1[j] = aie_get_hi16(*(pto12 + 1));
> +
> +			if (prst->anchor_x1[j] == 0 ||
> +			    prst->anchor_y1[j] == 0) {
> +				dev_err(fd->dev,
> +					"wrong coordinate: i=%d j=%d M:%d %d %d %d\n", i, j,
> +					prst->anchor_x0[j],
> +					prst->anchor_x1[j],
> +					prst->anchor_y0[j],
> +					prst->anchor_y1[j]
> +				);
> +				goto nothing_out;
> +			}
> +
> +			/* ROP result at 1st run */
> +			landmark = (*(pto12 + 2) & 0x3FF);
> +			prst->rop_landmark_score0[j] = aie_refine_s16_value(landmark);
> +			landmark = ((*(pto12 + 2) & 0xFFC00) >> 10);
> +			prst->rop_landmark_score1[j] = aie_refine_s16_value(landmark);
> +			landmark = ((*(pto12 + 2) & 0x3FF00000) >> 20);
> +			prst->rop_landmark_score2[j] = aie_refine_s16_value(landmark);
> +
> +			prst->anchor_score[j] = aie_refine_s16_value(*(pto12 + 9) & 0x3FF);
> +
> +			/* RIP result at 1st run */
> +			landmark = ((*(pto12 + 9) & 0xFFC00) >> 10);
> +			prst->rip_landmark_score0[j] = aie_refine_s16_value(landmark);
> +			landmark = ((*(pto12 + 9) & 0x3FF00000) >> 20);
> +			prst->rip_landmark_score1[j] = aie_refine_s16_value(landmark);
> +			landmark = ((*(pto12 + 9) & 0xC0000000) >> 30) |
> +				   ((*(pto12 + 10) & 0xFF) << 2);
> +			prst->rip_landmark_score2[j] = aie_refine_s16_value(landmark);
> +			landmark = ((*(pto12 + 10) & 0x3FF00) >> 8);
> +			prst->rip_landmark_score3[j] = aie_refine_s16_value(landmark);
> +			landmark = ((*(pto12 + 10) & 0xFFC0000) >> 18);
> +			prst->rip_landmark_score4[j] = aie_refine_s16_value(landmark);
> +			landmark = ((*(pto12 + 10) & 0xF0000000) >> 28) |
> +				   ((*(pto12 + 11) & 0x3F) << 4);
> +			prst->rip_landmark_score5[j] = aie_refine_s16_value(landmark);
> +			landmark = ((*(pto12 + 11) & 0xFFC0) >> 6);
> +			prst->rip_landmark_score6[j] = aie_refine_s16_value(landmark);
> +			prst->face_result_index[j] = ((*(pto12 + 11) & 0xFFF0000) >> 16);
> +			prst->anchor_index[j] = ((*(pto12 + 11) & 0x70000000) >> 28);
> +
> +			prst->fd_partial_result = fd_pyramid_num[i];
> +		}
> +	}
> +	return;
> +nothing_out:
> +	// Ensure that user mode does not receive an inappropriate result structure
> +	memset(&aie_cfg->fd_out, 0, sizeof(struct fd_result));
> +}
> +
> +void aie_get_attr_result(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg)
> +{
> +	u32 *attr_race_result, *attr_gender_result;
> +	u32 *attr_age_result, *attr_is_indian_result;
> +
> +	aie_cfg->sel_mode = fd->attr_para->sel_mode[fd->attr_para->r_idx];
> +	aie_cfg->rotate_degree = fd->attr_para->rotate_degree[fd->attr_para->r_idx];
> +	aie_cfg->src_img_addr =
> +		fd->attr_para->src_img_addr[fd->attr_para->r_idx];
> +	aie_cfg->src_img_addr_uv =
> +		fd->attr_para->src_img_addr_uv[fd->attr_para->r_idx];
> +	aie_cfg->src_img_width = fd->attr_para->img_width[fd->attr_para->r_idx];
> +	aie_cfg->src_img_height =
> +		fd->attr_para->img_height[fd->attr_para->r_idx];
> +	aie_cfg->src_img_fmt = fd->attr_para->src_img_fmt[fd->attr_para->r_idx];
> +
> +	aie_cfg->irq_status = readl(fd->fd_base + AIE_INT_EN_REG);
> +
> +	/* 64 feature * 32 bytes */
> +	attr_age_result =
> +		(u32 *)fd->dma_para->age_out_hw_va[fd->attr_para->r_idx];
> +	attr_gender_result =
> +		(u32 *)fd->dma_para->gender_out_hw_va[fd->attr_para->r_idx];
> +	attr_is_indian_result =
> +		(u32 *)fd->dma_para->is_indian_out_hw_va[fd->attr_para->r_idx];
> +	attr_race_result =
> +		(u32 *)fd->dma_para->race_out_hw_va[fd->attr_para->r_idx];
> +
> +	aie_cfg->attr_out.merged_age_ret[0] = aie_get_lo16(*attr_age_result);
> +	aie_cfg->attr_out.merged_age_ret[1] = aie_get_hi16(*attr_age_result);
> +
> +	aie_cfg->attr_out.merged_gender_ret[0] = aie_get_lo16(*attr_gender_result);
> +	aie_cfg->attr_out.merged_gender_ret[1] = aie_get_hi16(*attr_gender_result);
> +
> +	aie_cfg->attr_out.merged_is_indian_ret[0] = aie_get_lo16(*attr_is_indian_result);
> +	aie_cfg->attr_out.merged_is_indian_ret[1] = aie_get_hi16(*attr_is_indian_result);
> +
> +	aie_cfg->attr_out.merged_race_ret[0] = aie_get_lo16(*attr_race_result);
> +	aie_cfg->attr_out.merged_race_ret[1] = aie_get_hi16(*attr_race_result);
> +	aie_cfg->attr_out.merged_race_ret[2] = aie_get_lo16(*(attr_race_result + 1));
> +
> +	fd->attr_para->r_idx = (fd->attr_para->r_idx + 1) % MAX_ENQUE_FRAME_NUM;
> +}
> +
> +void aie_get_fld_result(struct mtk_aie_dev *fd, struct aie_enq_info *aie_cfg)
> +{
> +	u8 fld_rlt[FLD_OUTPUT_X_SIZE][FLD_OUTPUT_SIZE];
> +	u16 *out_parsing;
> +	int i, j;
> +
> +	aie_cfg->irq_status = readl(fd->fd_base + AIE_INT_EN_REG);
> +
> +	memcpy(fld_rlt, fd->fld_para->fld_output_va[0], sizeof(fld_rlt));
> +
> +	for (j = 0; j < aie_cfg->fld_face_num; j++) {
> +		out_parsing = (unsigned short *)&fld_rlt[j][0];
> +		for (i = 0; i < FLD_CUR_LANDMARK; i++) {
> +			aie_cfg->fld_out[j].fld_landmark[i].x = *out_parsing;
> +			aie_cfg->fld_out[j].fld_landmark[i].y = *(out_parsing + 1);
> +
> +			if (i % 2)
> +				out_parsing = out_parsing + 6;
> +			else
> +				out_parsing = out_parsing + 2;
> +		}
> +		out_parsing = (unsigned short *)&fld_rlt[j][0];
> +		if (FLD_CUR_LANDMARK % 2)
> +			out_parsing = out_parsing + ((FLD_CUR_LANDMARK + 1) / 2) * 8;
> +		else
> +			out_parsing = out_parsing + (FLD_CUR_LANDMARK / 2) * 8;
> +
> +		aie_cfg->fld_out[j].fld_out_rop = *out_parsing;
> +		aie_cfg->fld_out[j].fld_out_rip = *(out_parsing + 1);
> +		aie_cfg->fld_out[j].confidence = *(out_parsing + 2);
> +		aie_cfg->fld_out[j].blinkscore = *(out_parsing + 3);
> +	}
> +}





[Index of Archives]     [Device Tree Compilter]     [Device Tree Spec]     [Linux Driver Backports]     [Video for Linux]     [Linux USB Devel]     [Linux PCI Devel]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Yosemite Backpacking]


  Powered by Linux