Re: [PATCH v4 2/2] media: cedrus: Add H264 decoding support

Jernej Škrabec <jernej.skrabec@xxxxxxxxx> · Thu, 21 Feb 2019 19:21:47 +0100

Hi,

Dne sreda, 20. februar 2019 ob 18:50:54 CET je Jernej Škrabec napisal(a):
> Hi!
> 
> I really wanted to do another review on previous series but got distracted
> by analyzing one particulary troublesome H264 sample. It still doesn't work
> correctly, so I would ask you if you can test it with your stack (it might
> be userspace issue):
> 
> http://jernej.libreelec.tv/videos/problematic/test.mkv
> 
> Please take a look at my comments below.
> 
> Dne sreda, 20. februar 2019 ob 15:17:34 CET je Maxime Ripard napisal(a):
> > Introduce some basic H264 decoding support in cedrus. So far, only the
> > baseline profile videos have been tested, and some more advanced features
> > used in higher profiles are not even implemented.
> 
> What is not yet implemented? Multi slice frame decoding, interlaced frames
> and decoding frames with width > 2048. Anything else?
> 
> > Signed-off-by: Maxime Ripard <maxime.ripard@xxxxxxxxxxx>
> > ---
> > 
> >  drivers/staging/media/sunxi/cedrus/Makefile       |   3 +-
> >  drivers/staging/media/sunxi/cedrus/cedrus.c       |  30 +-
> >  drivers/staging/media/sunxi/cedrus/cedrus.h       |  38 +-
> >  drivers/staging/media/sunxi/cedrus/cedrus_dec.c   |  13 +-
> >  drivers/staging/media/sunxi/cedrus/cedrus_h264.c  | 584 +++++++++++++++-
> >  drivers/staging/media/sunxi/cedrus/cedrus_hw.c    |   4 +-
> >  drivers/staging/media/sunxi/cedrus/cedrus_regs.h  |  91 ++-
> >  drivers/staging/media/sunxi/cedrus/cedrus_video.c |   9 +-
> >  8 files changed, 770 insertions(+), 2 deletions(-)
> >  create mode 100644 drivers/staging/media/sunxi/cedrus/cedrus_h264.c
> > 
> > diff --git a/drivers/staging/media/sunxi/cedrus/Makefile
> > b/drivers/staging/media/sunxi/cedrus/Makefile index
> > e9dc68b7bcb6..aaf141fc58b6 100644
> > --- a/drivers/staging/media/sunxi/cedrus/Makefile
> > +++ b/drivers/staging/media/sunxi/cedrus/Makefile
> > @@ -1,3 +1,4 @@
> > 
> >  obj-$(CONFIG_VIDEO_SUNXI_CEDRUS) += sunxi-cedrus.o
> > 
> > -sunxi-cedrus-y = cedrus.o cedrus_video.o cedrus_hw.o cedrus_dec.o
> > cedrus_mpeg2.o +sunxi-cedrus-y = cedrus.o cedrus_video.o cedrus_hw.o
> > cedrus_dec.o \ +		 cedrus_mpeg2.o cedrus_h264.o
> > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c
> > b/drivers/staging/media/sunxi/cedrus/cedrus.c index
> > ff11cbeba205..c1607142d998 100644
> > --- a/drivers/staging/media/sunxi/cedrus/cedrus.c
> > +++ b/drivers/staging/media/sunxi/cedrus/cedrus.c
> > @@ -40,6 +40,35 @@ static const struct cedrus_control cedrus_controls[] =
> > {
> > 
> >  		.codec		= CEDRUS_CODEC_MPEG2,
> >  		.required	= false,
> >  	
> >  	},
> > 
> > +	{
> > +		.id		=
> 
> V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS,
> 
> > +		.elem_size	= sizeof(struct
> 
> v4l2_ctrl_h264_decode_param),
> 
> > +		.codec		= CEDRUS_CODEC_H264,
> > +		.required	= true,
> > +	},
> > +	{
> > +		.id		=
> 
> V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS,
> 
> > +		.elem_size	= sizeof(struct 
v4l2_ctrl_h264_slice_param),
> > +		.codec		= CEDRUS_CODEC_H264,
> > +		.required	= true,
> > +	},
> > +	{
> > +		.id		= V4L2_CID_MPEG_VIDEO_H264_SPS,
> > +		.elem_size	= sizeof(struct v4l2_ctrl_h264_sps),
> > +		.codec		= CEDRUS_CODEC_H264,
> > +		.required	= true,
> > +	},
> > +	{
> > +		.id		= V4L2_CID_MPEG_VIDEO_H264_PPS,
> > +		.elem_size	= sizeof(struct v4l2_ctrl_h264_pps),
> > +		.codec		= CEDRUS_CODEC_H264,
> > +		.required	= true,
> > +	},
> > +	{
> > +		.id		=
> 
> V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX,
> 
> > +		.elem_size	= sizeof(struct
> 
> v4l2_ctrl_h264_scaling_matrix),
> 
> > +		.codec		= CEDRUS_CODEC_H264,
> > +	},
> > 
> >  };
> >  
> >  #define CEDRUS_CONTROLS_COUNT	ARRAY_SIZE(cedrus_controls)
> > 
> > @@ -278,6 +307,7 @@ static int cedrus_probe(struct platform_device *pdev)
> > 
> >  	}
> >  	
> >  	dev->dec_ops[CEDRUS_CODEC_MPEG2] = &cedrus_dec_ops_mpeg2;
> > 
> > +	dev->dec_ops[CEDRUS_CODEC_H264] = &cedrus_dec_ops_h264;
> > 
> >  	mutex_init(&dev->dev_mutex);
> > 
> > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h
> > b/drivers/staging/media/sunxi/cedrus/cedrus.h index
> > 4aedd24a9848..8c64f9a27e9d 100644
> > --- a/drivers/staging/media/sunxi/cedrus/cedrus.h
> > +++ b/drivers/staging/media/sunxi/cedrus/cedrus.h
> > @@ -30,7 +30,7 @@
> > 
> >  enum cedrus_codec {
> >  
> >  	CEDRUS_CODEC_MPEG2,
> > 
> > -
> > +	CEDRUS_CODEC_H264,
> > 
> >  	CEDRUS_CODEC_LAST,
> >  
> >  };
> > 
> > @@ -40,6 +40,12 @@ enum cedrus_irq_status {
> > 
> >  	CEDRUS_IRQ_OK,
> >  
> >  };
> > 
> > +enum cedrus_h264_pic_type {
> > +	CEDRUS_H264_PIC_TYPE_FRAME	= 0,
> > +	CEDRUS_H264_PIC_TYPE_FIELD,
> > +	CEDRUS_H264_PIC_TYPE_MBAFF,
> > +};
> > +
> > 
> >  struct cedrus_control {
> >  
> >  	u32			id;
> >  	u32			elem_size;
> > 
> > @@ -47,6 +53,14 @@ struct cedrus_control {
> > 
> >  	unsigned char		required:1;
> >  
> >  };
> > 
> > +struct cedrus_h264_run {
> > +	const struct v4l2_ctrl_h264_decode_param	*decode_param;
> > +	const struct v4l2_ctrl_h264_pps			*pps;
> > +	const struct v4l2_ctrl_h264_scaling_matrix	
*scaling_matrix;
> > +	const struct v4l2_ctrl_h264_slice_param		*slice_param;
> > +	const struct v4l2_ctrl_h264_sps			*sps;
> > +};
> > +
> > 
> >  struct cedrus_mpeg2_run {
> >  
> >  	const struct v4l2_ctrl_mpeg2_slice_params	*slice_params;
> >  	const struct v4l2_ctrl_mpeg2_quantization	*quantization;
> > 
> > @@ -57,12 +71,20 @@ struct cedrus_run {
> > 
> >  	struct vb2_v4l2_buffer	*dst;
> >  	
> >  	union {
> > 
> > +		struct cedrus_h264_run	h264;
> > 
> >  		struct cedrus_mpeg2_run	mpeg2;
> >  	
> >  	};
> >  
> >  };
> >  
> >  struct cedrus_buffer {
> >  
> >  	struct v4l2_m2m_buffer          m2m_buf;
> > 
> > +
> > +	union {
> > +		struct {
> > +			unsigned int			position;
> > +			enum cedrus_h264_pic_type	pic_type;
> > +		} h264;
> > +	} codec;
> > 
> >  };
> >  
> >  struct cedrus_ctx {
> > 
> > @@ -77,6 +99,19 @@ struct cedrus_ctx {
> > 
> >  	struct v4l2_ctrl		**ctrls;
> >  	
> >  	struct vb2_buffer		*dst_bufs[VIDEO_MAX_FRAME];
> > 
> > +
> > +	union {
> > +		struct {
> > +			void		*mv_col_buf;
> > +			dma_addr_t	mv_col_buf_dma;
> > +			ssize_t		mv_col_buf_field_size;
> > +			ssize_t		mv_col_buf_size;
> > +			void		*pic_info_buf;
> > +			dma_addr_t	pic_info_buf_dma;
> > +			void		*neighbor_info_buf;
> > +			dma_addr_t	neighbor_info_buf_dma;
> > +		} h264;
> > +	} codec;
> > 
> >  };
> >  
> >  struct cedrus_dec_ops {
> > 
> > @@ -118,6 +153,7 @@ struct cedrus_dev {
> > 
> >  };
> >  
> >  extern struct cedrus_dec_ops cedrus_dec_ops_mpeg2;
> > 
> > +extern struct cedrus_dec_ops cedrus_dec_ops_h264;
> > 
> >  static inline void cedrus_write(struct cedrus_dev *dev, u32 reg, u32 val)
> >  {
> > 
> > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> > b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c index
> > 4d6d602cdde6..a290ae1b8f4d 100644
> > --- a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> > @@ -46,6 +46,19 @@ void cedrus_device_run(void *priv)
> > 
> >  			V4L2_CID_MPEG_VIDEO_MPEG2_QUANTIZATION);
> >  		
> >  		break;
> > 
> > +	case V4L2_PIX_FMT_H264_SLICE:
> > +		run.h264.decode_param = cedrus_find_control_data(ctx,
> > +			V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS);
> > +		run.h264.pps = cedrus_find_control_data(ctx,
> > +			V4L2_CID_MPEG_VIDEO_H264_PPS);
> > +		run.h264.scaling_matrix = cedrus_find_control_data(ctx,
> > +			V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX);
> > +		run.h264.slice_param = cedrus_find_control_data(ctx,
> > +			V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS);
> > +		run.h264.sps = cedrus_find_control_data(ctx,
> > +			V4L2_CID_MPEG_VIDEO_H264_SPS);
> > +		break;
> > +
> > 
> >  	default:
> >  		break;
> >  	
> >  	}
> > 
> > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c
> > b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c new file mode 100644
> > index 000000000000..51e5f57120a2
> > --- /dev/null
> > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c
> > @@ -0,0 +1,584 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +/*
> > + * Cedrus VPU driver
> > + *
> > + * Copyright (c) 2013 Jens Kuske <jenskuske@xxxxxxxxx>
> > + * Copyright (c) 2018 Bootlin
> > + */
> > +
> > +#include <linux/types.h>
> > +
> > +#include <media/videobuf2-dma-contig.h>
> > +
> > +#include "cedrus.h"
> > +#include "cedrus_hw.h"
> > +#include "cedrus_regs.h"
> > +
> > +enum cedrus_h264_sram_off {
> > +	CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE	= 0x000,
> > +	CEDRUS_SRAM_H264_FRAMEBUFFER_LIST	= 0x100,
> > +	CEDRUS_SRAM_H264_REF_LIST_0		= 0x190,
> > +	CEDRUS_SRAM_H264_REF_LIST_1		= 0x199,
> > +	CEDRUS_SRAM_H264_SCALING_LIST_8x8_0	= 0x200,
> > +	CEDRUS_SRAM_H264_SCALING_LIST_8x8_1	= 0x210,
> > +	CEDRUS_SRAM_H264_SCALING_LIST_4x4	= 0x220,
> > +};
> > +
> > +struct cedrus_h264_sram_ref_pic {
> > +	__le32	top_field_order_cnt;
> > +	__le32	bottom_field_order_cnt;
> > +	__le32	frame_info;
> > +	__le32	luma_ptr;
> > +	__le32	chroma_ptr;
> > +	__le32	mv_col_top_ptr;
> > +	__le32	mv_col_bot_ptr;
> > +	__le32	reserved;
> > +} __packed;
> > +
> > +#define CEDRUS_H264_FRAME_NUM		18
> > +
> > +#define CEDRUS_NEIGHBOR_INFO_BUF_SIZE	(16 * SZ_1K)
> > +#define CEDRUS_PIC_INFO_BUF_SIZE	(128 * SZ_1K)
> > +
> > +static void cedrus_h264_write_sram(struct cedrus_dev *dev,
> > +				   enum cedrus_h264_sram_off off,
> > +				   const void *data, size_t len)
> > +{
> > +	const u32 *buffer = data;
> > +	size_t count = DIV_ROUND_UP(len, 4);
> > +
> > +	cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET, off << 2);
> > +
> > +	do {
> > +		cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, *buffer++);
> > +	} while (--count);
> 
> Above loop will still write one word for count = 0. I propose following:
> 
> while (count--)
> 	cedrus_write(dev, VE_AVC_SRAM_PORT_DATA, *buffer++);
> 
> > +}
> > +
> > +static dma_addr_t cedrus_h264_mv_col_buf_addr(struct cedrus_ctx *ctx,
> > +					      unsigned int
> 
> position,
> 
> > +					      unsigned int
> 
> field)
> 
> > +{
> > +	dma_addr_t addr = ctx->codec.h264.mv_col_buf_dma;
> > +
> > +	/* Adjust for the position */
> > +	addr += position * ctx->codec.h264.mv_col_buf_field_size * 2;
> > +
> > +	/* Adjust for the field */
> > +	addr += field * ctx->codec.h264.mv_col_buf_field_size;
> > +
> > +	return addr;
> > +}
> > +
> > +static void cedrus_fill_ref_pic(struct cedrus_ctx *ctx,
> > +				struct cedrus_buffer *buf,
> > +				unsigned int top_field_order_cnt,
> > +				unsigned int
> 
> bottom_field_order_cnt,
> 
> > +				struct cedrus_h264_sram_ref_pic
> 
> *pic)
> 
> > +{
> > +	struct vb2_buffer *vbuf = &buf->m2m_buf.vb.vb2_buf;
> > +	unsigned int position = buf->codec.h264.position;
> > +
> > +	pic->top_field_order_cnt = top_field_order_cnt;
> > +	pic->bottom_field_order_cnt = bottom_field_order_cnt;
> > +	pic->frame_info = buf->codec.h264.pic_type << 8;
> > +
> > +	pic->luma_ptr = cedrus_buf_addr(vbuf, &ctx->dst_fmt, 0);
> > +	pic->chroma_ptr = cedrus_buf_addr(vbuf, &ctx->dst_fmt, 1);
> > +	pic->mv_col_top_ptr = cedrus_h264_mv_col_buf_addr(ctx, position,
> 
> 0);
> 
> > +	pic->mv_col_bot_ptr = cedrus_h264_mv_col_buf_addr(ctx, position,
> 
> 1);
> 
> > +}
> > +
> > +static void cedrus_write_frame_list(struct cedrus_ctx *ctx,
> > +				    struct cedrus_run *run)
> > +{
> > +	struct cedrus_h264_sram_ref_pic pic_list[CEDRUS_H264_FRAME_NUM];
> > +	const struct v4l2_ctrl_h264_decode_param *dec_param =
> > run->h264.decode_param; +	const struct v4l2_ctrl_h264_slice_param
> 
> *slice =
> 
> > run->h264.slice_param; +	const struct v4l2_ctrl_h264_sps *sps =
> > run->h264.sps;
> > +	const struct vb2_buffer *dst_buf = &run->dst->vb2_buf;
> > +	struct vb2_queue *cap_q = &ctx->fh.m2m_ctx->cap_q_ctx.q;
> > +	struct cedrus_buffer *output_buf;
> > +	struct cedrus_dev *dev = ctx->dev;
> > +	unsigned long used_dpbs = 0;
> > +	unsigned int position;
> > +	unsigned int output = 0;
> > +	unsigned int i;
> > +
> > +	memset(pic_list, 0, sizeof(pic_list));
> > +
> > +	for (i = 0; i < ARRAY_SIZE(dec_param->dpb); i++) {
> > +		const struct v4l2_h264_dpb_entry *dpb = &dec_param-
> >
> >dpb[i];
> >
> > +		struct cedrus_buffer *cedrus_buf;
> > +		int buf_idx;
> > +
> > +		if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_VALID))
> > +			continue;
> > +
> > +		buf_idx = vb2_find_timestamp(cap_q, dpb->timestamp, 0);
> > +		if (buf_idx < 0)
> > +			continue;
> > +
> > +		cedrus_buf = vb2_to_cedrus_buffer(ctx-
> >
> >dst_bufs[buf_idx]);
> >
> > +		position = cedrus_buf->codec.h264.position;
> > +		used_dpbs |= BIT(position);
> > +
> > +		if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
> > +			continue;
> > +
> > +		cedrus_fill_ref_pic(ctx, cedrus_buf,
> > +				    dpb->top_field_order_cnt,
> > +				    dpb->bottom_field_order_cnt,
> > +				    &pic_list[position]);
> > +
> > +		output = max(position, output);
> > +	}
> > +
> > +	position = find_next_zero_bit(&used_dpbs, CEDRUS_H264_FRAME_NUM,
> > +				      output);
> > +	if (position >= CEDRUS_H264_FRAME_NUM)
> > +		position = find_first_zero_bit(&used_dpbs,
> 
> CEDRUS_H264_FRAME_NUM);
> 
> I guess you didn't try any interlaced videos? Sometimes it happens that
> buffer is reference and output at the same time. In such cases, above code
> would make two entries, which doesn't work based on Kwiboo's and my
> experiments.
> 
> I guess decoding interlaced videos is out of scope at this time?
> 
> > +
> > +	output_buf = vb2_to_cedrus_buffer(&run->dst->vb2_buf);
> > +	output_buf->codec.h264.position = position;
> > +
> > +	if (slice->flags & V4L2_H264_SLICE_FLAG_FIELD_PIC)
> > +		output_buf->codec.h264.pic_type =
> 
> CEDRUS_H264_PIC_TYPE_FIELD;
> 
> > +	else if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)
> > +		output_buf->codec.h264.pic_type =
> 
> CEDRUS_H264_PIC_TYPE_MBAFF;
> 
> > +	else
> > +		output_buf->codec.h264.pic_type =
> 
> CEDRUS_H264_PIC_TYPE_FRAME;
> 
> > +
> > +	cedrus_fill_ref_pic(ctx, output_buf,
> > +			    dec_param->top_field_order_cnt,
> > +			    dec_param->bottom_field_order_cnt,
> > +			    &pic_list[position]);
> > +
> > +	cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_FRAMEBUFFER_LIST,
> > +			       pic_list, sizeof(pic_list));
> > +
> > +	cedrus_write(dev, VE_H264_OUTPUT_FRAME_IDX, position);
> > +}
> > +
> > +#define CEDRUS_MAX_REF_IDX	32
> > +
> > +static void _cedrus_write_ref_list(struct cedrus_ctx *ctx,
> > +				   struct cedrus_run *run,
> > +				   const u8 *ref_list, u8 
num_ref,
> > +				   enum cedrus_h264_sram_off sram)
> > +{
> > +	const struct v4l2_ctrl_h264_decode_param *decode = run-
> >
> >h264.decode_param;
> >
> > +	struct vb2_queue *cap_q = &ctx->fh.m2m_ctx->cap_q_ctx.q;
> > +	const struct vb2_buffer *dst_buf = &run->dst->vb2_buf;
> > +	struct cedrus_dev *dev = ctx->dev;
> > +	u8 sram_array[CEDRUS_MAX_REF_IDX];
> > +	unsigned int i;
> > +	size_t size;
> > +
> > +	memset(sram_array, 0, sizeof(sram_array));
> > +
> > +	for (i = 0; i < num_ref; i++) {
> > +		const struct v4l2_h264_dpb_entry *dpb;
> > +		const struct cedrus_buffer *cedrus_buf;
> > +		const struct vb2_v4l2_buffer *ref_buf;
> > +		unsigned int position;
> > +		int buf_idx;
> > +		u8 dpb_idx;
> > +
> > +		dpb_idx = ref_list[i];
> > +		dpb = &decode->dpb[dpb_idx];
> > +
> > +		if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
> > +			continue;
> > +
> > +		buf_idx = vb2_find_timestamp(cap_q, dpb->timestamp, 0);
> > +		if (buf_idx < 0)
> > +			continue;
> > +
> > +		ref_buf = to_vb2_v4l2_buffer(ctx->dst_bufs[buf_idx]);
> > +		cedrus_buf = vb2_v4l2_to_cedrus_buffer(ref_buf);
> > +		position = cedrus_buf->codec.h264.position;
> > +
> > +		sram_array[i] |= position << 1;
> > +		if (ref_buf->field == V4L2_FIELD_BOTTOM)
> 
> I'm still not convinced that checking buffer field is appropriate solution
> here. IMO this bit defines top or bottom reference and same buffer could be
> used for both.
> 
> But I guess this belongs for follow up patch which will fix decoding
> interlaced videos.
> 
> > +			sram_array[i] |= BIT(0);
> > +	}
> > +
> > +	size = min_t(size_t, ALIGN(num_ref, 4), sizeof(sram_array));
> > +	cedrus_h264_write_sram(dev, sram, &sram_array, size);
> > +}
> > +
> > +static void cedrus_write_ref_list0(struct cedrus_ctx *ctx,
> > +				   struct cedrus_run *run)
> > +{
> > +	const struct v4l2_ctrl_h264_slice_param *slice = run-
> >
> >h264.slice_param;
> >
> > +
> > +	_cedrus_write_ref_list(ctx, run,
> > +			       slice->ref_pic_list0,
> > +			       slice->num_ref_idx_l0_active_minus1 +
> 
> 1,
> 
> > +			       CEDRUS_SRAM_H264_REF_LIST_0);
> > +}
> > +
> > +static void cedrus_write_ref_list1(struct cedrus_ctx *ctx,
> > +				   struct cedrus_run *run)
> > +{
> > +	const struct v4l2_ctrl_h264_slice_param *slice = run-
> >
> >h264.slice_param;
> >
> > +
> > +	_cedrus_write_ref_list(ctx, run,
> > +			       slice->ref_pic_list1,
> > +			       slice->num_ref_idx_l1_active_minus1 +
> 
> 1,
> 
> > +			       CEDRUS_SRAM_H264_REF_LIST_1);
> > +}
> > +
> > +static void cedrus_write_scaling_lists(struct cedrus_ctx *ctx,
> > +				       struct cedrus_run *run)
> > +{
> > +	const struct v4l2_ctrl_h264_scaling_matrix *scaling =
> > +		run->h264.scaling_matrix;
> > +	struct cedrus_dev *dev = ctx->dev;
> > +
> > +	if (!scaling)
> > +		return;
> > +
> > +	cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_0,
> > +			       scaling->scaling_list_8x8[0],
> > +			       sizeof(scaling-
>scaling_list_8x8[0]));
> > +
> > +	cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_8x8_1,
> > +			       scaling->scaling_list_8x8[1],
> > +			       sizeof(scaling-
>scaling_list_8x8[1]));
> 
> Index above should be 3. IIRC 1 and 3 are used by 4:2:0 chroma subsampling,
> but currently I'm unable to find reference to that in standard.

I actually meant index 0 and 3. While I still can't pinpoint exact chapter in 
specs, I found comment in ffmpeg what each index represents:

0 - Intra, Y
1 - Intra, Cr
2 - Intra, Cb
3 - Inter, Y
4 - Inter, Cr
5 - Inter, Cb

So 0 and 3 makes sense.

> 
> > +
> > +	cedrus_h264_write_sram(dev, CEDRUS_SRAM_H264_SCALING_LIST_4x4,
> > +			       scaling->scaling_list_4x4,
> > +			       sizeof(scaling->scaling_list_4x4));
> > +}
> > +
> > +static void cedrus_write_pred_weight_table(struct cedrus_ctx *ctx,
> > +					   struct cedrus_run
> 
> *run)
> 
> > +{
> > +	const struct v4l2_ctrl_h264_slice_param *slice =
> > +		run->h264.slice_param;
> > +	const struct v4l2_h264_pred_weight_table *pred_weight =
> > +		&slice->pred_weight_table;
> > +	struct cedrus_dev *dev = ctx->dev;
> > +	int i, j, k;
> > +
> > +	cedrus_write(dev, VE_H264_SHS_WP,
> > +		     ((pred_weight->chroma_log2_weight_denom & 0xf) <<
> 
> 4) |
> 
> > +		     ((pred_weight->luma_log2_weight_denom & 0xf) <<
> 
> 0));
> 
> Denominators are only in range of 0-7, so mask should be 0x7. CedarX code
> also specify those two fields 3 bits wide.
> 
> > +
> > +	cedrus_write(dev, VE_AVC_SRAM_PORT_OFFSET,
> > +		     CEDRUS_SRAM_H264_PRED_WEIGHT_TABLE << 2);
> > +
> > +	for (i = 0; i < ARRAY_SIZE(pred_weight->weight_factors); i++) {
> > +		const struct v4l2_h264_weight_factors *factors =
> > +			&pred_weight->weight_factors[i];
> > +
> > +		for (j = 0; j < ARRAY_SIZE(factors->luma_weight); j++) 
{
> > +			u32 val;
> > +
> > +			val = ((factors->luma_offset[j] & 0x1ff) << 
16)
> > 
> > +				(factors->luma_weight[j] & 0x1ff);
> > +			cedrus_write(dev, VE_AVC_SRAM_PORT_DATA,
> 
> val);
> 
> You should cast offset varible to wider type. Currently some videos which
> use prediction weight table don't work for me, unless offset is casted to
> u32 first. Shifting 8 bit variable for 16 places gives you 0 every time.
> 
> Luma offset and weight are defined as s8, so having wider mask doesn't
> really make sense. However, I think weight should be s16 anyway, because
> standard says that it's value could be 2^denominator for default value or
> in range -128..127. Worst case would be 2^7 = 128 and -128. To cover both
> values you need at least 9 bits.
> 
> I guess there is a way to detect when default values need to be written (if
> at all) and that can be handled separately. But I don't see any reason why
> bigger type can't be used for just in case. Offset being s8 is fine and you
> can drop mask for it.
> 
> Everything applies for chroma below too.
> 
> > +		}
> > +
> > +		for (j = 0; j < ARRAY_SIZE(factors->chroma_weight); j+
+)
> 
> {
> 
> > +			for (k = 0; k < ARRAY_SIZE(factors-
> >
> >chroma_weight[0]); k++) {
> >
> > +				u32 val;
> > +
> > +				val = ((factors->chroma_offset[j]
> 
> [k] & 0x1ff) << 16) |
> 
> > +					(factors-
> >
> >chroma_weight[j][k] & 0x1ff);
> >
> > +				cedrus_write(dev,
> 
> VE_AVC_SRAM_PORT_DATA, val);
> 
> > +			}
> > +		}
> > +	}
> > +}
> > +
> > +static void cedrus_set_params(struct cedrus_ctx *ctx,
> > +			      struct cedrus_run *run)
> > +{
> > +	const struct v4l2_ctrl_h264_scaling_matrix *scaling =
> > run->h264.scaling_matrix; +	const struct v4l2_ctrl_h264_decode_param
> > *decode = run->h264.decode_param; +	const struct
> 
> v4l2_ctrl_h264_slice_param
> 
> > *slice = run->h264.slice_param; +	const struct v4l2_ctrl_h264_pps *pps =
> > run->h264.pps;
> > +	const struct v4l2_ctrl_h264_sps *sps = run->h264.sps;
> > +	struct vb2_buffer *src_buf = &run->src->vb2_buf;
> > +	struct cedrus_dev *dev = ctx->dev;
> > +	dma_addr_t src_buf_addr;
> > +	u32 offset = slice->header_bit_size;
> > +	u32 len = (slice->size * 8) - offset;
> > +	u32 reg;
> > +
> > +	cedrus_write(dev, VE_H264_VLD_LEN, len);
> > +	cedrus_write(dev, VE_H264_VLD_OFFSET, offset);
> > +
> > +	src_buf_addr = vb2_dma_contig_plane_dma_addr(src_buf, 0);
> > +	cedrus_write(dev, VE_H264_VLD_END,
> > +		     src_buf_addr + vb2_get_plane_payload(src_buf, 0));
> > +	cedrus_write(dev, VE_H264_VLD_ADDR,
> > +		     VE_H264_VLD_ADDR_VAL(src_buf_addr) |
> > +		     VE_H264_VLD_ADDR_FIRST | VE_H264_VLD_ADDR_VALID |
> > +		     VE_H264_VLD_ADDR_LAST);
> > +
> > +	/*
> > +	 * FIXME: Since the bitstream parsing is done in software, and
> > +	 * in userspace, this shouldn't be needed anymore. But it
> > +	 * turns out that removing it breaks the decoding process,
> > +	 * without any clear indication why.
> > +	 */
> > +	cedrus_write(dev, VE_H264_TRIGGER_TYPE,
> > +		     VE_H264_TRIGGER_TYPE_INIT_SWDEC);
> > +
> > +	if (((pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED) &&
> > +	     (slice->slice_type == V4L2_H264_SLICE_TYPE_P ||
> > +	      slice->slice_type == V4L2_H264_SLICE_TYPE_SP)) ||
> > +	    (pps->weighted_bipred_idc == 1 &&
> > +	     slice->slice_type == V4L2_H264_SLICE_TYPE_B))
> > +		cedrus_write_pred_weight_table(ctx, run);
> > +
> > +	if ((slice->slice_type == V4L2_H264_SLICE_TYPE_P) ||
> > +	    (slice->slice_type == V4L2_H264_SLICE_TYPE_SP) ||
> > +	    (slice->slice_type == V4L2_H264_SLICE_TYPE_B))
> > +		cedrus_write_ref_list0(ctx, run);
> > +
> > +	if (slice->slice_type == V4L2_H264_SLICE_TYPE_B)
> > +		cedrus_write_ref_list1(ctx, run);
> > +
> > +	// picture parameters
> > +	reg = 0;
> > +	/*
> > +	 * FIXME: the kernel headers are allowing the default value to
> > +	 * be passed, but the libva doesn't give us that.
> > +	 */
> > +	reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 10;
> > +	reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 5;
> > +	reg |= (pps->weighted_bipred_idc & 0x3) << 2;
> > +	if (pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE)
> > +		reg |= VE_H264_PPS_ENTROPY_CODING_MODE;
> > +	if (pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED)
> > +		reg |= VE_H264_PPS_WEIGHTED_PRED;
> > +	if (pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED)
> > +		reg |= VE_H264_PPS_CONSTRAINED_INTRA_PRED;
> > +	if (pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE)
> > +		reg |= VE_H264_PPS_TRANSFORM_8X8_MODE;
> > +	cedrus_write(dev, VE_H264_PPS, reg);
> > +
> > +	// sequence parameters
> > +	reg = 0;
> > +	reg |= (sps->chroma_format_idc & 0x7) << 19;
> > +	reg |= (sps->pic_width_in_mbs_minus1 & 0xff) << 8;
> > +	reg |= sps->pic_height_in_map_units_minus1 & 0xff;
> > +	if (sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY)
> > +		reg |= VE_H264_SPS_MBS_ONLY;
> > +	if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)
> > +		reg |= VE_H264_SPS_MB_ADAPTIVE_FRAME_FIELD;
> > +	if (sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE)
> > +		reg |= VE_H264_SPS_DIRECT_8X8_INFERENCE;
> > +	cedrus_write(dev, VE_H264_SPS, reg);
> > +
> > +	// slice parameters
> > +	reg = 0;
> > +	reg |= decode->nal_ref_idc ? BIT(12) : 0;

BIT(12) should be a flag.

> > +	reg |= (slice->slice_type & 0xf) << 8;
> > +	reg |= slice->cabac_init_idc & 0x3;
> > +	reg |= VE_H264_SHS_FIRST_SLICE_IN_PIC;
> > +	if (slice->flags & V4L2_H264_SLICE_FLAG_FIELD_PIC)
> > +		reg |= VE_H264_SHS_FIELD_PIC;
> > +	if (slice->flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD)
> > +		reg |= VE_H264_SHS_BOTTOM_FIELD;
> > +	if (slice->flags & V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED)
> > +		reg |= VE_H264_SHS_DIRECT_SPATIAL_MV_PRED;
> > +	cedrus_write(dev, VE_H264_SHS, reg);
> > +
> > +	reg = 0;
> > +	reg |= VE_H264_SHS2_NUM_REF_IDX_ACTIVE_OVRD;
> > +	reg |= (slice->num_ref_idx_l0_active_minus1 & 0x1f) << 24;
> > +	reg |= (slice->num_ref_idx_l1_active_minus1 & 0x1f) << 16;
> > +	reg |= (slice->disable_deblocking_filter_idc & 0x3) << 8;
> > +	reg |= (slice->slice_alpha_c0_offset_div2 & 0xf) << 4;
> > +	reg |= slice->slice_beta_offset_div2 & 0xf;
> > +	cedrus_write(dev, VE_H264_SHS2, reg);
> > +
> > +	reg = 0;
> > +	if (!(scaling && (pps->flags &
> > V4L2_H264_PPS_FLAG_PIC_SCALING_MATRIX_PRESENT))) +		reg |=
> > VE_H264_SHS_QP_SCALING_MATRIX_DEFAULT;
> > +	reg |= (pps->second_chroma_qp_index_offset & 0x3f) << 16;
> > +	reg |= (pps->chroma_qp_index_offset & 0x3f) << 8;
> > +	reg |= (pps->pic_init_qp_minus26 + 26 + slice->slice_qp_delta) &
> 
> 0x3f;
> 
> > +	cedrus_write(dev, VE_H264_SHS_QP, reg);
> > +
> > +	// clear status flags
> > +	cedrus_write(dev, VE_H264_STATUS, cedrus_read(dev,
> 
> VE_H264_STATUS));
> 
> I'm not sure clearing status here is needed. Do you have any case where it
> is need? Maybe if some error happened before and cedrus_h264_irq_clear()
> wasn't cleared. I'm fine either way.
> 
> > +
> > +	// enable int
> > +	reg = cedrus_read(dev, VE_H264_CTRL);
> > +	cedrus_write(dev, VE_H264_CTRL, reg |
> > +		     VE_H264_CTRL_SLICE_DECODE_INT |
> > +		     VE_H264_CTRL_DECODE_ERR_INT |
> > +		     VE_H264_CTRL_VLD_DATA_REQ_INT);
> 
> Since this is the only place where you set VE_H264_CTRL, I wouldn't preserve
> previous content. This mode is also capable of decoding VP8 and AVS. So in
> theory, if user would want to decode H264 and VP8 videos at the same time,
> preserving content will probably corrupt your output. I would just set all
> other bits to 0. What do you think? I tested this without preservation and
> it works fine.
> 
> > +}
> > +
> > +static enum cedrus_irq_status
> > +cedrus_h264_irq_status(struct cedrus_ctx *ctx)
> > +{
> > +	struct cedrus_dev *dev = ctx->dev;
> > +	u32 reg = cedrus_read(dev, VE_H264_STATUS);
> > +
> > +	if (reg & (VE_H264_STATUS_DECODE_ERR_INT |
> > +		   VE_H264_STATUS_VLD_DATA_REQ_INT))
> > +		return CEDRUS_IRQ_ERROR;
> > +
> > +	if (reg & VE_H264_CTRL_SLICE_DECODE_INT)
> > +		return CEDRUS_IRQ_OK;
> > +
> > +	return CEDRUS_IRQ_NONE;
> > +}
> > +
> > +static void cedrus_h264_irq_clear(struct cedrus_ctx *ctx)
> > +{
> > +	struct cedrus_dev *dev = ctx->dev;
> > +
> > +	cedrus_write(dev, VE_H264_STATUS,
> > +		     VE_H264_STATUS_INT_MASK);
> > +}
> > +
> > +static void cedrus_h264_irq_disable(struct cedrus_ctx *ctx)
> > +{
> > +	struct cedrus_dev *dev = ctx->dev;
> > +	u32 reg = cedrus_read(dev, VE_H264_CTRL);
> > +
> > +	cedrus_write(dev, VE_H264_CTRL,
> > +		     reg & ~VE_H264_CTRL_INT_MASK);
> > +}
> > +
> > +static void cedrus_h264_setup(struct cedrus_ctx *ctx,
> > +			      struct cedrus_run *run)
> > +{
> > +	struct cedrus_dev *dev = ctx->dev;
> > +
> > +	cedrus_engine_enable(dev, CEDRUS_CODEC_H264);
> > +
> > +	cedrus_write(dev, VE_H264_SDROT_CTRL, 0);
> > +	cedrus_write(dev, VE_H264_EXTRA_BUFFER1,
> > +		     ctx->codec.h264.pic_info_buf_dma);
> > +	cedrus_write(dev, VE_H264_EXTRA_BUFFER2,
> > +		     ctx->codec.h264.neighbor_info_buf_dma);
> > +
> > +	cedrus_write_scaling_lists(ctx, run);
> > +	cedrus_write_frame_list(ctx, run);
> > +
> > +	cedrus_set_params(ctx, run);
> > +}
> > +
> > +static int cedrus_h264_start(struct cedrus_ctx *ctx)
> > +{
> > +	struct cedrus_dev *dev = ctx->dev;
> > +	unsigned int field_size;
> > +	unsigned int mv_col_size;
> > +	int ret;
> > +
> > +	/*
> > +	 * FIXME: It seems that the H6 cedarX code is using a formula
> > +	 * here based on the size of the frame, while all the older
> > +	 * code is using a fixed size, so that might need to be
> > +	 * changed at some point.
> > +	 */
> > +	ctx->codec.h264.pic_info_buf =
> > +		dma_alloc_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE,
> > +				   &ctx-
> >
> >codec.h264.pic_info_buf_dma,
> >
> > +				   GFP_KERNEL);
> > +	if (!ctx->codec.h264.pic_info_buf)
> > +		return -ENOMEM;
> > +
> > +	/*
> > +	 * That buffer is supposed to be 16kiB in size, and be aligned
> > +	 * on 16kiB as well. However, dma_alloc_coherent provides the
> > +	 * guarantee that we'll have a CPU and DMA address aligned on
> > +	 * the smallest page order that is greater to the requested
> > +	 * size, so we don't have to overallocate.
> > +	 */
> > +	ctx->codec.h264.neighbor_info_buf =
> > +		dma_alloc_coherent(dev->dev,
> 
> CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
> 
> > +				   &ctx-
> >
> >codec.h264.neighbor_info_buf_dma,
> >
> > +				   GFP_KERNEL);
> > +	if (!ctx->codec.h264.neighbor_info_buf) {
> > +		ret = -ENOMEM;
> > +		goto err_pic_buf;
> > +	}
> > +
> > +	field_size = DIV_ROUND_UP(ctx->src_fmt.width, 16) *
> > +		DIV_ROUND_UP(ctx->src_fmt.height, 16) * 16;
> > +
> > +	/*
> > +	 * FIXME: This is actually conditional to
> > +	 * V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE not being set, we
> > +	 * might have to rework this if memory efficiency ever is
> > +	 * something we need to work on.
> > +	 */
> > +	field_size = field_size * 2;
> > +
> > +	/*
> > +	 * FIXME: This is actually conditional to
> > +	 * V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY not being set, we might
> > +	 * have to rework this if memory efficiency ever is something
> > +	 * we need to work on.
> > +	 */
> > +	field_size = field_size * 2;
> > +	ctx->codec.h264.mv_col_buf_field_size = field_size;
> 
> CedarX code aligns this buffer to 1024. Should we do it too just to be on
> the safe side? I don't think it cost us anything due to
> dma_alloc_coherent() alignments.

I mixed up this a bit. While I still think we should do it, it cost us just a 
little extra memory.

It wouldn't cost us anything if they would be separately allocated (due to 
alignment), but that's not the case here.

Best regards,
Jernej

> 
> Sorry again for a bit late in-depth review.
> 
> Best regards,
> Jernej
> 
> > +
> > +	mv_col_size = field_size * 2 * CEDRUS_H264_FRAME_NUM;
> > +	ctx->codec.h264.mv_col_buf_size = mv_col_size;
> > +	ctx->codec.h264.mv_col_buf = dma_alloc_coherent(dev->dev,
> > +
> 
> ctx->codec.h264.mv_col_buf_size,
> 
> > +
> 
> &ctx->codec.h264.mv_col_buf_dma,
> 
> > +
> 
> GFP_KERNEL);
> 
> > +	if (!ctx->codec.h264.mv_col_buf) {
> > +		ret = -ENOMEM;
> > +		goto err_neighbor_buf;
> > +	}
> > +
> > +	return 0;
> > +
> > +err_neighbor_buf:
> > +	dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
> > +			  ctx->codec.h264.neighbor_info_buf,
> > +			  ctx->codec.h264.neighbor_info_buf_dma);
> > +
> > +err_pic_buf:
> > +	dma_free_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE,
> > +			  ctx->codec.h264.pic_info_buf,
> > +			  ctx->codec.h264.pic_info_buf_dma);
> > +	return ret;
> > +}
> > +
> > +static void cedrus_h264_stop(struct cedrus_ctx *ctx)
> > +{
> > +	struct cedrus_dev *dev = ctx->dev;
> > +
> > +	dma_free_coherent(dev->dev, ctx->codec.h264.mv_col_buf_size,
> > +			  ctx->codec.h264.mv_col_buf,
> > +			  ctx->codec.h264.mv_col_buf_dma);
> > +	dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE,
> > +			  ctx->codec.h264.neighbor_info_buf,
> > +			  ctx->codec.h264.neighbor_info_buf_dma);
> > +	dma_free_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE,
> > +			  ctx->codec.h264.pic_info_buf,
> > +			  ctx->codec.h264.pic_info_buf_dma);
> > +}
> > +
> > +static void cedrus_h264_trigger(struct cedrus_ctx *ctx)
> > +{
> > +	struct cedrus_dev *dev = ctx->dev;
> > +
> > +	cedrus_write(dev, VE_H264_TRIGGER_TYPE,
> > +		     VE_H264_TRIGGER_TYPE_AVC_SLICE_DECODE);
> > +}
> > +
> > +struct cedrus_dec_ops cedrus_dec_ops_h264 = {
> > +	.irq_clear	= cedrus_h264_irq_clear,
> > +	.irq_disable	= cedrus_h264_irq_disable,
> > +	.irq_status	= cedrus_h264_irq_status,
> > +	.setup		= cedrus_h264_setup,
> > +	.start		= cedrus_h264_start,
> > +	.stop		= cedrus_h264_stop,
> > +	.trigger	= cedrus_h264_trigger,
> > +};
> > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> > b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c index
> > 0acf219a8c91..ab402b0cac4e 100644
> > --- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> > @@ -46,6 +46,10 @@ int cedrus_engine_enable(struct cedrus_dev *dev, enum
> > cedrus_codec codec) reg |= VE_MODE_DEC_MPEG;
> > 
> >  		break;
> > 
> > +	case CEDRUS_CODEC_H264:
> > +		reg |= VE_MODE_DEC_H264;
> > +		break;
> > +
> > 
> >  	default:
> >  		return -EINVAL;
> >  	
> >  	}
> > 
> > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_regs.h
> > b/drivers/staging/media/sunxi/cedrus/cedrus_regs.h index
> > de2d6b6f64bf..3e9931416e45 100644
> > --- a/drivers/staging/media/sunxi/cedrus/cedrus_regs.h
> > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_regs.h
> > @@ -232,4 +232,95 @@
> > 
> >  #define VE_DEC_MPEG_ROT_LUMA			(VE_ENGINE_DEC_MPEG +
> 
> 0xcc)
> 
> >  #define VE_DEC_MPEG_ROT_CHROMA
> 
> (VE_ENGINE_DEC_MPEG + 0xd0)
> 
> > +#define VE_H264_SPS			0x200
> > +#define VE_H264_SPS_MBS_ONLY			BIT(18)
> > +#define VE_H264_SPS_MB_ADAPTIVE_FRAME_FIELD	BIT(17)
> > +#define VE_H264_SPS_DIRECT_8X8_INFERENCE	BIT(16)
> > +
> > +#define VE_H264_PPS			0x204
> > +#define VE_H264_PPS_ENTROPY_CODING_MODE		BIT(15)
> > +#define VE_H264_PPS_WEIGHTED_PRED		BIT(4)
> > +#define VE_H264_PPS_CONSTRAINED_INTRA_PRED	BIT(1)
> > +#define VE_H264_PPS_TRANSFORM_8X8_MODE		BIT(0)
> > +
> > +#define VE_H264_SHS			0x208
> > +#define VE_H264_SHS_FIRST_SLICE_IN_PIC		BIT(5)
> > +#define VE_H264_SHS_FIELD_PIC			BIT(4)
> > +#define VE_H264_SHS_BOTTOM_FIELD		BIT(3)
> > +#define VE_H264_SHS_DIRECT_SPATIAL_MV_PRED	BIT(2)
> > +
> > +#define VE_H264_SHS2			0x20c
> > +#define VE_H264_SHS2_NUM_REF_IDX_ACTIVE_OVRD	BIT(12)
> > +
> > +#define VE_H264_SHS_WP			0x210
> > +
> > +#define VE_H264_SHS_QP			0x21c
> > +#define VE_H264_SHS_QP_SCALING_MATRIX_DEFAULT	BIT(24)
> > +
> > +#define VE_H264_CTRL			0x220
> > +#define VE_H264_CTRL_VLD_DATA_REQ_INT		BIT(2)
> > +#define VE_H264_CTRL_DECODE_ERR_INT		BIT(1)
> > +#define VE_H264_CTRL_SLICE_DECODE_INT		BIT(0)
> > +
> > +#define VE_H264_CTRL_INT_MASK		(VE_H264_CTRL_VLD_DATA_REQ_INT | 
\
> > +
> 
> VE_H264_CTRL_DECODE_ERR_INT | \
> 
> > +
> 
> VE_H264_CTRL_SLICE_DECODE_INT)
> 
> > +
> > +#define VE_H264_TRIGGER_TYPE		0x224
> > +#define VE_H264_TRIGGER_TYPE_AVC_SLICE_DECODE	(8 << 0)
> > +#define VE_H264_TRIGGER_TYPE_INIT_SWDEC		(7 << 0)
> > +
> > +#define VE_H264_STATUS			0x228
> > +#define VE_H264_STATUS_VLD_DATA_REQ_INT
> 
> VE_H264_CTRL_VLD_DATA_REQ_INT
> 
> > +#define VE_H264_STATUS_DECODE_ERR_INT
> 
> VE_H264_CTRL_DECODE_ERR_INT
> 
> > +#define VE_H264_STATUS_SLICE_DECODE_INT
> 
> VE_H264_CTRL_SLICE_DECODE_INT
> 
> > +
> > +#define VE_H264_STATUS_INT_MASK
> 
> VE_H264_CTRL_INT_MASK
> 
> > +
> > +#define VE_H264_CUR_MB_NUM		0x22c
> > +
> > +#define VE_H264_VLD_ADDR		0x230
> > +#define VE_H264_VLD_ADDR_FIRST			BIT(30)
> > +#define VE_H264_VLD_ADDR_LAST			BIT(29)
> > +#define VE_H264_VLD_ADDR_VALID			BIT(28)
> > +#define VE_H264_VLD_ADDR_VAL(x)			(((x) & 
0x0ffffff0) |
> 
> ((x) >> 28))
> 
> > +
> > +#define VE_H264_VLD_OFFSET		0x234
> > +#define VE_H264_VLD_LEN			0x238
> > +#define VE_H264_VLD_END			0x23c
> > +#define VE_H264_SDROT_CTRL		0x240
> > +#define VE_H264_OUTPUT_FRAME_IDX	0x24c
> > +#define VE_H264_EXTRA_BUFFER1		0x250
> > +#define VE_H264_EXTRA_BUFFER2		0x254
> > +#define VE_H264_BASIC_BITS		0x2dc
> > +#define VE_AVC_SRAM_PORT_OFFSET		0x2e0
> > +#define VE_AVC_SRAM_PORT_DATA		0x2e4
> > +
> > +#define VE_ISP_INPUT_SIZE		0xa00
> > +#define VE_ISP_INPUT_STRIDE		0xa04
> > +#define VE_ISP_CTRL			0xa08
> > +#define VE_ISP_INPUT_LUMA		0xa78
> > +#define VE_ISP_INPUT_CHROMA		0xa7c
> > +
> > +#define VE_AVC_PARAM			0xb04
> > +#define VE_AVC_QP			0xb08
> > +#define VE_AVC_MOTION_EST		0xb10
> > +#define VE_AVC_CTRL			0xb14
> > +#define VE_AVC_TRIGGER			0xb18
> > +#define VE_AVC_STATUS			0xb1c
> > +#define VE_AVC_BASIC_BITS		0xb20
> > +#define VE_AVC_UNK_BUF			0xb60
> > +#define VE_AVC_VLE_ADDR			0xb80
> > +#define VE_AVC_VLE_END			0xb84
> > +#define VE_AVC_VLE_OFFSET		0xb88
> > +#define VE_AVC_VLE_MAX			0xb8c
> > +#define VE_AVC_VLE_LENGTH		0xb90
> > +#define VE_AVC_REF_LUMA			0xba0
> > +#define VE_AVC_REF_CHROMA		0xba4
> > +#define VE_AVC_REC_LUMA			0xbb0
> > +#define VE_AVC_REC_CHROMA		0xbb4
> > +#define VE_AVC_REF_SLUMA		0xbb8
> > +#define VE_AVC_REC_SLUMA		0xbbc
> > +#define VE_AVC_MB_INFO			0xbc0
> > +
> > 
> >  #endif
> > 
> > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_video.c
> > b/drivers/staging/media/sunxi/cedrus/cedrus_video.c index
> > b5cc79389d67..67062900f87a 100644
> > --- a/drivers/staging/media/sunxi/cedrus/cedrus_video.c
> > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_video.c
> > @@ -38,6 +38,10 @@ static struct cedrus_format cedrus_formats[] = {
> > 
> >  		.directions	= CEDRUS_DECODE_SRC,
> >  	
> >  	},
> >  	{
> > 
> > +		.pixelformat	= V4L2_PIX_FMT_H264_SLICE,
> > +		.directions	= CEDRUS_DECODE_SRC,
> > +	},
> > +	{
> > 
> >  		.pixelformat	= V4L2_PIX_FMT_SUNXI_TILED_NV12,
> >  		.directions	= CEDRUS_DECODE_DST,
> >  	
> >  	},
> > 
> > @@ -100,6 +104,7 @@ static void cedrus_prepare_format(struct
> > v4l2_pix_format *pix_fmt)
> > 
> >  	switch (pix_fmt->pixelformat) {
> > 
> >  	case V4L2_PIX_FMT_MPEG2_SLICE:
> > +	case V4L2_PIX_FMT_H264_SLICE:
> >  		/* Zero bytes per line for encoded source. */
> >  		bytesperline = 0;
> > 
> > @@ -454,6 +459,10 @@ static int cedrus_start_streaming(struct vb2_queue
> > *vq, unsigned int count) ctx->current_codec = CEDRUS_CODEC_MPEG2;
> > 
> >  		break;
> > 
> > +	case V4L2_PIX_FMT_H264_SLICE:
> > +		ctx->current_codec = CEDRUS_CODEC_H264;
> > +		break;
> > +
> > 
> >  	default:
> >  		return -EINVAL;
> >  	
> >  	}