RE: [PATCH v2 8/8] [media] coda: add CODA7541 decoding support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Philipp,

This patch did not apply cleanly on my tree, which I find quite strange.
I applied it manually, but please check here whether the result is
correct:
http://git.linuxtv.org/kdebski/media.git/shortlog/refs/heads/master

Best wishes,
-- 
Kamil Debski
Linux Kernel Developer
Samsung R&D Institute Poland


> -----Original Message-----
> From: Philipp Zabel [mailto:p.zabel@xxxxxxxxxxxxxx]
> Sent: Friday, June 21, 2013 9:56 AM
> To: linux-media@xxxxxxxxxxxxxxx
> Cc: Kamil Debski; Javier Martin; Sylwester Nawrocki; Gaëtan Carlier;
> Wei Yongjun; Philipp Zabel
> Subject: [PATCH v2 8/8] [media] coda: add CODA7541 decoding support
> 
> This patch enables decoding of h.264 and mpeg4 streams on CODA7541.
> Queued output buffers are immediately copied into the bitstream
> ringbuffer. A device_run can be scheduled whenever there is either
> enough compressed bitstream data, or the CODA is in stream end mode.
> 
> On each successful device_run, data is read from the bitstream ringbuffer
> and a frame is decoded into a free internal framebuffer. Depending on
> reordering, a possibly previously decoded frame is marked as display
> frame, and at the same time the display frame from the previous run is
> copied out into a capture buffer by the rotator hardware.
> 
> The dequeued capture buffers are counted to send the EOS signal to
> userspace with the last frame. When userspace sends the decoder stop
> command or enqueues an empty output buffer, the stream end flag is set
> to allow decoding the remaining frames in the bitstream ringbuffer.
> 
> The enum_fmt/try_fmt functions return fixed capture buffer sizes while
> the output queue is streaming, to allow better autonegotiation in
> userspace.
> 
> A per-context buffer mutex is used to lock the picture run against
> buffer dequeueing: if a job gets queued, then streamoff dequeues the
> last buffer, and then device_run is called, bail out. For that the
> interrupt handler has to be threaded.
> 
> Signed-off-by: Philipp Zabel <p.zabel@xxxxxxxxxxxxxx>
> ---
> Changes since v1:
>  - Included the fix by Wei Yongjun, adding a missing unlock in the
>    coda_stop_streaming() error handling case.
>  - Restricted check for available bitstream data in coda_job_ready()
>    to the decoder case.
> ---
>  drivers/media/platform/coda.c | 787
> ++++++++++++++++++++++++++++++++++++++----
>  drivers/media/platform/coda.h |  84 +++++
>  2 files changed, 813 insertions(+), 58 deletions(-)
> 
> diff --git a/drivers/media/platform/coda.c
> b/drivers/media/platform/coda.c index e8b3708..9cbdea6 100644
> --- a/drivers/media/platform/coda.c
> +++ b/drivers/media/platform/coda.c
> @@ -29,6 +29,7 @@
> 
>  #include <media/v4l2-ctrls.h>
>  #include <media/v4l2-device.h>
> +#include <media/v4l2-event.h>
>  #include <media/v4l2-ioctl.h>
>  #include <media/v4l2-mem2mem.h>
>  #include <media/videobuf2-core.h>
> @@ -47,9 +48,11 @@
>  #define CODA_PARA_BUF_SIZE	(10 * 1024)
>  #define CODA_ISRAM_SIZE	(2048 * 2)
>  #define CODADX6_IRAM_SIZE	0xb000
> -#define CODA7_IRAM_SIZE		0x14000 /* 81920 bytes */
> +#define CODA7_IRAM_SIZE		0x14000
> 
> -#define CODA_MAX_FRAMEBUFFERS	2
> +#define CODA7_PS_BUF_SIZE	0x28000
> +
> +#define CODA_MAX_FRAMEBUFFERS	8
> 
>  #define MAX_W		8192
>  #define MAX_H		8192
> @@ -178,12 +181,16 @@ struct coda_iram_info {
> 
>  struct coda_ctx {
>  	struct coda_dev			*dev;
> +	struct mutex			buffer_mutex;
>  	struct list_head		list;
> +	struct work_struct		skip_run;
>  	int				aborting;
> +	int				initialized;
>  	int				streamon_out;
>  	int				streamon_cap;
>  	u32				isequence;
>  	u32				qsequence;
> +	u32				osequence;
>  	struct coda_q_data		q_data[2];
>  	enum coda_inst_type		inst_type;
>  	struct coda_codec		*codec;
> @@ -193,12 +200,16 @@ struct coda_ctx {
>  	struct v4l2_ctrl_handler	ctrls;
>  	struct v4l2_fh			fh;
>  	int				gopcounter;
> +	int				runcounter;
>  	char				vpu_header[3][64];
>  	int				vpu_header_size[3];
>  	struct kfifo			bitstream_fifo;
>  	struct mutex			bitstream_mutex;
>  	struct coda_aux_buf		bitstream;
> +	bool				prescan_failed;
>  	struct coda_aux_buf		parabuf;
> +	struct coda_aux_buf		psbuf;
> +	struct coda_aux_buf		slicebuf;
>  	struct coda_aux_buf
> 	internal_frames[CODA_MAX_FRAMEBUFFERS];
>  	struct coda_aux_buf		workbuf;
>  	int				num_internal_frames;
> @@ -206,6 +217,8 @@ struct coda_ctx {
>  	int				reg_idx;
>  	struct coda_iram_info		iram_info;
>  	u32				bit_stream_param;
> +	u32				frm_dis_flg;
> +	int				display_idx;
>  };
> 
>  static const u8 coda_filler_nal[14] = { 0x00, 0x00, 0x00, 0x01, 0x0c,
> 0xff, @@ -257,6 +270,8 @@ static void coda_command_async(struct
> coda_ctx *ctx, int cmd)
>  		/* Restore context related registers to CODA */
>  		coda_write(dev, ctx->bit_stream_param,
>  				CODA_REG_BIT_BIT_STREAM_PARAM);
> +		coda_write(dev, ctx->frm_dis_flg,
> +				CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
>  		coda_write(dev, ctx->workbuf.paddr,
> CODA_REG_BIT_WORK_BUF_ADDR);
>  	}
> 
> @@ -331,6 +346,8 @@ static struct coda_codec codadx6_codecs[] =
> {  static struct coda_codec coda7_codecs[] = {
>  	CODA_CODEC(CODA7_MODE_ENCODE_H264, V4L2_PIX_FMT_YUV420,
> V4L2_PIX_FMT_H264,   1280, 720),
>  	CODA_CODEC(CODA7_MODE_ENCODE_MP4,  V4L2_PIX_FMT_YUV420,
> V4L2_PIX_FMT_MPEG4,  1280, 720),
> +	CODA_CODEC(CODA7_MODE_DECODE_H264, V4L2_PIX_FMT_H264,
> V4L2_PIX_FMT_YUV420, 1920, 1080),
> +	CODA_CODEC(CODA7_MODE_DECODE_MP4,  V4L2_PIX_FMT_MPEG4,
> +V4L2_PIX_FMT_YUV420, 1920, 1080),
>  };
> 
>  static bool coda_format_is_yuv(u32 fourcc) @@ -399,7 +416,7 @@ static
> int vidioc_querycap(struct file *file, void *priv,  }
> 
>  static int enum_fmt(void *priv, struct v4l2_fmtdesc *f,
> -			enum v4l2_buf_type type)
> +			enum v4l2_buf_type type, int src_fourcc)
>  {
>  	struct coda_ctx *ctx = fh_to_ctx(priv);
>  	struct coda_codec *codecs = ctx->dev->devtype->codecs; @@ -411,7
> +428,8 @@ static int enum_fmt(void *priv, struct v4l2_fmtdesc *f,
> 
>  	for (i = 0; i < num_formats; i++) {
>  		/* Both uncompressed formats are always supported */
> -		if (coda_format_is_yuv(formats[i].fourcc)) {
> +		if (coda_format_is_yuv(formats[i].fourcc) &&
> +		    !coda_format_is_yuv(src_fourcc)) {
>  			if (num == f->index)
>  				break;
>  			++num;
> @@ -419,8 +437,10 @@ static int enum_fmt(void *priv, struct
> v4l2_fmtdesc *f,
>  		}
>  		/* Compressed formats may be supported, check the codec
> list */
>  		for (k = 0; k < num_codecs; k++) {
> +			/* if src_fourcc is set, only consider matching
> codecs */
>  			if (type == V4L2_BUF_TYPE_VIDEO_CAPTURE &&
> -			    formats[i].fourcc == codecs[k].dst_fourcc)
> +			    formats[i].fourcc == codecs[k].dst_fourcc &&
> +			    (!src_fourcc || src_fourcc ==
> codecs[k].src_fourcc))
>  				break;
>  			if (type == V4L2_BUF_TYPE_VIDEO_OUTPUT &&
>  			    formats[i].fourcc == codecs[k].src_fourcc) @@ -
> 447,13 +467,26 @@ static int enum_fmt(void *priv, struct v4l2_fmtdesc
> *f,  static int vidioc_enum_fmt_vid_cap(struct file *file, void *priv,
>  				   struct v4l2_fmtdesc *f)
>  {
> -	return enum_fmt(priv, f, V4L2_BUF_TYPE_VIDEO_CAPTURE);
> +	struct coda_ctx *ctx = fh_to_ctx(priv);
> +	struct vb2_queue *src_vq;
> +	struct coda_q_data *q_data_src;
> +
> +	/* If the source format is already fixed, only list matching
> formats */
> +	src_vq = v4l2_m2m_get_vq(ctx->m2m_ctx,
> V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +	if (vb2_is_streaming(src_vq)) {
> +		q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +
> +		return enum_fmt(priv, f, V4L2_BUF_TYPE_VIDEO_CAPTURE,
> +				q_data_src->fourcc);
> +	}
> +
> +	return enum_fmt(priv, f, V4L2_BUF_TYPE_VIDEO_CAPTURE, 0);
>  }
> 
>  static int vidioc_enum_fmt_vid_out(struct file *file, void *priv,
>  				   struct v4l2_fmtdesc *f)
>  {
> -	return enum_fmt(priv, f, V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +	return enum_fmt(priv, f, V4L2_BUF_TYPE_VIDEO_OUTPUT, 0);
>  }
> 
>  static int vidioc_g_fmt(struct file *file, void *priv, struct
> v4l2_format *f) @@ -526,15 +559,45 @@ static int
> vidioc_try_fmt_vid_cap(struct file *file, void *priv,
>  				  struct v4l2_format *f)
>  {
>  	struct coda_ctx *ctx = fh_to_ctx(priv);
> -	struct coda_codec *codec = NULL;
> +	struct coda_codec *codec;
> +	struct vb2_queue *src_vq;
> +	int ret;
> +
> +	/*
> +	 * If the source format is already fixed, try to find a codec
> that
> +	 * converts to the given destination format
> +	 */
> +	src_vq = v4l2_m2m_get_vq(ctx->m2m_ctx,
> V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +	if (vb2_is_streaming(src_vq)) {
> +		struct coda_q_data *q_data_src;
> 
> -	/* Determine codec by the encoded format */
> -	codec = coda_find_codec(ctx->dev, V4L2_PIX_FMT_YUV420,
> -				f->fmt.pix.pixelformat);
> +		q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +		codec = coda_find_codec(ctx->dev, q_data_src->fourcc,
> +					f->fmt.pix.pixelformat);
> +		if (!codec)
> +			return -EINVAL;
> +	} else {
> +		/* Otherwise determine codec by encoded format, if possible
> */
> +		codec = coda_find_codec(ctx->dev, V4L2_PIX_FMT_YUV420,
> +					f->fmt.pix.pixelformat);
> +	}
> 
>  	f->fmt.pix.colorspace = ctx->colorspace;
> 
> -	return vidioc_try_fmt(codec, f);
> +	ret = vidioc_try_fmt(codec, f);
> +	if (ret < 0)
> +		return ret;
> +
> +	/* The h.264 decoder only returns complete 16x16 macroblocks */
> +	if (codec && codec->src_fourcc == V4L2_PIX_FMT_H264) {
> +		f->fmt.pix.width = round_up(f->fmt.pix.width, 16);
> +		f->fmt.pix.height = round_up(f->fmt.pix.height, 16);
> +		f->fmt.pix.bytesperline = f->fmt.pix.width;
> +		f->fmt.pix.sizeimage = f->fmt.pix.bytesperline *
> +				       f->fmt.pix.height * 3 / 2;
> +	}
> +
> +	return 0;
>  }
> 
>  static int vidioc_try_fmt_vid_out(struct file *file, void *priv, @@ -
> 644,11 +707,35 @@ static int vidioc_expbuf(struct file *file, void
> *priv,
>  	return v4l2_m2m_expbuf(file, ctx->m2m_ctx, eb);  }
> 
> +static bool coda_buf_is_end_of_stream(struct coda_ctx *ctx,
> +				      struct v4l2_buffer *buf)
> +{
> +	struct vb2_queue *src_vq;
> +
> +	src_vq = v4l2_m2m_get_vq(ctx->m2m_ctx,
> V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +
> +	return ((ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) &&
> +		(buf->sequence == (ctx->qsequence - 1))); }
> +
>  static int vidioc_dqbuf(struct file *file, void *priv, struct
> v4l2_buffer *buf)  {
>  	struct coda_ctx *ctx = fh_to_ctx(priv);
> +	int ret;
> +
> +	ret = v4l2_m2m_dqbuf(file, ctx->m2m_ctx, buf);
> 
> -	return v4l2_m2m_dqbuf(file, ctx->m2m_ctx, buf);
> +	/* If this is the last capture buffer, emit an end-of-stream
> event */
> +	if (buf->type == V4L2_BUF_TYPE_VIDEO_CAPTURE &&
> +	    coda_buf_is_end_of_stream(ctx, buf)) {
> +		const struct v4l2_event eos_event = {
> +			.type = V4L2_EVENT_EOS
> +		};
> +
> +		v4l2_event_queue_fh(&ctx->fh, &eos_event);
> +	}
> +
> +	return ret;
>  }
> 
>  static int vidioc_streamon(struct file *file, void *priv, @@ -663,8
> +750,53 @@ static int vidioc_streamoff(struct file *file, void *priv,
>  			    enum v4l2_buf_type type)
>  {
>  	struct coda_ctx *ctx = fh_to_ctx(priv);
> +	int ret;
> +
> +	/*
> +	 * This indirectly calls __vb2_queue_cancel, which dequeues all
> buffers.
> +	 * We therefore have to lock it against running hardware in this
> context,
> +	 * which still needs the buffers.
> +	 */
> +	mutex_lock(&ctx->buffer_mutex);
> +	ret = v4l2_m2m_streamoff(file, ctx->m2m_ctx, type);
> +	mutex_unlock(&ctx->buffer_mutex);
> 
> -	return v4l2_m2m_streamoff(file, ctx->m2m_ctx, type);
> +	return ret;
> +}
> +
> +static int vidioc_decoder_cmd(struct file *file, void *fh,
> +			      struct v4l2_decoder_cmd *dc)
> +{
> +	struct coda_ctx *ctx = fh_to_ctx(fh);
> +
> +	if (dc->cmd != V4L2_DEC_CMD_STOP)
> +		return -EINVAL;
> +
> +	if ((dc->flags & V4L2_DEC_CMD_STOP_TO_BLACK) ||
> +	    (dc->flags & V4L2_DEC_CMD_STOP_IMMEDIATELY))
> +		return -EINVAL;
> +
> +	if (dc->stop.pts != 0)
> +		return -EINVAL;
> +
> +	if (ctx->inst_type != CODA_INST_DECODER)
> +		return -EINVAL;
> +
> +	/* Set the stream-end flag on this context */
> +	ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
> +
> +	return 0;
> +}
> +
> +static int vidioc_subscribe_event(struct v4l2_fh *fh,
> +				  const struct v4l2_event_subscription *sub) {
> +	switch (sub->type) {
> +	case V4L2_EVENT_EOS:
> +		return v4l2_event_subscribe(fh, sub, 0, NULL);
> +	default:
> +		return v4l2_ctrl_subscribe_event(fh, sub);
> +	}
>  }
> 
>  static const struct v4l2_ioctl_ops coda_ioctl_ops = { @@ -689,8
> +821,22 @@ static const struct v4l2_ioctl_ops coda_ioctl_ops = {
> 
>  	.vidioc_streamon	= vidioc_streamon,
>  	.vidioc_streamoff	= vidioc_streamoff,
> +
> +	.vidioc_decoder_cmd	= vidioc_decoder_cmd,
> +
> +	.vidioc_subscribe_event = vidioc_subscribe_event,
> +	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
>  };
> 
> +static int coda_start_decoding(struct coda_ctx *ctx);
> +
> +static void coda_skip_run(struct work_struct *work) {
> +	struct coda_ctx *ctx = container_of(work, struct coda_ctx,
> skip_run);
> +
> +	v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->m2m_ctx); }
> +
>  static inline int coda_get_bitstream_payload(struct coda_ctx *ctx)  {
>  	return kfifo_len(&ctx->bitstream_fifo); @@ -771,6 +917,8 @@
> static bool coda_bitstream_try_queue(struct coda_ctx *ctx,
>  	if (ctx == v4l2_m2m_get_curr_priv(ctx->dev->m2m_dev))
>  		coda_kfifo_sync_to_device_write(ctx);
> 
> +	ctx->prescan_failed = false;
> +
>  	return true;
>  }
> 
> @@ -793,6 +941,84 @@ static void coda_fill_bitstream(struct coda_ctx
> *ctx)
>  /*
>   * Mem-to-mem operations.
>   */
> +static int coda_prepare_decode(struct coda_ctx *ctx) {
> +	struct vb2_buffer *dst_buf;
> +	struct coda_dev *dev = ctx->dev;
> +	struct coda_q_data *q_data_dst;
> +	u32 stridey, height;
> +	u32 picture_y, picture_cb, picture_cr;
> +
> +	dst_buf = v4l2_m2m_next_dst_buf(ctx->m2m_ctx);
> +	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
> +
> +	if (ctx->params.rot_mode & CODA_ROT_90) {
> +		stridey = q_data_dst->height;
> +		height = q_data_dst->width;
> +	} else {
> +		stridey = q_data_dst->width;
> +		height = q_data_dst->height;
> +	}
> +
> +	/* Try to copy source buffer contents into the bitstream
> ringbuffer */
> +	mutex_lock(&ctx->bitstream_mutex);
> +	coda_fill_bitstream(ctx);
> +	mutex_unlock(&ctx->bitstream_mutex);
> +
> +	if (coda_get_bitstream_payload(ctx) < 512 &&
> +	    (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))) {
> +		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
> +			 "bitstream payload: %d, skipping\n",
> +			 coda_get_bitstream_payload(ctx));
> +		schedule_work(&ctx->skip_run);
> +		return -EAGAIN;
> +	}
> +
> +	/* Run coda_start_decoding (again) if not yet initialized */
> +	if (!ctx->initialized) {
> +		int ret = coda_start_decoding(ctx);
> +		if (ret < 0) {
> +			v4l2_err(&dev->v4l2_dev, "failed to start
> decoding\n");
> +			schedule_work(&ctx->skip_run);
> +			return -EAGAIN;
> +		} else {
> +			ctx->initialized = 1;
> +		}
> +	}
> +
> +	/* Set rotator output */
> +	picture_y = vb2_dma_contig_plane_dma_addr(dst_buf, 0);
> +	if (q_data_dst->fourcc == V4L2_PIX_FMT_YVU420) {
> +		/* Switch Cr and Cb for YVU420 format */
> +		picture_cr = picture_y + stridey * height;
> +		picture_cb = picture_cr + stridey / 2 * height / 2;
> +	} else {
> +		picture_cb = picture_y + stridey * height;
> +		picture_cr = picture_cb + stridey / 2 * height / 2;
> +	}
> +	coda_write(dev, picture_y, CODA_CMD_DEC_PIC_ROT_ADDR_Y);
> +	coda_write(dev, picture_cb, CODA_CMD_DEC_PIC_ROT_ADDR_CB);
> +	coda_write(dev, picture_cr, CODA_CMD_DEC_PIC_ROT_ADDR_CR);
> +	coda_write(dev, stridey, CODA_CMD_DEC_PIC_ROT_STRIDE);
> +	coda_write(dev, CODA_ROT_MIR_ENABLE | ctx->params.rot_mode,
> +			CODA_CMD_DEC_PIC_ROT_MODE);
> +
> +	switch (dev->devtype->product) {
> +	case CODA_DX6:
> +		/* TBD */
> +	case CODA_7541:
> +		coda_write(dev, CODA_PRE_SCAN_EN, CODA_CMD_DEC_PIC_OPTION);
> +		break;
> +	}
> +
> +	coda_write(dev, 0, CODA_CMD_DEC_PIC_SKIP_NUM);
> +
> +	coda_write(dev, 0, CODA_CMD_DEC_PIC_BB_START);
> +	coda_write(dev, 0, CODA_CMD_DEC_PIC_START_BYTE);
> +
> +	return 0;
> +}
> +
>  static void coda_prepare_encode(struct coda_ctx *ctx)  {
>  	struct coda_q_data *q_data_src, *q_data_dst; @@ -810,9 +1036,9 @@
> static void coda_prepare_encode(struct coda_ctx *ctx)
>  	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
>  	dst_fourcc = q_data_dst->fourcc;
> 
> -	src_buf->v4l2_buf.sequence = ctx->isequence;
> -	dst_buf->v4l2_buf.sequence = ctx->isequence;
> -	ctx->isequence++;
> +	src_buf->v4l2_buf.sequence = ctx->osequence;
> +	dst_buf->v4l2_buf.sequence = ctx->osequence;
> +	ctx->osequence++;
> 
>  	/*
>  	 * Workaround coda firmware BUG that only marks the first @@ -
> 920,15 +1146,36 @@ static void coda_device_run(void *m2m_priv)  {
>  	struct coda_ctx *ctx = m2m_priv;
>  	struct coda_dev *dev = ctx->dev;
> +	int ret;
> 
> -	mutex_lock(&dev->coda_mutex);
> +	mutex_lock(&ctx->buffer_mutex);
> 
> -	coda_prepare_encode(ctx);
> +	/*
> +	 * If streamoff dequeued all buffers before we could get the lock,
> +	 * just bail out immediately.
> +	 */
> +	if ((!v4l2_m2m_num_src_bufs_ready(ctx->m2m_ctx) &&
> +	    ctx->inst_type != CODA_INST_DECODER) ||
> +		!v4l2_m2m_num_dst_bufs_ready(ctx->m2m_ctx)) {
> +		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
> +			"%d: device_run without buffers\n", ctx->idx);
> +		mutex_unlock(&ctx->buffer_mutex);
> +		schedule_work(&ctx->skip_run);
> +		return;
> +	}
> 
> -	if (dev->devtype->product == CODA_7541) {
> -		coda_write(dev, CODA7_USE_BIT_ENABLE |
> CODA7_USE_HOST_BIT_ENABLE |
> -				CODA7_USE_ME_ENABLE | CODA7_USE_HOST_ME_ENABLE,
> -				CODA7_REG_BIT_AXI_SRAM_USE);
> +	mutex_lock(&dev->coda_mutex);
> +
> +	if (ctx->inst_type == CODA_INST_DECODER) {
> +		ret = coda_prepare_decode(ctx);
> +		if (ret < 0) {
> +			mutex_unlock(&dev->coda_mutex);
> +			mutex_unlock(&ctx->buffer_mutex);
> +			/* job_finish scheduled by prepare_decode */
> +			return;
> +		}
> +	} else {
> +		coda_prepare_encode(ctx);
>  	}
> 
>  	if (dev->devtype->product != CODA_DX6) @@ -938,6 +1185,8 @@
> static void coda_device_run(void *m2m_priv)
>  	/* 1 second timeout in case CODA locks up */
>  	schedule_delayed_work(&dev->timeout, HZ);
> 
> +	if (ctx->inst_type == CODA_INST_DECODER)
> +		coda_kfifo_sync_to_device_full(ctx);
>  	coda_command_async(ctx, CODA_COMMAND_PIC_RUN);  }
> 
> @@ -963,6 +1212,16 @@ static int coda_job_ready(void *m2m_priv)
>  		return 0;
>  	}
> 
> +	if (ctx->prescan_failed ||
> +	    ((ctx->inst_type == CODA_INST_DECODER) &&
> +	     (coda_get_bitstream_payload(ctx) < 512) &&
> +	     !(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))) {
> +		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
> +			 "%d: not ready: not enough bitstream data.\n",
> +			 ctx->idx);
> +		return 0;
> +	}
> +
>  	if (ctx->aborting) {
>  		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
>  			 "not ready: aborting\n");
> @@ -1078,7 +1337,29 @@ static int coda_buf_prepare(struct vb2_buffer
> *vb)  static void coda_buf_queue(struct vb2_buffer *vb)  {
>  	struct coda_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
> -	v4l2_m2m_buf_queue(ctx->m2m_ctx, vb);
> +	struct coda_q_data *q_data;
> +
> +	q_data = get_q_data(ctx, vb->vb2_queue->type);
> +
> +	/*
> +	 * In the decoder case, immediately try to copy the buffer into
> the
> +	 * bitstream ringbuffer and mark it as ready to be dequeued.
> +	 */
> +	if (q_data->fourcc == V4L2_PIX_FMT_H264 &&
> +	    vb->vb2_queue->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) {
> +		/*
> +		 * For backwards compatibility, queuing an empty buffer
> marks
> +		 * the stream end
> +		 */
> +		if (vb2_get_plane_payload(vb, 0) == 0)
> +			ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
> +		mutex_lock(&ctx->bitstream_mutex);
> +		v4l2_m2m_buf_queue(ctx->m2m_ctx, vb);
> +		coda_fill_bitstream(ctx);
> +		mutex_unlock(&ctx->bitstream_mutex);
> +	} else {
> +		v4l2_m2m_buf_queue(ctx->m2m_ctx, vb);
> +	}
>  }
> 
>  static void coda_wait_prepare(struct vb2_queue *q) @@ -1366,6 +1647,8
> @@ static void coda_free_context_buffers(struct coda_ctx *ctx)  {
>  	struct coda_dev *dev = ctx->dev;
> 
> +	coda_free_aux_buf(dev, &ctx->slicebuf);
> +	coda_free_aux_buf(dev, &ctx->psbuf);
>  	if (dev->devtype->product != CODA_DX6)
>  		coda_free_aux_buf(dev, &ctx->workbuf);  } @@ -1385,12
> +1668,40 @@ static int coda_alloc_context_buffers(struct coda_ctx *ctx,
>  		return 0;
>  	}
> 
> +	if (ctx->psbuf.vaddr) {
> +		v4l2_err(&dev->v4l2_dev, "psmembuf still allocated\n");
> +		return -EBUSY;
> +	}
> +	if (ctx->slicebuf.vaddr) {
> +		v4l2_err(&dev->v4l2_dev, "slicebuf still allocated\n");
> +		return -EBUSY;
> +	}
>  	if (ctx->workbuf.vaddr) {
>  		v4l2_err(&dev->v4l2_dev, "context buffer still
> allocated\n");
>  		ret = -EBUSY;
>  		return -ENOMEM;
>  	}
> 
> +	if (q_data->fourcc == V4L2_PIX_FMT_H264) {
> +		/* worst case slice size */
> +		size = (DIV_ROUND_UP(q_data->width, 16) *
> +			DIV_ROUND_UP(q_data->height, 16)) * 3200 / 8 + 512;
> +		ret = coda_alloc_context_buf(ctx, &ctx->slicebuf, size);
> +		if (ret < 0) {
> +			v4l2_err(&dev->v4l2_dev, "failed to allocate %d byte
> slice buffer",
> +				 ctx->slicebuf.size);
> +			return ret;
> +		}
> +	}
> +
> +	if (dev->devtype->product == CODA_7541) {
> +		ret = coda_alloc_context_buf(ctx, &ctx->psbuf,
> CODA7_PS_BUF_SIZE);
> +		if (ret < 0) {
> +			v4l2_err(&dev->v4l2_dev, "failed to allocate psmem
> buffer");
> +			goto err;
> +		}
> +	}
> +
>  	ret = coda_alloc_context_buf(ctx, &ctx->workbuf, size);
>  	if (ret < 0) {
>  		v4l2_err(&dev->v4l2_dev, "failed to allocate %d byte
> context buffer", @@ -1405,6 +1716,148 @@ err:
>  	return ret;
>  }
> 
> +static int coda_start_decoding(struct coda_ctx *ctx) {
> +	struct coda_q_data *q_data_src, *q_data_dst;
> +	u32 bitstream_buf, bitstream_size;
> +	struct coda_dev *dev = ctx->dev;
> +	int width, height;
> +	u32 src_fourcc;
> +	u32 val;
> +	int ret;
> +
> +	/* Start decoding */
> +	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
> +	bitstream_buf = ctx->bitstream.paddr;
> +	bitstream_size = ctx->bitstream.size;
> +	src_fourcc = q_data_src->fourcc;
> +
> +	coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);
> +
> +	/* Update coda bitstream read and write pointers from kfifo */
> +	coda_kfifo_sync_to_device_full(ctx);
> +
> +	ctx->display_idx = -1;
> +	ctx->frm_dis_flg = 0;
> +	coda_write(dev, 0, CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
> +
> +	coda_write(dev, CODA_BIT_DEC_SEQ_INIT_ESCAPE,
> +			CODA_REG_BIT_BIT_STREAM_PARAM);
> +
> +	coda_write(dev, bitstream_buf, CODA_CMD_DEC_SEQ_BB_START);
> +	coda_write(dev, bitstream_size / 1024, CODA_CMD_DEC_SEQ_BB_SIZE);
> +	val = 0;
> +	if (dev->devtype->product == CODA_7541)
> +		val |= CODA_REORDER_ENABLE;
> +	coda_write(dev, val, CODA_CMD_DEC_SEQ_OPTION);
> +
> +	ctx->params.codec_mode = ctx->codec->mode;
> +	ctx->params.codec_mode_aux = 0;
> +	if (src_fourcc == V4L2_PIX_FMT_H264) {
> +		if (dev->devtype->product == CODA_7541) {
> +			coda_write(dev, ctx->psbuf.paddr,
> +					CODA_CMD_DEC_SEQ_PS_BB_START);
> +			coda_write(dev, (CODA7_PS_BUF_SIZE / 1024),
> +					CODA_CMD_DEC_SEQ_PS_BB_SIZE);
> +		}
> +	}
> +
> +	if (coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT)) {
> +		v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
> +		coda_write(dev, 0, CODA_REG_BIT_BIT_STREAM_PARAM);
> +		return -ETIMEDOUT;
> +	}
> +
> +	/* Update kfifo out pointer from coda bitstream read pointer */
> +	coda_kfifo_sync_from_device(ctx);
> +
> +	coda_write(dev, 0, CODA_REG_BIT_BIT_STREAM_PARAM);
> +
> +	if (coda_read(dev, CODA_RET_DEC_SEQ_SUCCESS) == 0) {
> +		v4l2_err(&dev->v4l2_dev,
> +			"CODA_COMMAND_SEQ_INIT failed, error code = %d\n",
> +			coda_read(dev, CODA_RET_DEC_SEQ_ERR_REASON));
> +		return -EAGAIN;
> +	}
> +
> +	val = coda_read(dev, CODA_RET_DEC_SEQ_SRC_SIZE);
> +	if (dev->devtype->product == CODA_DX6) {
> +		width = (val >> CODADX6_PICWIDTH_OFFSET) &
> CODADX6_PICWIDTH_MASK;
> +		height = val & CODADX6_PICHEIGHT_MASK;
> +	} else {
> +		width = (val >> CODA7_PICWIDTH_OFFSET) &
> CODA7_PICWIDTH_MASK;
> +		height = val & CODA7_PICHEIGHT_MASK;
> +	}
> +
> +	if (width > q_data_dst->width || height > q_data_dst->height) {
> +		v4l2_err(&dev->v4l2_dev, "stream is %dx%d, not %dx%d\n",
> +			 width, height, q_data_dst->width, q_data_dst-
> >height);
> +		return -EINVAL;
> +	}
> +
> +	width = round_up(width, 16);
> +	height = round_up(height, 16);
> +
> +	v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "%s instance %d
> now: %dx%d\n",
> +		 __func__, ctx->idx, width, height);
> +
> +	ctx->num_internal_frames = coda_read(dev,
> CODA_RET_DEC_SEQ_FRAME_NEED) + 1;
> +	if (ctx->num_internal_frames > CODA_MAX_FRAMEBUFFERS) {
> +		v4l2_err(&dev->v4l2_dev,
> +			 "not enough framebuffers to decode (%d < %d)\n",
> +			 CODA_MAX_FRAMEBUFFERS, ctx->num_internal_frames);
> +		return -EINVAL;
> +	}
> +
> +	ret = coda_alloc_framebuffers(ctx, q_data_dst, src_fourcc);
> +	if (ret < 0)
> +		return ret;
> +
> +	/* Tell the decoder how many frame buffers we allocated. */
> +	coda_write(dev, ctx->num_internal_frames,
> CODA_CMD_SET_FRAME_BUF_NUM);
> +	coda_write(dev, width, CODA_CMD_SET_FRAME_BUF_STRIDE);
> +
> +	if (dev->devtype->product != CODA_DX6) {
> +		/* Set secondary AXI IRAM */
> +		coda_setup_iram(ctx);
> +
> +		coda_write(dev, ctx->iram_info.buf_bit_use,
> +				CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
> +		coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
> +				CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
> +		coda_write(dev, ctx->iram_info.buf_dbk_y_use,
> +				CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
> +		coda_write(dev, ctx->iram_info.buf_dbk_c_use,
> +				CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
> +		coda_write(dev, ctx->iram_info.buf_ovl_use,
> +				CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
> +	}
> +
> +	if (src_fourcc == V4L2_PIX_FMT_H264) {
> +		coda_write(dev, ctx->slicebuf.paddr,
> +				CODA_CMD_SET_FRAME_SLICE_BB_START);
> +		coda_write(dev, ctx->slicebuf.size / 1024,
> +				CODA_CMD_SET_FRAME_SLICE_BB_SIZE);
> +	}
> +
> +	if (dev->devtype->product == CODA_7541) {
> +		int max_mb_x = 1920 / 16;
> +		int max_mb_y = 1088 / 16;
> +		int max_mb_num = max_mb_x * max_mb_y;
> +		coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y,
> +				CODA7_CMD_SET_FRAME_MAX_DEC_SIZE);
> +	}
> +
> +	if (coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF)) {
> +		v4l2_err(&ctx->dev->v4l2_dev,
> +			 "CODA_COMMAND_SET_FRAME_BUF timeout\n");
> +		return -ETIMEDOUT;
> +	}
> +
> +	return 0;
> +}
> +
>  static int coda_encode_header(struct coda_ctx *ctx, struct vb2_buffer
> *buf,
>  			      int header_code, u8 *header, int *size)  { @@ -
> 1439,26 +1892,36 @@ static int coda_start_streaming(struct vb2_queue *q,
> unsigned int count)
>  	u32 value;
>  	int ret = 0;
> 
> -	if (count < 1)
> -		return -EINVAL;
> +	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) {
> +		if (q_data_src->fourcc == V4L2_PIX_FMT_H264) {
> +			if (coda_get_bitstream_payload(ctx) < 512)
> +				return -EINVAL;
> +		} else {
> +			if (count < 1)
> +				return -EINVAL;
> +		}
> 
> -	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT)
>  		ctx->streamon_out = 1;
> -	else
> -		ctx->streamon_cap = 1;
> 
> -	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
> -	if (ctx->streamon_out) {
>  		if (coda_format_is_yuv(q_data_src->fourcc))
>  			ctx->inst_type = CODA_INST_ENCODER;
>  		else
>  			ctx->inst_type = CODA_INST_DECODER;
> +	} else {
> +		if (count < 1)
> +			return -EINVAL;
> +
> +		ctx->streamon_cap = 1;
>  	}
> 
>  	/* Don't start the coda unless both queues are on */
>  	if (!(ctx->streamon_out & ctx->streamon_cap))
>  		return 0;
> 
> +	/* Allow device_run with no buffers queued and after streamoff */
> +	v4l2_m2m_set_src_buffered(ctx->m2m_ctx, true);
> +
>  	ctx->gopcounter = ctx->params.gop_size - 1;
>  	buf = v4l2_m2m_next_dst_buf(ctx->m2m_ctx);
>  	bitstream_buf = vb2_dma_contig_plane_dma_addr(buf, 0); @@ -1478,6
> +1941,20 @@ static int coda_start_streaming(struct vb2_queue *q,
> unsigned int count)
>  	if (ret < 0)
>  		return ret;
> 
> +	if (ctx->inst_type == CODA_INST_DECODER) {
> +		mutex_lock(&dev->coda_mutex);
> +		ret = coda_start_decoding(ctx);
> +		mutex_unlock(&dev->coda_mutex);
> +		if (ret == -EAGAIN) {
> +			return 0;
> +		} else if (ret < 0) {
> +			return ret;
> +		} else {
> +			ctx->initialized = 1;
> +			return 0;
> +		}
> +	}
> +
>  	if (!coda_is_initialized(dev)) {
>  		v4l2_err(v4l2_dev, "coda is not initialized.\n");
>  		return -EFAULT;
> @@ -1619,6 +2096,9 @@ static int coda_start_streaming(struct vb2_queue
> *q, unsigned int count)
> 
>  	coda_write(dev, ctx->num_internal_frames,
> CODA_CMD_SET_FRAME_BUF_NUM);
>  	coda_write(dev, round_up(q_data_src->width, 8),
> CODA_CMD_SET_FRAME_BUF_STRIDE);
> +	if (dev->devtype->product == CODA_7541)
> +		coda_write(dev, round_up(q_data_src->width, 8),
> +				CODA7_CMD_SET_FRAME_SOURCE_BUF_STRIDE);
>  	if (dev->devtype->product != CODA_DX6) {
>  		coda_write(dev, ctx->iram_info.buf_bit_use,
>  				CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
> @@ -1710,32 +2190,26 @@ static int coda_stop_streaming(struct vb2_queue
> *q)
>  	struct coda_dev *dev = ctx->dev;
> 
>  	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) {
> -		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
> +		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
>  			 "%s: output\n", __func__);
>  		ctx->streamon_out = 0;
> +
> +		ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
> +
> +		ctx->isequence = 0;
>  	} else {
> -		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
> +		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
>  			 "%s: capture\n", __func__);
>  		ctx->streamon_cap = 0;
> -	}
> -
> -	/* Don't stop the coda unless both queues are off */
> -	if (ctx->streamon_out || ctx->streamon_cap)
> -		return 0;
> 
> -	cancel_delayed_work(&dev->timeout);
> -
> -	mutex_lock(&dev->coda_mutex);
> -	v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
> -		 "%s: sent command 'SEQ_END' to coda\n", __func__);
> -	if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) {
> -		v4l2_err(&dev->v4l2_dev,
> -			 "CODA_COMMAND_SEQ_END failed\n");
> -		return -ETIMEDOUT;
> +		ctx->osequence = 0;
>  	}
> -	mutex_unlock(&dev->coda_mutex);
> 
> -	coda_free_framebuffers(ctx);
> +	if (!ctx->streamon_out && !ctx->streamon_cap) {
> +		kfifo_init(&ctx->bitstream_fifo,
> +			ctx->bitstream.vaddr, ctx->bitstream.size);
> +		ctx->runcounter = 0;
> +	}
> 
>  	return 0;
>  }
> @@ -1895,7 +2369,7 @@ static int coda_open(struct file *file)  {
>  	struct coda_dev *dev = video_drvdata(file);
>  	struct coda_ctx *ctx = NULL;
> -	int ret = 0;
> +	int ret;
>  	int idx;
> 
>  	idx = coda_next_free_instance(dev);
> @@ -1907,6 +2381,7 @@ static int coda_open(struct file *file)
>  	if (!ctx)
>  		return -ENOMEM;
> 
> +	INIT_WORK(&ctx->skip_run, coda_skip_run);
>  	v4l2_fh_init(&ctx->fh, video_devdata(file));
>  	file->private_data = &ctx->fh;
>  	v4l2_fh_add(&ctx->fh);
> @@ -1954,6 +2429,7 @@ static int coda_open(struct file *file)
>  	kfifo_init(&ctx->bitstream_fifo,
>  		ctx->bitstream.vaddr, ctx->bitstream.size);
>  	mutex_init(&ctx->bitstream_mutex);
> +	mutex_init(&ctx->buffer_mutex);
> 
>  	coda_lock(ctx);
>  	list_add(&ctx->list, &dev->instances); @@ -1982,6 +2458,23 @@
> static int coda_release(struct file *file)
>  	v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "Releasing instance %p\n",
>  		 ctx);
> 
> +	/* If this instance is running, call .job_abort and wait for it
> to end */
> +	v4l2_m2m_ctx_release(ctx->m2m_ctx);
> +
> +	/* In case the instance was not running, we still need to call
> SEQ_END */
> +	mutex_lock(&dev->coda_mutex);
> +	v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
> +		 "%s: sent command 'SEQ_END' to coda\n", __func__);
> +	if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) {
> +		v4l2_err(&dev->v4l2_dev,
> +			 "CODA_COMMAND_SEQ_END failed\n");
> +		mutex_unlock(&dev->coda_mutex);
> +		return -ETIMEDOUT;
> +	}
> +	mutex_unlock(&dev->coda_mutex);
> +
> +	coda_free_framebuffers(ctx);
> +
>  	coda_lock(ctx);
>  	list_del(&ctx->list);
>  	coda_unlock(ctx);
> @@ -2032,7 +2525,159 @@ static const struct v4l2_file_operations coda_fops = {
>  	.mmap		= coda_mmap,
>  };
> 
> -static void coda_encode_finish(struct coda_ctx *ctx)
> +static void coda_finish_decode(struct coda_ctx *ctx)
> +{
> +	struct coda_dev *dev = ctx->dev;
> +	struct coda_q_data *q_data_src;
> +	struct coda_q_data *q_data_dst;
> +	struct vb2_buffer *dst_buf;
> +	int width, height;
> +	int decoded_idx;
> +	int display_idx;
> +	u32 src_fourcc;
> +	int success;
> +	u32 val;
> +
> +	dst_buf = v4l2_m2m_next_dst_buf(ctx->m2m_ctx);
> +
> +	/* Update kfifo out pointer from coda bitstream read pointer */
> +	coda_kfifo_sync_from_device(ctx);
> +
> +	/*
> +	 * in stream-end mode, the read pointer can overshoot the write pointer
> +	 * by up to 512 bytes
> +	 */
> +	if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) {
> +		if (coda_get_bitstream_payload(ctx) >= 0x100000 - 512)
> +			kfifo_init(&ctx->bitstream_fifo,
> +				ctx->bitstream.vaddr, ctx->bitstream.size);
> +	}
> +
> +	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +	src_fourcc = q_data_src->fourcc;
> +
> +	val = coda_read(dev, CODA_RET_DEC_PIC_SUCCESS);
> +	if (val != 1)
> +		pr_err("DEC_PIC_SUCCESS = %d\n", val);
> +
> +	success = val & 0x1;
> +	if (!success)
> +		v4l2_err(&dev->v4l2_dev, "decode failed\n");
> +
> +	if (src_fourcc == V4L2_PIX_FMT_H264) {
> +		if (val & (1 << 3))
> +			v4l2_err(&dev->v4l2_dev,
> +				 "insufficient PS buffer space (%d bytes)\n",
> +				 ctx->psbuf.size);
> +		if (val & (1 << 2))
> +			v4l2_err(&dev->v4l2_dev,
> +				 "insufficient slice buffer space (%d bytes)\n",
> +				 ctx->slicebuf.size);
> +	}
> +
> +	val = coda_read(dev, CODA_RET_DEC_PIC_SIZE);
> +	width = (val >> 16) & 0xffff;
> +	height = val & 0xffff;
> +
> +	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
> +
> +	val = coda_read(dev, CODA_RET_DEC_PIC_TYPE);
> +	if ((val & 0x7) == 0) {
> +		dst_buf->v4l2_buf.flags |= V4L2_BUF_FLAG_KEYFRAME;
> +		dst_buf->v4l2_buf.flags &= ~V4L2_BUF_FLAG_PFRAME;
> +	} else {
> +		dst_buf->v4l2_buf.flags |= V4L2_BUF_FLAG_PFRAME;
> +		dst_buf->v4l2_buf.flags &= ~V4L2_BUF_FLAG_KEYFRAME;
> +	}
> +
> +	val = coda_read(dev, CODA_RET_DEC_PIC_ERR_MB);
> +	if (val > 0)
> +		v4l2_err(&dev->v4l2_dev,
> +			 "errors in %d macroblocks\n", val);
> +
> +	if (dev->devtype->product == CODA_7541) {
> +		val = coda_read(dev, CODA_RET_DEC_PIC_OPTION);
> +		if (val == 0) {
> +			/* not enough bitstream data */
> +			v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
> +				 "prescan failed: %d\n", val);
> +			ctx->prescan_failed = true;
> +			return;
> +		}
> +	}
> +
> +	ctx->frm_dis_flg = coda_read(dev, CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
> +
> +	/*
> +	 * The previous display frame was copied out by the rotator,
> +	 * now it can be overwritten again
> +	 */
> +	if (ctx->display_idx >= 0 &&
> +	    ctx->display_idx < ctx->num_internal_frames) {
> +		ctx->frm_dis_flg &= ~(1 << ctx->display_idx);
> +		coda_write(dev, ctx->frm_dis_flg,
> +				CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
> +	}
> +
> +	/*
> +	 * The index of the last decoded frame, not necessarily in
> +	 * display order, and the index of the next display frame.
> +	 * The latter could have been decoded in a previous run.
> +	 */
> +	decoded_idx = coda_read(dev, CODA_RET_DEC_PIC_CUR_IDX);
> +	display_idx = coda_read(dev, CODA_RET_DEC_PIC_FRAME_IDX);
> +
> +	if (decoded_idx == -1) {
> +		/* no frame was decoded, but we might have a display frame */
> +		if (display_idx < 0 && ctx->display_idx < 0)
> +			ctx->prescan_failed = true;
> +	} else if (decoded_idx == -2) {
> +		/* no frame was decoded, we still return the remaining buffers */
> +	} else if (decoded_idx < 0 || decoded_idx >= ctx->num_internal_frames) {
> +		v4l2_err(&dev->v4l2_dev,
> +			 "decoded frame index out of range: %d\n", decoded_idx);
> +	}
> +
> +	if (display_idx == -1) {
> +		/*
> +		 * no more frames to be decoded, but there could still
> +		 * be rotator output to dequeue
> +		 */
> +		ctx->prescan_failed = true;
> +	} else if (display_idx == -3) {
> +		/* possibly prescan failure */
> +	} else if (display_idx < 0 || display_idx >= ctx->num_internal_frames) {
> +		v4l2_err(&dev->v4l2_dev,
> +			 "presentation frame index out of range: %d\n",
> +			 display_idx);
> +	}
> +
> +	/* If a frame was copied out, return it */
> +	if (ctx->display_idx >= 0 &&
> +	    ctx->display_idx < ctx->num_internal_frames) {
> +		dst_buf = v4l2_m2m_dst_buf_remove(ctx->m2m_ctx);
> +		dst_buf->v4l2_buf.sequence = ctx->osequence++;
> +
> +		vb2_set_plane_payload(dst_buf, 0, width * height * 3 / 2);
> +
> +		v4l2_m2m_buf_done(dst_buf, success ? VB2_BUF_STATE_DONE :
> +						     VB2_BUF_STATE_ERROR);
> +
> +		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
> +			"job finished: decoding frame (%d) (%s)\n",
> +			dst_buf->v4l2_buf.sequence,
> +			(dst_buf->v4l2_buf.flags & V4L2_BUF_FLAG_KEYFRAME) ?
> +			"KEYFRAME" : "PFRAME");
> +	} else {
> +		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
> +			"job finished: no frame decoded\n");
> +	}
> +
> +	/* The rotator will copy the current display frame next time */
> +	ctx->display_idx = display_idx;
> +}
> +
> +static void coda_finish_encode(struct coda_ctx *ctx)
>  {
>  	struct vb2_buffer *src_buf, *dst_buf;
>  	struct coda_dev *dev = ctx->dev;
> @@ -2109,8 +2754,7 @@ static irqreturn_t coda_irq_handler(int irq, void *data)
>  	if (ctx->aborting) {
>  		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
>  			 "task has been aborted\n");
> -		mutex_unlock(&dev->coda_mutex);
> -		return IRQ_HANDLED;
> +		goto out;
>  	}
> 
>  	if (coda_isbusy(ctx->dev)) {
> @@ -2119,9 +2763,29 @@ static irqreturn_t coda_irq_handler(int irq, void *data)
>  		return IRQ_NONE;
>  	}
> 
> -	coda_encode_finish(ctx);
> +	if (ctx->inst_type == CODA_INST_DECODER)
> +		coda_finish_decode(ctx);
> +	else
> +		coda_finish_encode(ctx);
> +
> +out:
> +	if (ctx->aborting || (!ctx->streamon_cap && !ctx->streamon_out)) {
> +		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
> +			 "%s: sent command 'SEQ_END' to coda\n", __func__);
> +		if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) {
> +			v4l2_err(&dev->v4l2_dev,
> +				 "CODA_COMMAND_SEQ_END failed\n");
> +		}
> +
> +		kfifo_init(&ctx->bitstream_fifo,
> +			ctx->bitstream.vaddr, ctx->bitstream.size);
> +
> +		coda_free_framebuffers(ctx);
> +		coda_free_context_buffers(ctx);
> +	}
> 
>  	mutex_unlock(&dev->coda_mutex);
> +	mutex_unlock(&ctx->buffer_mutex);
> 
>  	v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->m2m_ctx);
> 
> @@ -2138,6 +2802,8 @@ static void coda_timeout(struct work_struct *work)
> 
>  	mutex_lock(&dev->dev_mutex);
>  	list_for_each_entry(ctx, &dev->instances, list) {
> +		if (mutex_is_locked(&ctx->buffer_mutex))
> +			mutex_unlock(&ctx->buffer_mutex);
>  		v4l2_m2m_streamoff(NULL, ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
>  		v4l2_m2m_streamoff(NULL, ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
>  	}
> @@ -2218,6 +2884,7 @@ static int coda_hw_init(struct coda_dev *dev)
>  	if (dev->devtype->product == CODA_7541) {
>  		coda_write(dev, dev->tempbuf.paddr,
>  				CODA_REG_BIT_TEMP_BUF_ADDR);
> +		coda_write(dev, 0, CODA_REG_BIT_BIT_STREAM_PARAM);
>  	} else {
>  		coda_write(dev, dev->workbuf.paddr,
>  			      CODA_REG_BIT_WORK_BUF_ADDR);
> @@ -2462,8 +3129,8 @@ static int coda_probe(struct platform_device *pdev)
>  		return -ENOENT;
>  	}
> 
> -	if (devm_request_irq(&pdev->dev, irq, coda_irq_handler,
> -		0, CODA_NAME, dev) < 0) {
> +	if (devm_request_threaded_irq(&pdev->dev, irq, NULL, coda_irq_handler,
> +		IRQF_ONESHOT, CODA_NAME, dev) < 0) {
>  		dev_err(&pdev->dev, "failed to request irq\n");
>  		return -ENOENT;
>  	}
> @@ -2521,10 +3188,14 @@ static int coda_probe(struct platform_device *pdev)
>  		}
>  	}
> 
> -	if (dev->devtype->product == CODA_DX6)
> +	switch (dev->devtype->product) {
> +	case CODA_DX6:
>  		dev->iram_size = CODADX6_IRAM_SIZE;
> -	else
> +		break;
> +	case CODA_7541:
>  		dev->iram_size = CODA7_IRAM_SIZE;
> +		break;
> +	}
>  	dev->iram_vaddr = gen_pool_alloc(dev->iram_pool, dev->iram_size);
>  	if (!dev->iram_vaddr) {
>  		dev_err(&pdev->dev, "unable to alloc iram\n");
> diff --git a/drivers/media/platform/coda.h b/drivers/media/platform/coda.h
> index 140eea5..4e32e2e 100644
> --- a/drivers/media/platform/coda.h
> +++ b/drivers/media/platform/coda.h
> @@ -49,6 +49,7 @@
>  #define CODA_REG_BIT_TEMP_BUF_ADDR		0x118
>  #define CODA_REG_BIT_RD_PTR(x)			(0x120 + 8 * (x))
>  #define CODA_REG_BIT_WR_PTR(x)			(0x124 + 8 * (x))
> +#define CODA_REG_BIT_FRM_DIS_FLG(x)		(0x150 + 4 * (x))
>  #define CODADX6_REG_BIT_SEARCH_RAM_BASE_ADDR	0x140
>  #define CODA7_REG_BIT_AXI_SRAM_USE		0x140
>  #define		CODA7_USE_HOST_ME_ENABLE	(1 << 11)
> @@ -95,6 +96,7 @@
>  #define 	CODA_MODE_INVALID		0xffff
>  #define CODA_REG_BIT_INT_ENABLE		0x170
>  #define		CODA_INT_INTERRUPT_ENABLE	(1 << 3)
> +#define CODA_REG_BIT_INT_REASON			0x174
>  #define CODA7_REG_BIT_RUN_AUX_STD		0x178
>  #define		CODA_MP4_AUX_MPEG4		0
>  #define		CODA_MP4_AUX_DIVX3		1
> @@ -111,15 +113,89 @@
>   * issued.
>   */
> 
> +/* Decoder Sequence Initialization */
> +#define CODA_CMD_DEC_SEQ_BB_START		0x180
> +#define CODA_CMD_DEC_SEQ_BB_SIZE		0x184
> +#define CODA_CMD_DEC_SEQ_OPTION			0x188
> +#define		CODA_REORDER_ENABLE			(1 << 1)
> +#define		CODADX6_QP_REPORT			(1 << 0)
> +#define		CODA7_MP4_DEBLK_ENABLE			(1 << 0)
> +#define CODA_CMD_DEC_SEQ_SRC_SIZE		0x18c
> +#define CODA_CMD_DEC_SEQ_START_BYTE		0x190
> +#define CODA_CMD_DEC_SEQ_PS_BB_START		0x194
> +#define CODA_CMD_DEC_SEQ_PS_BB_SIZE		0x198
> +#define CODA_CMD_DEC_SEQ_MP4_ASP_CLASS		0x19c
> +#define CODA_CMD_DEC_SEQ_X264_MV_EN		0x19c
> +#define CODA_CMD_DEC_SEQ_SPP_CHUNK_SIZE		0x1a0
> +
> +#define CODA7_RET_DEC_SEQ_ASPECT		0x1b0
> +#define CODA_RET_DEC_SEQ_SUCCESS		0x1c0
> +#define CODA_RET_DEC_SEQ_SRC_FMT		0x1c4 /* SRC_SIZE on CODA7 */
> +#define CODA_RET_DEC_SEQ_SRC_SIZE		0x1c4
> +#define CODA_RET_DEC_SEQ_SRC_F_RATE		0x1c8
> +#define CODA9_RET_DEC_SEQ_ASPECT		0x1c8
> +#define CODA_RET_DEC_SEQ_FRAME_NEED		0x1cc
> +#define CODA_RET_DEC_SEQ_FRAME_DELAY		0x1d0
> +#define CODA_RET_DEC_SEQ_INFO			0x1d4
> +#define CODA_RET_DEC_SEQ_CROP_LEFT_RIGHT	0x1d8
> +#define CODA_RET_DEC_SEQ_CROP_TOP_BOTTOM	0x1dc
> +#define CODA_RET_DEC_SEQ_NEXT_FRAME_NUM		0x1e0
> +#define CODA_RET_DEC_SEQ_ERR_REASON		0x1e0
> +#define CODA_RET_DEC_SEQ_FRATE_NR		0x1e4
> +#define CODA_RET_DEC_SEQ_FRATE_DR		0x1e8
> +#define CODA_RET_DEC_SEQ_JPG_PARA		0x1e4
> +#define CODA_RET_DEC_SEQ_JPG_THUMB_IND		0x1e8
> +
> +/* Decoder Picture Run */
> +#define CODA_CMD_DEC_PIC_ROT_MODE		0x180
> +#define CODA_CMD_DEC_PIC_ROT_ADDR_Y		0x184
> +#define CODA_CMD_DEC_PIC_ROT_ADDR_CB		0x188
> +#define CODA_CMD_DEC_PIC_ROT_ADDR_CR		0x18c
> +#define CODA_CMD_DEC_PIC_ROT_STRIDE		0x190
> +
> +#define CODA_CMD_DEC_PIC_OPTION			0x194
> +#define		CODA_PRE_SCAN_EN			(1 << 0)
> +#define		CODA_PRE_SCAN_MODE_DECODE		(0 << 1)
> +#define		CODA_PRE_SCAN_MODE_RETURN		(1 << 1)
> +#define		CODA_IFRAME_SEARCH_EN			(1 << 2)
> +#define		CODA_SKIP_FRAME_MODE			(0x3 << 3)
> +#define CODA_CMD_DEC_PIC_SKIP_NUM		0x198
> +#define CODA_CMD_DEC_PIC_CHUNK_SIZE		0x19c
> +#define CODA_CMD_DEC_PIC_BB_START		0x1a0
> +#define CODA_CMD_DEC_PIC_START_BYTE		0x1a4
> +#define CODA_RET_DEC_PIC_SIZE			0x1bc
> +#define CODA_RET_DEC_PIC_FRAME_NUM		0x1c0
> +#define CODA_RET_DEC_PIC_FRAME_IDX		0x1c4
> +#define CODA_RET_DEC_PIC_ERR_MB			0x1c8
> +#define CODA_RET_DEC_PIC_TYPE			0x1cc
> +#define		CODA_PIC_TYPE_MASK			0x7
> +#define		CODA_PIC_TYPE_MASK_VC1			0x3f
> +#define		CODA9_PIC_TYPE_FIRST_MASK		(0x7 << 3)
> +#define		CODA9_PIC_TYPE_IDR_MASK			(0x3 << 6)
> +#define		CODA7_PIC_TYPE_H264_NPF_MASK		(0x3 << 16)
> +#define		CODA7_PIC_TYPE_INTERLACED		(1 << 18)
> +#define CODA_RET_DEC_PIC_POST			0x1d0
> +#define CODA_RET_DEC_PIC_MVC_REPORT		0x1d0
> +#define CODA_RET_DEC_PIC_OPTION			0x1d4
> +#define CODA_RET_DEC_PIC_SUCCESS		0x1d8
> +#define CODA_RET_DEC_PIC_CUR_IDX		0x1dc
> +#define CODA_RET_DEC_PIC_CROP_LEFT_RIGHT	0x1e0
> +#define CODA_RET_DEC_PIC_CROP_TOP_BOTTOM	0x1e4
> +#define CODA_RET_DEC_PIC_FRAME_NEED		0x1ec
> +
>  /* Encoder Sequence Initialization */
>  #define CODA_CMD_ENC_SEQ_BB_START				0x180
>  #define CODA_CMD_ENC_SEQ_BB_SIZE				0x184
>  #define CODA_CMD_ENC_SEQ_OPTION				0x188
> +#define		CODA7_OPTION_AVCINTRA16X16ONLY_OFFSET		9
>  #define		CODA7_OPTION_GAMMA_OFFSET			8
> +#define		CODA7_OPTION_RCQPMAX_OFFSET			7
>  #define		CODADX6_OPTION_GAMMA_OFFSET			7
> +#define		CODA7_OPTION_RCQPMIN_OFFSET			6
>  #define		CODA_OPTION_LIMITQP_OFFSET			6
>  #define		CODA_OPTION_RCINTRAQP_OFFSET			5
>  #define		CODA_OPTION_FMO_OFFSET				4
> +#define		CODA_OPTION_AVC_AUD_OFFSET			2
>  #define		CODA_OPTION_SLICEREPORT_OFFSET			1
>  #define CODA_CMD_ENC_SEQ_COD_STD				0x18c
>  #define		CODA_STD_MPEG4					0
> @@ -188,8 +264,10 @@
>  #define		CODA_FMOPARAM_TYPE_MASK				1
>  #define		CODA_FMOPARAM_SLICENUM_OFFSET			0
>  #define		CODA_FMOPARAM_SLICENUM_MASK			0x0f
> +#define CODADX6_CMD_ENC_SEQ_INTRA_QP				0x1bc
>  #define CODA7_CMD_ENC_SEQ_SEARCH_BASE				0x1b8
>  #define CODA7_CMD_ENC_SEQ_SEARCH_SIZE				0x1bc
> +#define CODA7_CMD_ENC_SEQ_INTRA_QP				0x1c4
>  #define CODA_CMD_ENC_SEQ_RC_QP_MAX				0x1c8
>  #define		CODA_QPMAX_OFFSET				0
>  #define		CODA_QPMAX_MASK					0x3f
> @@ -216,18 +294,24 @@
>  #define CODA_CMD_ENC_PIC_OPTION	0x194
>  #define CODA_CMD_ENC_PIC_BB_START	0x198
>  #define CODA_CMD_ENC_PIC_BB_SIZE	0x19c
> +#define CODA_RET_ENC_FRAME_NUM		0x1c0
>  #define CODA_RET_ENC_PIC_TYPE		0x1c4
> +#define CODA_RET_ENC_PIC_FRAME_IDX	0x1c8
>  #define CODA_RET_ENC_PIC_SLICE_NUM	0x1cc
>  #define CODA_RET_ENC_PIC_FLAG		0x1d0
> +#define CODA_RET_ENC_PIC_SUCCESS	0x1d8
> 
>  /* Set Frame Buffer */
>  #define CODA_CMD_SET_FRAME_BUF_NUM		0x180
>  #define CODA_CMD_SET_FRAME_BUF_STRIDE		0x184
> +#define CODA_CMD_SET_FRAME_SLICE_BB_START	0x188
> +#define CODA_CMD_SET_FRAME_SLICE_BB_SIZE	0x18c
>  #define CODA7_CMD_SET_FRAME_AXI_BIT_ADDR	0x190
>  #define CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR	0x194
>  #define CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR	0x198
>  #define CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR	0x19c
>  #define CODA7_CMD_SET_FRAME_AXI_OVL_ADDR	0x1a0
> +#define CODA7_CMD_SET_FRAME_MAX_DEC_SIZE	0x1a4
>  #define CODA7_CMD_SET_FRAME_SOURCE_BUF_STRIDE	0x1a8
> 
>  /* Encoder Header */
> --
> 1.8.3.1


--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Input]     [Video for Linux]     [Gstreamer Embedded]     [Mplayer Users]     [Linux USB Devel]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [Yosemite Backpacking]
  Powered by Linux