Re: [PATCH v2 0/6] media: cedrus: h264: Support multi-slice frames

Hans Verkuil <hverkuil-cisco@xxxxxxxxx> · Mon, 7 Oct 2019 12:44:24 +0200

Hi Jernej,

On 9/29/19 10:00 PM, Jernej Skrabec wrote:
> This series adds support for decoding multi-slice H264 frames along with
> support for V4L2_DEC_CMD_FLUSH and V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF.
> 
> Code was tested by modified ffmpeg, which can be found here:
> https://github.com/jernejsk/FFmpeg, branch mainline-test
> It has to be configured with at least following options:
> --enable-v4l2-request --enable-libudev --enable-libdrm
> 
> Samples used for testing:
> http://jernej.libreelec.tv/videos/h264/BA1_FT_C.mp4
> http://jernej.libreelec.tv/videos/h264/h264.mp4
> 
> Command line used for testing:
> ffmpeg -hwaccel drm -hwaccel_device /dev/dri/card0 -i h264.mp4 -pix_fmt bgra -f fbdev /dev/fb0
> 
> Please note that V4L2_DEC_CMD_FLUSH was not tested because I'm
> not sure how. ffmpeg follows exactly which slice is last in frame
> and sets hold flag accordingly. Improper usage of hold flag would
> corrupt ffmpeg assumptions and it would probably crash. Any ideas
> how to test this are welcome!
> 
> Thanks to Jonas for adjusting ffmpeg.
> 
> Please let me know what you think.
> 
> Best regards,
> Jernej
> 
> Changes from v1:
> - added Rb tags
> - updated V4L2_DEC_CMD_FLUSH documentation
> - updated first slice detection in Cedrus
> - hold capture buffer flag is set according to source format
> - added v4l m2m stateless_(try_)decoder_cmd ioctl helpers
> 
> Hans Verkuil (2):
>   vb2: add V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF
>   videodev2.h: add V4L2_DEC_CMD_FLUSH
> 
> Jernej Skrabec (4):
>   media: v4l2-mem2mem: add stateless_(try_)decoder_cmd ioctl helpers
>   media: cedrus: Detect first slice of a frame
>   media: cedrus: h264: Support multiple slices per frame
>   media: cedrus: Add support for holding capture buffer
> 
>  Documentation/media/uapi/v4l/buffer.rst       | 13 ++++++
>  .../media/uapi/v4l/vidioc-decoder-cmd.rst     | 10 +++-
>  .../media/uapi/v4l/vidioc-reqbufs.rst         |  6 +++
>  .../media/videodev2.h.rst.exceptions          |  1 +
>  .../media/common/videobuf2/videobuf2-v4l2.c   |  8 +++-
>  drivers/media/v4l2-core/v4l2-mem2mem.c        | 35 ++++++++++++++
>  drivers/staging/media/sunxi/cedrus/cedrus.h   |  1 +
>  .../staging/media/sunxi/cedrus/cedrus_dec.c   | 11 +++++
>  .../staging/media/sunxi/cedrus/cedrus_h264.c  | 11 ++++-
>  .../staging/media/sunxi/cedrus/cedrus_hw.c    |  8 ++--
>  .../staging/media/sunxi/cedrus/cedrus_video.c | 14 ++++++
>  include/media/v4l2-mem2mem.h                  | 46 +++++++++++++++++++
>  include/media/videobuf2-core.h                |  3 ++
>  include/media/videobuf2-v4l2.h                |  5 ++
>  include/uapi/linux/videodev2.h                | 14 ++++--
>  15 files changed, 175 insertions(+), 11 deletions(-)
> 

I didn't want to make a v3 of this series, instead I hacked this patch that will
hopefully do all the locking right.

Basically I moved all the 'held' related code into v4l2-mem2mem under job_spinlock.
This simplifies the driver code as well.

But this is a hack that sits on top of this series. If your ffmpeg tests are now
successful, then I'll turn this into a proper series with correct documentation
(a lot of the comments are now wrong with this patch, so just ignore that).

Regards,

	Hans

diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
index 2677a07e4c9b..f81a8f2465ab 100644
--- a/drivers/media/v4l2-core/v4l2-mem2mem.c
+++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
@@ -412,25 +412,24 @@ static void v4l2_m2m_cancel_job(struct v4l2_m2m_ctx *m2m_ctx)
 	}
 }

-void v4l2_m2m_job_finish(struct v4l2_m2m_dev *m2m_dev,
-			 struct v4l2_m2m_ctx *m2m_ctx)
+static bool _v4l2_m2m_job_finish(struct v4l2_m2m_dev *m2m_dev,
+			  struct v4l2_m2m_ctx *m2m_ctx)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&m2m_dev->job_spinlock, flags);
 	if (!m2m_dev->curr_ctx || m2m_dev->curr_ctx != m2m_ctx) {
-		spin_unlock_irqrestore(&m2m_dev->job_spinlock, flags);
 		dprintk("Called by an instance not currently running\n");
-		return;
+		return false;
 	}

 	list_del(&m2m_dev->curr_ctx->queue);
 	m2m_dev->curr_ctx->job_flags &= ~(TRANS_QUEUED | TRANS_RUNNING);
 	wake_up(&m2m_dev->curr_ctx->finished);
 	m2m_dev->curr_ctx = NULL;
+	return true;
+}

-	spin_unlock_irqrestore(&m2m_dev->job_spinlock, flags);
-
+static void v4l2_m2m_job_next(struct v4l2_m2m_dev *m2m_dev,
+		       struct v4l2_m2m_ctx *m2m_ctx)
+{
 	/* This instance might have more buffers ready, but since we do not
 	 * allow more than one job on the job_queue per instance, each has
 	 * to be scheduled separately after the previous one finishes. */
@@ -441,8 +440,113 @@ void v4l2_m2m_job_finish(struct v4l2_m2m_dev *m2m_dev,
 	 */
 	schedule_work(&m2m_dev->job_work);
 }
+
+void v4l2_m2m_job_finish(struct v4l2_m2m_dev *m2m_dev,
+			 struct v4l2_m2m_ctx *m2m_ctx)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&m2m_dev->job_spinlock, flags);
+	if (!_v4l2_m2m_job_finish(m2m_dev, m2m_ctx)) {
+		spin_unlock_irqrestore(&m2m_dev->job_spinlock, flags);
+		return;
+	}
+	spin_unlock_irqrestore(&m2m_dev->job_spinlock, flags);
+
+	v4l2_m2m_job_next(m2m_dev, m2m_ctx);
+}
 EXPORT_SYMBOL(v4l2_m2m_job_finish);

+void v4l2_m2m_job_finish_held(struct v4l2_m2m_dev *m2m_dev,
+			 struct v4l2_m2m_ctx *m2m_ctx,
+			 enum vb2_buffer_state state)
+{
+	struct vb2_v4l2_buffer *src_buf, *dst_buf;
+	unsigned long flags;
+
+	spin_lock_irqsave(&m2m_dev->job_spinlock, flags);
+	src_buf = v4l2_m2m_src_buf_remove(m2m_ctx);
+	dst_buf = v4l2_m2m_next_dst_buf(m2m_ctx);
+
+	if (!src_buf || !dst_buf) {
+		pr_err("Missing source and/or destination buffers\n");
+		spin_unlock_irqrestore(&m2m_dev->job_spinlock, flags);
+		return;
+	}
+	v4l2_m2m_buf_done(src_buf, state);
+	if (!dst_buf->is_held) {
+		v4l2_m2m_dst_buf_remove(m2m_ctx);
+		v4l2_m2m_buf_done(dst_buf, state);
+	}
+	if (!_v4l2_m2m_job_finish(m2m_dev, m2m_ctx)) {
+		spin_unlock_irqrestore(&m2m_dev->job_spinlock, flags);
+		return;
+	}
+	spin_unlock_irqrestore(&m2m_dev->job_spinlock, flags);
+
+	v4l2_m2m_job_next(m2m_dev, m2m_ctx);
+}
+EXPORT_SYMBOL(v4l2_m2m_job_finish_held);
+
+/**
+ * v4l2_m2m_release_capture_buf() - check if the capture buffer should be
+ * released
+ *
+ * @out_vb: the output buffer
+ * @cap_vb: the capture buffer
+ *
+ * This helper function returns true if the current capture buffer should
+ * be released to vb2. This is the case if the output buffer specified that
+ * the capture buffer should be held (i.e. not returned to vb2) AND if the
+ * timestamp of the capture buffer differs from the output buffer timestamp.
+ *
+ * This helper is to be called at the start of the device_run callback:
+ *
+ * .. code-block:: c
+ *
+ *	if (v4l2_m2m_release_capture_buf(out_vb, cap_vb)) {
+ *		v4l2_m2m_dst_buf_remove(m2m_ctx);
+ *		v4l2_m2m_buf_done(cap_vb, VB2_BUF_STATE_DONE);
+ *		cap_vb = v4l2_m2m_next_dst_buf(m2m_ctx);
+ *	}
+ *	cap_vb->is_held = out_vb->flags & V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF;
+ *
+ *	...
+ *
+ *	v4l2_m2m_buf_done(out_vb, VB2_BUF_STATE_DONE);
+ *	if (!cap_vb->is_held) {
+ *		v4l2_m2m_dst_buf_remove(m2m_ctx);
+ *		v4l2_m2m_buf_done(cap_vb, VB2_BUF_STATE_DONE);
+ *	}
+ *
+ * This allows for multiple output buffers to be used to fill in a single
+ * capture buffer. This is typically used by stateless decoders where
+ * multiple e.g. H.264 slices contribute to a single decoded frame.
+ */
+struct vb2_v4l2_buffer *v4l2_m2m_release_capture_buf(struct v4l2_m2m_ctx *m2m_ctx)
+{
+	struct v4l2_m2m_dev *m2m_dev = m2m_ctx->m2m_dev;
+	struct vb2_v4l2_buffer *src, *dst;
+	unsigned long flags;
+
+	spin_lock_irqsave(&m2m_dev->job_spinlock, flags);
+	src = v4l2_m2m_next_src_buf(m2m_ctx);
+	dst = v4l2_m2m_next_dst_buf(m2m_ctx);
+
+	if (dst->is_held && dst->vb2_buf.copied_timestamp &&
+	    src->vb2_buf.timestamp != dst->vb2_buf.timestamp) {
+		dst->is_held = false;
+		v4l2_m2m_dst_buf_remove(m2m_ctx);
+		v4l2_m2m_buf_done(dst, VB2_BUF_STATE_DONE);
+		dst = v4l2_m2m_next_dst_buf(m2m_ctx);
+	}
+	dst->is_held = src->flags & V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF;
+	src->flags &= ~V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF;
+	spin_unlock_irqrestore(&m2m_dev->job_spinlock, flags);
+	return dst;
+}
+EXPORT_SYMBOL(v4l2_m2m_release_capture_buf);
+
 int v4l2_m2m_reqbufs(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
 		     struct v4l2_requestbuffers *reqbufs)
 {
@@ -1171,19 +1275,28 @@ int v4l2_m2m_ioctl_stateless_decoder_cmd(struct file *file, void *priv,
 {
 	struct v4l2_fh *fh = file->private_data;
 	struct vb2_v4l2_buffer *out_vb, *cap_vb;
+	struct v4l2_m2m_dev *m2m_dev = fh->m2m_ctx->m2m_dev;
+	unsigned long flags;
 	int ret;

 	ret = v4l2_m2m_ioctl_stateless_try_decoder_cmd(file, priv, dc);
 	if (ret < 0)
 		return ret;

+	spin_lock_irqsave(&m2m_dev->job_spinlock, flags);
 	out_vb = v4l2_m2m_last_src_buf(fh->m2m_ctx);
 	cap_vb = v4l2_m2m_last_dst_buf(fh->m2m_ctx);

-	if (out_vb)
+	if (out_vb && (out_vb->flags & V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF)) {
 		out_vb->flags &= ~V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF;
-	else if (cap_vb && cap_vb->is_held)
-		v4l2_m2m_buf_done(cap_vb, VB2_BUF_STATE_DONE);
+	} else if (cap_vb && cap_vb->is_held) {
+		cap_vb->is_held = false;
+		if (m2m_dev->curr_ctx) {
+			v4l2_m2m_dst_buf_remove(fh->m2m_ctx);
+			v4l2_m2m_buf_done(cap_vb, VB2_BUF_STATE_DONE);
+		}
+	}
+	spin_unlock_irqrestore(&m2m_dev->job_spinlock, flags);

 	return 0;
 }
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
index 67f7d4326fc1..4e30f263b427 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
@@ -30,14 +30,7 @@ void cedrus_device_run(void *priv)
 	struct media_request *src_req;

 	run.src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
-	run.dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
-
-	if (v4l2_m2m_release_capture_buf(run.src, run.dst)) {
-		v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
-		v4l2_m2m_buf_done(run.dst, VB2_BUF_STATE_DONE);
-		run.dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
-	}
-	run.dst->is_held = run.src->flags & V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF;
+	run.dst = v4l2_m2m_release_capture_buf(ctx->fh.m2m_ctx);

 	run.first_slice = !run.dst->vb2_buf.copied_timestamp ||
 		run.src->vb2_buf.timestamp != run.dst->vb2_buf.timestamp;
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
index 99fedec80224..242cad82cc8c 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
@@ -103,7 +103,6 @@ static irqreturn_t cedrus_irq(int irq, void *data)
 {
 	struct cedrus_dev *dev = data;
 	struct cedrus_ctx *ctx;
-	struct vb2_v4l2_buffer *src_buf, *dst_buf;
 	enum vb2_buffer_state state;
 	enum cedrus_irq_status status;

@@ -121,26 +120,12 @@ static irqreturn_t cedrus_irq(int irq, void *data)
 	dev->dec_ops[ctx->current_codec]->irq_disable(ctx);
 	dev->dec_ops[ctx->current_codec]->irq_clear(ctx);

-	src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
-	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
-
-	if (!src_buf || !dst_buf) {
-		v4l2_err(&dev->v4l2_dev,
-			 "Missing source and/or destination buffers\n");
-		return IRQ_HANDLED;
-	}
-
 	if (status == CEDRUS_IRQ_ERROR)
 		state = VB2_BUF_STATE_ERROR;
 	else
 		state = VB2_BUF_STATE_DONE;

-	v4l2_m2m_buf_done(src_buf, state);
-	if (!dst_buf->is_held) {
-		v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
-		v4l2_m2m_buf_done(dst_buf, state);
-	}
-	v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
+	v4l2_m2m_job_finish_held(ctx->dev->m2m_dev, ctx->fh.m2m_ctx, state);

 	return IRQ_HANDLED;
 }
diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
index 8ae2f56c7fa3..48ca7d3eaa3d 100644
--- a/include/media/v4l2-mem2mem.h
+++ b/include/media/v4l2-mem2mem.h
@@ -173,6 +173,10 @@ void v4l2_m2m_try_schedule(struct v4l2_m2m_ctx *m2m_ctx);
 void v4l2_m2m_job_finish(struct v4l2_m2m_dev *m2m_dev,
 			 struct v4l2_m2m_ctx *m2m_ctx);

+void v4l2_m2m_job_finish_held(struct v4l2_m2m_dev *m2m_dev,
+			 struct v4l2_m2m_ctx *m2m_ctx,
+			 enum vb2_buffer_state state);
+
 static inline void
 v4l2_m2m_buf_done(struct vb2_v4l2_buffer *buf, enum vb2_buffer_state state)
 {
@@ -644,47 +648,7 @@ void v4l2_m2m_buf_copy_metadata(const struct vb2_v4l2_buffer *out_vb,
 				struct vb2_v4l2_buffer *cap_vb,
 				bool copy_frame_flags);

-/**
- * v4l2_m2m_release_capture_buf() - check if the capture buffer should be
- * released
- *
- * @out_vb: the output buffer
- * @cap_vb: the capture buffer
- *
- * This helper function returns true if the current capture buffer should
- * be released to vb2. This is the case if the output buffer specified that
- * the capture buffer should be held (i.e. not returned to vb2) AND if the
- * timestamp of the capture buffer differs from the output buffer timestamp.
- *
- * This helper is to be called at the start of the device_run callback:
- *
- * .. code-block:: c
- *
- *	if (v4l2_m2m_release_capture_buf(out_vb, cap_vb)) {
- *		v4l2_m2m_dst_buf_remove(m2m_ctx);
- *		v4l2_m2m_buf_done(cap_vb, VB2_BUF_STATE_DONE);
- *		cap_vb = v4l2_m2m_next_dst_buf(m2m_ctx);
- *	}
- *	cap_vb->is_held = out_vb->flags & V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF;
- *
- *	...
- *
- *	v4l2_m2m_buf_done(out_vb, VB2_BUF_STATE_DONE);
- *	if (!cap_vb->is_held) {
- *		v4l2_m2m_dst_buf_remove(m2m_ctx);
- *		v4l2_m2m_buf_done(cap_vb, VB2_BUF_STATE_DONE);
- *	}
- *
- * This allows for multiple output buffers to be used to fill in a single
- * capture buffer. This is typically used by stateless decoders where
- * multiple e.g. H.264 slices contribute to a single decoded frame.
- */
-static inline bool v4l2_m2m_release_capture_buf(const struct vb2_v4l2_buffer *out_vb,
-						const struct vb2_v4l2_buffer *cap_vb)
-{
-	return cap_vb->is_held && cap_vb->vb2_buf.copied_timestamp &&
-	       out_vb->vb2_buf.timestamp != cap_vb->vb2_buf.timestamp;
-}
+struct vb2_v4l2_buffer *v4l2_m2m_release_capture_buf(struct v4l2_m2m_ctx *m2m_ctx);

 /* v4l2 request helper */