From: Ming Qian <ming.qian@xxxxxxx> When using VB2_DMABUF, the relationship between dma-buf and v4l2 buffer may not one-to-one, a single dma-buf may be queued via different v4l2 buffers, and different dma-bufs may be queued via the same v4l2 buffer, so it's not appropriate to use the v4l2 buffer index as the frame store id. We can generate a frame store id according to the dma address. Then for a given dma-buf, the id is fixed. Driver now manages the frame store and vb2-buffer states independently. When a dmabuf is queued via another v4l2 buffer before the buffer is released by firmware, need to pend it until firmware release it. Signed-off-by: Ming Qian <ming.qian@xxxxxxx> --- drivers/media/platform/amphion/vdec.c | 232 ++++++++++++++++++---- drivers/media/platform/amphion/vpu.h | 7 +- drivers/media/platform/amphion/vpu_dbg.c | 14 +- drivers/media/platform/amphion/vpu_v4l2.c | 11 + 4 files changed, 217 insertions(+), 47 deletions(-) diff --git a/drivers/media/platform/amphion/vdec.c b/drivers/media/platform/amphion/vdec.c index 61d5598ee6a1..a26cb0c264c9 100644 --- a/drivers/media/platform/amphion/vdec.c +++ b/drivers/media/platform/amphion/vdec.c @@ -26,6 +26,7 @@ #include "vpu_cmds.h" #include "vpu_rpc.h" +#define VDEC_SLOT_CNT_DFT 32 #define VDEC_MIN_BUFFER_CAP 8 #define VDEC_MIN_BUFFER_OUT 8 @@ -41,6 +42,14 @@ struct vdec_fs_info { u32 tag; }; +struct vdec_frame_store_t { + struct vpu_vb2_buffer *curr; + struct vpu_vb2_buffer *pend; + dma_addr_t addr; + unsigned int state; + u32 tag; +}; + struct vdec_t { u32 seq_hdr_found; struct vpu_buffer udata; @@ -48,7 +57,8 @@ struct vdec_t { struct vpu_dec_codec_info codec_info; enum vpu_codec_state state; - struct vpu_vb2_buffer *slots[VB2_MAX_FRAME]; + struct vdec_frame_store_t *slots; + u32 slot_count; u32 req_frame_count; struct vdec_fs_info mbi; struct vdec_fs_info dcp; @@ -289,6 +299,64 @@ static int vdec_ctrl_init(struct vpu_inst *inst) return 0; } +static void vdec_attach_frame_store(struct vpu_inst *inst, struct vb2_buffer *vb) +{ + struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); + struct vpu_vb2_buffer *vpu_buf = to_vpu_vb2_buffer(vbuf); + struct vdec_t *vdec = inst->priv; + struct vdec_frame_store_t *new_slots = NULL; + dma_addr_t addr; + int i; + + addr = vpu_get_vb_phy_addr(vb, 0); + for (i = 0; i < vdec->slot_count; i++) { + if (addr == vdec->slots[i].addr) { + if (vdec->slots[i].curr && vdec->slots[i].curr != vpu_buf) { + vpu_set_buffer_state(vbuf, VPU_BUF_STATE_CHANGED); + vdec->slots[i].pend = vpu_buf; + } else { + vpu_set_buffer_state(vbuf, vdec->slots[i].state); + } + vpu_buf->fs_id = i; + return; + } + } + + for (i = 0; i < vdec->slot_count; i++) { + if (!vdec->slots[i].addr) { + vdec->slots[i].addr = addr; + vpu_buf->fs_id = i; + return; + } + } + + new_slots = vzalloc(sizeof(*vdec->slots) * vdec->slot_count * 2); + if (!vdec->slots) { + dev_err(inst->dev, "fail to alloc frame store\n"); + vpu_set_buffer_state(vbuf, VPU_BUF_STATE_ERROR); + return; + } + + memcpy(new_slots, vdec->slots, sizeof(*vdec->slots) * vdec->slot_count); + vfree(vdec->slots); + vdec->slots = new_slots; + vdec->slot_count *= 2; + + vdec->slots[i].addr = addr; + vpu_buf->fs_id = i; +} + +static void vdec_reset_frame_store(struct vpu_inst *inst) +{ + struct vdec_t *vdec = inst->priv; + + if (!vdec->slots || !vdec->slot_count) + return; + + vpu_trace(inst->dev, "inst[%d] reset slots\n", inst->id); + memset(vdec->slots, 0, sizeof(*vdec->slots) * vdec->slot_count); +} + static void vdec_handle_resolution_change(struct vpu_inst *inst) { struct vdec_t *vdec = inst->priv; @@ -745,11 +813,11 @@ static int vdec_frame_decoded(struct vpu_inst *inst, void *arg) struct vb2_v4l2_buffer *src_buf; int ret = 0; - if (!info || info->id >= ARRAY_SIZE(vdec->slots)) + if (!info || info->id >= vdec->slot_count) return -EINVAL; vpu_inst_lock(inst); - vpu_buf = vdec->slots[info->id]; + vpu_buf = vdec->slots[info->id].curr; if (!vpu_buf) { dev_err(inst->dev, "[%d] decoded invalid frame[%d]\n", inst->id, info->id); ret = -EINVAL; @@ -770,11 +838,13 @@ static int vdec_frame_decoded(struct vpu_inst *inst, void *arg) if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_DECODED) dev_info(inst->dev, "[%d] buf[%d] has been decoded\n", inst->id, info->id); vpu_set_buffer_state(vbuf, VPU_BUF_STATE_DECODED); + vdec->slots[info->id].state = VPU_BUF_STATE_DECODED; vdec->decoded_frame_count++; if (vdec->params.display_delay_enable) { struct vpu_format *cur_fmt; cur_fmt = vpu_get_format(inst, inst->cap_format.type); + vdec->slots[info->id].state = VPU_BUF_STATE_READY; vpu_set_buffer_state(vbuf, VPU_BUF_STATE_READY); for (int i = 0; i < vbuf->vb2_buf.num_planes; i++) vb2_set_plane_payload(&vbuf->vb2_buf, @@ -797,11 +867,11 @@ static struct vpu_vb2_buffer *vdec_find_buffer(struct vpu_inst *inst, u32 luma) struct vdec_t *vdec = inst->priv; int i; - for (i = 0; i < ARRAY_SIZE(vdec->slots); i++) { - if (!vdec->slots[i]) + for (i = 0; i < vdec->slot_count; i++) { + if (!vdec->slots[i].curr) continue; - if (luma == vdec->slots[i]->luma) - return vdec->slots[i]; + if (luma == vdec->slots[i].addr) + return vdec->slots[i].curr; } return NULL; @@ -835,11 +905,11 @@ static void vdec_buf_done(struct vpu_inst *inst, struct vpu_frame_info *frame) cur_fmt = vpu_get_format(inst, inst->cap_format.type); vbuf = &vpu_buf->m2m_buf.vb; - if (vbuf->vb2_buf.index != frame->id) - dev_err(inst->dev, "[%d] buffer id(%d, %d) dismatch\n", - inst->id, vbuf->vb2_buf.index, frame->id); + if (vpu_buf->fs_id != frame->id) + dev_err(inst->dev, "[%d] buffer id(%d(%d), %d) dismatch\n", + inst->id, vpu_buf->fs_id, vbuf->vb2_buf.index, frame->id); - if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_READY && vdec->params.display_delay_enable) + if (vdec->params.display_delay_enable) return; if (vpu_get_buffer_state(vbuf) != VPU_BUF_STATE_DECODED) @@ -852,10 +922,11 @@ static void vdec_buf_done(struct vpu_inst *inst, struct vpu_frame_info *frame) vbuf->sequence = vdec->sequence; dev_dbg(inst->dev, "[%d][OUTPUT TS]%32lld\n", inst->id, vbuf->vb2_buf.timestamp); - v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_DONE); vpu_inst_lock(inst); + vdec->slots[vpu_buf->fs_id].state = VPU_BUF_STATE_READY; vdec->display_frame_count++; vpu_inst_unlock(inst); + v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_DONE); dev_dbg(inst->dev, "[%d] decoded : %d, display : %d, sequence : %d\n", inst->id, vdec->decoded_frame_count, vdec->display_frame_count, vdec->sequence); } @@ -1083,18 +1154,30 @@ static int vdec_response_frame(struct vpu_inst *inst, struct vb2_v4l2_buffer *vb if (!vbuf) return -EINVAL; - if (vdec->slots[vbuf->vb2_buf.index]) { - dev_err(inst->dev, "[%d] repeat alloc fs %d\n", - inst->id, vbuf->vb2_buf.index); + vpu_buf = to_vpu_vb2_buffer(vbuf); + if (vpu_buf->fs_id < 0 || vpu_buf->fs_id >= vdec->slot_count) { + dev_err(inst->dev, "invalid fs %d for v4l2 buffer %d\n", + vpu_buf->fs_id, vbuf->vb2_buf.index); return -EINVAL; } + if (vdec->slots[vpu_buf->fs_id].curr) { + if (vdec->slots[vpu_buf->fs_id].curr != vpu_buf) { + vpu_set_buffer_state(vbuf, VPU_BUF_STATE_CHANGED); + vdec->slots[vpu_buf->fs_id].pend = vpu_buf; + } else { + vpu_set_buffer_state(vbuf, vdec->slots[vpu_buf->fs_id].state); + } + dev_err(inst->dev, "[%d] repeat alloc fs %d (v4l2 index %d)\n", + inst->id, vpu_buf->fs_id, vbuf->vb2_buf.index); + return -EAGAIN; + } + dev_dbg(inst->dev, "[%d] state = %s, alloc fs %d, tag = 0x%x\n", inst->id, vpu_codec_state_name(inst->state), vbuf->vb2_buf.index, vdec->seq_tag); - vpu_buf = to_vpu_vb2_buffer(vbuf); memset(&info, 0, sizeof(info)); - info.id = vbuf->vb2_buf.index; + info.id = vpu_buf->fs_id; info.type = MEM_RES_FRAME; info.tag = vdec->seq_tag; info.luma_addr = vpu_get_vb_phy_addr(&vbuf->vb2_buf, 0); @@ -1109,12 +1192,13 @@ static int vdec_response_frame(struct vpu_inst *inst, struct vb2_v4l2_buffer *vb if (ret) return ret; - vpu_buf->tag = info.tag; vpu_buf->luma = info.luma_addr; vpu_buf->chroma_u = info.chroma_addr; vpu_buf->chroma_v = 0; vpu_set_buffer_state(vbuf, VPU_BUF_STATE_INUSE); - vdec->slots[info.id] = vpu_buf; + vdec->slots[info.id].tag = info.tag; + vdec->slots[info.id].curr = vpu_buf; + vdec->slots[info.id].state = VPU_BUF_STATE_INUSE; vdec->req_frame_count--; return 0; @@ -1175,25 +1259,47 @@ static void vdec_recycle_buffer(struct vpu_inst *inst, struct vb2_v4l2_buffer *v v4l2_m2m_buf_queue(inst->fh.m2m_ctx, vbuf); } -static void vdec_clear_slots(struct vpu_inst *inst) +static void vdec_release_curr_frame_store(struct vpu_inst *inst, u32 id) { struct vdec_t *vdec = inst->priv; struct vpu_vb2_buffer *vpu_buf; struct vb2_v4l2_buffer *vbuf; + + if (id >= vdec->slot_count) + return; + if (!vdec->slots[id].curr) + return; + + vpu_buf = vdec->slots[id].curr; + vbuf = &vpu_buf->m2m_buf.vb; + + vdec_response_fs_release(inst, id, vdec->slots[id].tag); + if (vpu_buf->fs_id == id) { + if (vpu_buf->state != VPU_BUF_STATE_READY) + vdec_recycle_buffer(inst, vbuf); + vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE); + } + + vdec->slots[id].curr = NULL; + vdec->slots[id].state = VPU_BUF_STATE_IDLE; + + if (vdec->slots[id].pend) { + vpu_set_buffer_state(&vdec->slots[id].pend->m2m_buf.vb, VPU_BUF_STATE_IDLE); + vdec->slots[id].pend = NULL; + } +} + +static void vdec_clear_slots(struct vpu_inst *inst) +{ + struct vdec_t *vdec = inst->priv; int i; - for (i = 0; i < ARRAY_SIZE(vdec->slots); i++) { - if (!vdec->slots[i]) + for (i = 0; i < vdec->slot_count; i++) { + if (!vdec->slots[i].curr) continue; - vpu_buf = vdec->slots[i]; - vbuf = &vpu_buf->m2m_buf.vb; - vpu_trace(inst->dev, "clear slot %d\n", i); - vdec_response_fs_release(inst, i, vpu_buf->tag); - vdec_recycle_buffer(inst, vbuf); - vdec->slots[i]->state = VPU_BUF_STATE_IDLE; - vdec->slots[i] = NULL; + vdec_release_curr_frame_store(inst, i); } } @@ -1324,39 +1430,29 @@ static void vdec_event_req_fs(struct vpu_inst *inst, struct vpu_fs_info *fs) static void vdec_evnet_rel_fs(struct vpu_inst *inst, struct vpu_fs_info *fs) { struct vdec_t *vdec = inst->priv; - struct vpu_vb2_buffer *vpu_buf; - struct vb2_v4l2_buffer *vbuf; - if (!fs || fs->id >= ARRAY_SIZE(vdec->slots)) + if (!fs || fs->id >= vdec->slot_count) return; if (fs->type != MEM_RES_FRAME) return; - if (fs->id >= vpu_get_num_buffers(inst, inst->cap_format.type)) { + if (fs->id >= vdec->slot_count) { dev_err(inst->dev, "[%d] invalid fs(%d) to release\n", inst->id, fs->id); return; } vpu_inst_lock(inst); - vpu_buf = vdec->slots[fs->id]; - vdec->slots[fs->id] = NULL; - - if (!vpu_buf) { + if (!vdec->slots[fs->id].curr) { dev_dbg(inst->dev, "[%d] fs[%d] has bee released\n", inst->id, fs->id); goto exit; } - vbuf = &vpu_buf->m2m_buf.vb; - if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_DECODED) { + if (vdec->slots[fs->id].state == VPU_BUF_STATE_DECODED) { dev_dbg(inst->dev, "[%d] frame skip\n", inst->id); vdec->sequence++; } - vdec_response_fs_release(inst, fs->id, vpu_buf->tag); - if (vpu_get_buffer_state(vbuf) != VPU_BUF_STATE_READY) - vdec_recycle_buffer(inst, vbuf); - - vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE); + vdec_release_curr_frame_store(inst, fs->id); vpu_process_capture_buffer(inst); exit: @@ -1552,6 +1648,11 @@ static void vdec_cleanup(struct vpu_inst *inst) return; vdec = inst->priv; + if (vdec) { + vfree(vdec->slots); + vdec->slots = NULL; + vdec->slot_count = 0; + } vfree(vdec); inst->priv = NULL; vfree(inst); @@ -1683,6 +1784,38 @@ static int vdec_stop_session(struct vpu_inst *inst, u32 type) return 0; } +static int vdec_get_slot_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i) +{ + struct vdec_t *vdec = inst->priv; + struct vpu_vb2_buffer *vpu_buf; + int num = -1; + + vpu_inst_lock(inst); + if (i >= vdec->slot_count || !vdec->slots[i].addr) + goto exit; + + vpu_buf = vdec->slots[i].curr; + + num = scnprintf(str, size, "slot[%2d] :", i); + if (vpu_buf) { + num += scnprintf(str + num, size - num, " %2d", + vpu_buf->m2m_buf.vb.vb2_buf.index); + num += scnprintf(str + num, size - num, "; state = %d", vdec->slots[i].state); + } else { + num += scnprintf(str + num, size - num, " -1"); + } + + if (vdec->slots[i].pend) + num += scnprintf(str + num, size - num, "; %d", + vdec->slots[i].pend->m2m_buf.vb.vb2_buf.index); + + num += scnprintf(str + num, size - num, "\n"); +exit: + vpu_inst_unlock(inst); + + return num; +} + static int vdec_get_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i) { struct vdec_t *vdec = inst->priv; @@ -1741,6 +1874,7 @@ static int vdec_get_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i vdec->codec_info.vui_present); break; default: + num = vdec_get_slot_debug_info(inst, str, size, i - 10); break; } @@ -1764,6 +1898,8 @@ static struct vpu_inst_ops vdec_inst_ops = { .get_debug_info = vdec_get_debug_info, .wait_prepare = vpu_inst_unlock, .wait_finish = vpu_inst_lock, + .attach_frame_store = vdec_attach_frame_store, + .reset_frame_store = vdec_reset_frame_store, }; static void vdec_init(struct file *file) @@ -1804,6 +1940,14 @@ static int vdec_open(struct file *file) return -ENOMEM; } + vdec->slots = vzalloc(sizeof(*vdec->slots) * VDEC_SLOT_CNT_DFT); + if (!vdec->slots) { + vfree(vdec); + vfree(inst); + return -ENOMEM; + } + vdec->slot_count = VDEC_SLOT_CNT_DFT; + inst->ops = &vdec_inst_ops; inst->formats = vdec_formats; inst->type = VPU_CORE_TYPE_DEC; diff --git a/drivers/media/platform/amphion/vpu.h b/drivers/media/platform/amphion/vpu.h index 22f0da26ccec..76bfd6b26170 100644 --- a/drivers/media/platform/amphion/vpu.h +++ b/drivers/media/platform/amphion/vpu.h @@ -223,6 +223,8 @@ struct vpu_inst_ops { int (*get_debug_info)(struct vpu_inst *inst, char *str, u32 size, u32 i); void (*wait_prepare)(struct vpu_inst *inst); void (*wait_finish)(struct vpu_inst *inst); + void (*attach_frame_store)(struct vpu_inst *inst, struct vb2_buffer *vb); + void (*reset_frame_store)(struct vpu_inst *inst); }; struct vpu_inst { @@ -296,7 +298,8 @@ enum { VPU_BUF_STATE_DECODED, VPU_BUF_STATE_READY, VPU_BUF_STATE_SKIP, - VPU_BUF_STATE_ERROR + VPU_BUF_STATE_ERROR, + VPU_BUF_STATE_CHANGED }; struct vpu_vb2_buffer { @@ -305,8 +308,8 @@ struct vpu_vb2_buffer { dma_addr_t chroma_u; dma_addr_t chroma_v; unsigned int state; - u32 tag; u32 average_qp; + s32 fs_id; }; void vpu_writel(struct vpu_dev *vpu, u32 reg, u32 val); diff --git a/drivers/media/platform/amphion/vpu_dbg.c b/drivers/media/platform/amphion/vpu_dbg.c index 940e5bda5fa3..246b7d91d083 100644 --- a/drivers/media/platform/amphion/vpu_dbg.c +++ b/drivers/media/platform/amphion/vpu_dbg.c @@ -48,6 +48,7 @@ static char *vpu_stat_name[] = { [VPU_BUF_STATE_READY] = "ready", [VPU_BUF_STATE_SKIP] = "skip", [VPU_BUF_STATE_ERROR] = "error", + [VPU_BUF_STATE_CHANGED] = "changed", }; static inline const char *to_vpu_stat_name(int state) @@ -164,6 +165,7 @@ static int vpu_dbg_instance(struct seq_file *s, void *data) for (i = 0; i < vb2_get_num_buffers(vq); i++) { struct vb2_buffer *vb; struct vb2_v4l2_buffer *vbuf; + struct vpu_vb2_buffer *vpu_buf = to_vpu_vb2_buffer(vbuf); vb = vb2_get_buffer(vq, i); if (!vb) @@ -175,11 +177,21 @@ static int vpu_dbg_instance(struct seq_file *s, void *data) vbuf = to_vb2_v4l2_buffer(vb); num = scnprintf(str, sizeof(str), - "capture[%2d] state = %10s, %8s\n", + "capture[%2d] state = %10s, %8s", i, vb2_stat_name[vb->state], to_vpu_stat_name(vpu_get_buffer_state(vbuf))); if (seq_write(s, str, num)) return 0; + + if (vpu_buf->fs_id >= 0) { + num = scnprintf(str, sizeof(str), "; fs %d", vpu_buf->fs_id); + if (seq_write(s, str, num)) + return 0; + } + + num = scnprintf(str, sizeof(str), "\n"); + if (seq_write(s, str, num)) + return 0; } num = scnprintf(str, sizeof(str), "sequence = %d\n", inst->sequence); diff --git a/drivers/media/platform/amphion/vpu_v4l2.c b/drivers/media/platform/amphion/vpu_v4l2.c index 45707931bc4f..74668fa362e2 100644 --- a/drivers/media/platform/amphion/vpu_v4l2.c +++ b/drivers/media/platform/amphion/vpu_v4l2.c @@ -501,14 +501,25 @@ static int vpu_vb2_queue_setup(struct vb2_queue *vq, call_void_vop(inst, release); } + if (V4L2_TYPE_IS_CAPTURE(vq->type)) + call_void_vop(inst, reset_frame_store); + return 0; } static int vpu_vb2_buf_init(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); + struct vpu_vb2_buffer *vpu_buf = to_vpu_vb2_buffer(vbuf); + struct vpu_inst *inst = vb2_get_drv_priv(vb->vb2_queue); + vpu_buf->fs_id = -1; vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE); + + if (!inst->ops->attach_frame_store || V4L2_TYPE_IS_OUTPUT(vb->type)) + return 0; + + call_void_vop(inst, attach_frame_store, vb); return 0; } -- 2.43.0-rc1