When using VB2_DMABUF, the relationship between dma-buf and v4l2 buffer may not one-to-one, a single dma-buf may be queued via different v4l2 buffers, and different dma-bufs may be queued via the same v4l2 buffer, so it's not appropriate to use the v4l2 buffer index as the frame store id. We can generate a frame store id according to the dma address. Then for a given dma-buf, the id is fixed. Driver now manages the frame store and vb2-buffer states independently. When a dmabuf is queued via another v4l2 buffer before the buffer is released by firmware, need to pend it until firmware release it. Signed-off-by: Ming Qian <ming.qian@xxxxxxxxxxx> --- v5 - Avoid dynamic size calculation in memory allocation, use kmalloc_array instead v4 - remove unnecessary 'out of memory' message v3 -- fix a typo in NULL pointer check v2 -- fix an uninitialized issue reported by media-ci drivers/media/platform/amphion/vdec.c | 235 +++++++++++++++++----- drivers/media/platform/amphion/vpu.h | 7 +- drivers/media/platform/amphion/vpu_dbg.c | 15 +- drivers/media/platform/amphion/vpu_v4l2.c | 11 + 4 files changed, 220 insertions(+), 48 deletions(-) diff --git a/drivers/media/platform/amphion/vdec.c b/drivers/media/platform/amphion/vdec.c index b3bc9eb16d6a..f4979d537b97 100644 --- a/drivers/media/platform/amphion/vdec.c +++ b/drivers/media/platform/amphion/vdec.c @@ -26,6 +26,7 @@ #include "vpu_cmds.h" #include "vpu_rpc.h" +#define VDEC_SLOT_CNT_DFT 32 #define VDEC_MIN_BUFFER_CAP 8 #define VDEC_MIN_BUFFER_OUT 8 @@ -41,6 +42,14 @@ struct vdec_fs_info { u32 tag; }; +struct vdec_frame_store_t { + struct vpu_vb2_buffer *curr; + struct vpu_vb2_buffer *pend; + dma_addr_t addr; + unsigned int state; + u32 tag; +}; + struct vdec_t { u32 seq_hdr_found; struct vpu_buffer udata; @@ -48,7 +57,8 @@ struct vdec_t { struct vpu_dec_codec_info codec_info; enum vpu_codec_state state; - struct vpu_vb2_buffer *slots[VB2_MAX_FRAME]; + struct vdec_frame_store_t *slots; + u32 slot_count; u32 req_frame_count; struct vdec_fs_info mbi; struct vdec_fs_info dcp; @@ -289,6 +299,63 @@ static int vdec_ctrl_init(struct vpu_inst *inst) return 0; } +static void vdec_attach_frame_store(struct vpu_inst *inst, struct vb2_buffer *vb) +{ + struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); + struct vpu_vb2_buffer *vpu_buf = to_vpu_vb2_buffer(vbuf); + struct vdec_t *vdec = inst->priv; + struct vdec_frame_store_t *new_slots = NULL; + dma_addr_t addr; + int i; + + addr = vpu_get_vb_phy_addr(vb, 0); + for (i = 0; i < vdec->slot_count; i++) { + if (addr == vdec->slots[i].addr) { + if (vdec->slots[i].curr && vdec->slots[i].curr != vpu_buf) { + vpu_set_buffer_state(vbuf, VPU_BUF_STATE_CHANGED); + vdec->slots[i].pend = vpu_buf; + } else { + vpu_set_buffer_state(vbuf, vdec->slots[i].state); + } + vpu_buf->fs_id = i; + return; + } + } + + for (i = 0; i < vdec->slot_count; i++) { + if (!vdec->slots[i].addr) { + vdec->slots[i].addr = addr; + vpu_buf->fs_id = i; + return; + } + } + + new_slots = krealloc_array(vdec->slots, vdec->slot_count * 2, + sizeof(*vdec->slots), + GFP_KERNEL | __GFP_ZERO); + if (!new_slots) { + vpu_set_buffer_state(vbuf, VPU_BUF_STATE_ERROR); + return; + } + + vdec->slots = new_slots; + vdec->slot_count *= 2; + + vdec->slots[i].addr = addr; + vpu_buf->fs_id = i; +} + +static void vdec_reset_frame_store(struct vpu_inst *inst) +{ + struct vdec_t *vdec = inst->priv; + + if (!vdec->slots || !vdec->slot_count) + return; + + vpu_trace(inst->dev, "inst[%d] reset slots\n", inst->id); + memset(vdec->slots, 0, sizeof(*vdec->slots) * vdec->slot_count); +} + static void vdec_handle_resolution_change(struct vpu_inst *inst) { struct vdec_t *vdec = inst->priv; @@ -750,11 +817,11 @@ static int vdec_frame_decoded(struct vpu_inst *inst, void *arg) struct vb2_v4l2_buffer *src_buf; int ret = 0; - if (!info || info->id >= ARRAY_SIZE(vdec->slots)) + if (!info || info->id >= vdec->slot_count) return -EINVAL; vpu_inst_lock(inst); - vpu_buf = vdec->slots[info->id]; + vpu_buf = vdec->slots[info->id].curr; if (!vpu_buf) { dev_err(inst->dev, "[%d] decoded invalid frame[%d]\n", inst->id, info->id); ret = -EINVAL; @@ -775,11 +842,13 @@ static int vdec_frame_decoded(struct vpu_inst *inst, void *arg) if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_DECODED) dev_info(inst->dev, "[%d] buf[%d] has been decoded\n", inst->id, info->id); vpu_set_buffer_state(vbuf, VPU_BUF_STATE_DECODED); + vdec->slots[info->id].state = VPU_BUF_STATE_DECODED; vdec->decoded_frame_count++; if (vdec->params.display_delay_enable) { struct vpu_format *cur_fmt; cur_fmt = vpu_get_format(inst, inst->cap_format.type); + vdec->slots[info->id].state = VPU_BUF_STATE_READY; vpu_set_buffer_state(vbuf, VPU_BUF_STATE_READY); for (int i = 0; i < vbuf->vb2_buf.num_planes; i++) vb2_set_plane_payload(&vbuf->vb2_buf, @@ -802,11 +871,11 @@ static struct vpu_vb2_buffer *vdec_find_buffer(struct vpu_inst *inst, u32 luma) struct vdec_t *vdec = inst->priv; int i; - for (i = 0; i < ARRAY_SIZE(vdec->slots); i++) { - if (!vdec->slots[i]) + for (i = 0; i < vdec->slot_count; i++) { + if (!vdec->slots[i].curr) continue; - if (luma == vdec->slots[i]->luma) - return vdec->slots[i]; + if (luma == vdec->slots[i].addr) + return vdec->slots[i].curr; } return NULL; @@ -840,11 +909,11 @@ static void vdec_buf_done(struct vpu_inst *inst, struct vpu_frame_info *frame) cur_fmt = vpu_get_format(inst, inst->cap_format.type); vbuf = &vpu_buf->m2m_buf.vb; - if (vbuf->vb2_buf.index != frame->id) - dev_err(inst->dev, "[%d] buffer id(%d, %d) dismatch\n", - inst->id, vbuf->vb2_buf.index, frame->id); + if (vpu_buf->fs_id != frame->id) + dev_err(inst->dev, "[%d] buffer id(%d(%d), %d) dismatch\n", + inst->id, vpu_buf->fs_id, vbuf->vb2_buf.index, frame->id); - if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_READY && vdec->params.display_delay_enable) + if (vdec->params.display_delay_enable) return; if (vpu_get_buffer_state(vbuf) != VPU_BUF_STATE_DECODED) @@ -857,10 +926,11 @@ static void vdec_buf_done(struct vpu_inst *inst, struct vpu_frame_info *frame) vbuf->sequence = vdec->sequence; dev_dbg(inst->dev, "[%d][OUTPUT TS]%32lld\n", inst->id, vbuf->vb2_buf.timestamp); - v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_DONE); vpu_inst_lock(inst); + vdec->slots[vpu_buf->fs_id].state = VPU_BUF_STATE_READY; vdec->display_frame_count++; vpu_inst_unlock(inst); + v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_DONE); dev_dbg(inst->dev, "[%d] decoded : %d, display : %d, sequence : %d\n", inst->id, vdec->decoded_frame_count, vdec->display_frame_count, vdec->sequence); } @@ -1103,18 +1173,30 @@ static int vdec_response_frame(struct vpu_inst *inst, struct vb2_v4l2_buffer *vb if (!vbuf) return -EINVAL; - if (vdec->slots[vbuf->vb2_buf.index]) { - dev_err(inst->dev, "[%d] repeat alloc fs %d\n", - inst->id, vbuf->vb2_buf.index); + vpu_buf = to_vpu_vb2_buffer(vbuf); + if (vpu_buf->fs_id < 0 || vpu_buf->fs_id >= vdec->slot_count) { + dev_err(inst->dev, "invalid fs %d for v4l2 buffer %d\n", + vpu_buf->fs_id, vbuf->vb2_buf.index); return -EINVAL; } + if (vdec->slots[vpu_buf->fs_id].curr) { + if (vdec->slots[vpu_buf->fs_id].curr != vpu_buf) { + vpu_set_buffer_state(vbuf, VPU_BUF_STATE_CHANGED); + vdec->slots[vpu_buf->fs_id].pend = vpu_buf; + } else { + vpu_set_buffer_state(vbuf, vdec->slots[vpu_buf->fs_id].state); + } + dev_err(inst->dev, "[%d] repeat alloc fs %d (v4l2 index %d)\n", + inst->id, vpu_buf->fs_id, vbuf->vb2_buf.index); + return -EAGAIN; + } + dev_dbg(inst->dev, "[%d] state = %s, alloc fs %d, tag = 0x%x\n", inst->id, vpu_codec_state_name(inst->state), vbuf->vb2_buf.index, vdec->seq_tag); - vpu_buf = to_vpu_vb2_buffer(vbuf); memset(&info, 0, sizeof(info)); - info.id = vbuf->vb2_buf.index; + info.id = vpu_buf->fs_id; info.type = MEM_RES_FRAME; info.tag = vdec->seq_tag; info.luma_addr = vpu_get_vb_phy_addr(&vbuf->vb2_buf, 0); @@ -1129,12 +1211,13 @@ static int vdec_response_frame(struct vpu_inst *inst, struct vb2_v4l2_buffer *vb if (ret) return ret; - vpu_buf->tag = info.tag; vpu_buf->luma = info.luma_addr; vpu_buf->chroma_u = info.chroma_addr; vpu_buf->chroma_v = 0; vpu_set_buffer_state(vbuf, VPU_BUF_STATE_INUSE); - vdec->slots[info.id] = vpu_buf; + vdec->slots[info.id].tag = info.tag; + vdec->slots[info.id].curr = vpu_buf; + vdec->slots[info.id].state = VPU_BUF_STATE_INUSE; vdec->req_frame_count--; return 0; @@ -1195,25 +1278,47 @@ static void vdec_recycle_buffer(struct vpu_inst *inst, struct vb2_v4l2_buffer *v v4l2_m2m_buf_queue(inst->fh.m2m_ctx, vbuf); } -static void vdec_clear_slots(struct vpu_inst *inst) +static void vdec_release_curr_frame_store(struct vpu_inst *inst, u32 id) { struct vdec_t *vdec = inst->priv; struct vpu_vb2_buffer *vpu_buf; struct vb2_v4l2_buffer *vbuf; + + if (id >= vdec->slot_count) + return; + if (!vdec->slots[id].curr) + return; + + vpu_buf = vdec->slots[id].curr; + vbuf = &vpu_buf->m2m_buf.vb; + + vdec_response_fs_release(inst, id, vdec->slots[id].tag); + if (vpu_buf->fs_id == id) { + if (vpu_buf->state != VPU_BUF_STATE_READY) + vdec_recycle_buffer(inst, vbuf); + vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE); + } + + vdec->slots[id].curr = NULL; + vdec->slots[id].state = VPU_BUF_STATE_IDLE; + + if (vdec->slots[id].pend) { + vpu_set_buffer_state(&vdec->slots[id].pend->m2m_buf.vb, VPU_BUF_STATE_IDLE); + vdec->slots[id].pend = NULL; + } +} + +static void vdec_clear_slots(struct vpu_inst *inst) +{ + struct vdec_t *vdec = inst->priv; int i; - for (i = 0; i < ARRAY_SIZE(vdec->slots); i++) { - if (!vdec->slots[i]) + for (i = 0; i < vdec->slot_count; i++) { + if (!vdec->slots[i].curr) continue; - vpu_buf = vdec->slots[i]; - vbuf = &vpu_buf->m2m_buf.vb; - vpu_trace(inst->dev, "clear slot %d\n", i); - vdec_response_fs_release(inst, i, vpu_buf->tag); - vdec_recycle_buffer(inst, vbuf); - vdec->slots[i]->state = VPU_BUF_STATE_IDLE; - vdec->slots[i] = NULL; + vdec_release_curr_frame_store(inst, i); } } @@ -1354,39 +1459,29 @@ static void vdec_event_req_fs(struct vpu_inst *inst, struct vpu_fs_info *fs) static void vdec_evnet_rel_fs(struct vpu_inst *inst, struct vpu_fs_info *fs) { struct vdec_t *vdec = inst->priv; - struct vpu_vb2_buffer *vpu_buf; - struct vb2_v4l2_buffer *vbuf; - if (!fs || fs->id >= ARRAY_SIZE(vdec->slots)) + if (!fs || fs->id >= vdec->slot_count) return; if (fs->type != MEM_RES_FRAME) return; - if (fs->id >= vpu_get_num_buffers(inst, inst->cap_format.type)) { + if (fs->id >= vdec->slot_count) { dev_err(inst->dev, "[%d] invalid fs(%d) to release\n", inst->id, fs->id); return; } vpu_inst_lock(inst); - vpu_buf = vdec->slots[fs->id]; - vdec->slots[fs->id] = NULL; - - if (!vpu_buf) { + if (!vdec->slots[fs->id].curr) { dev_dbg(inst->dev, "[%d] fs[%d] has bee released\n", inst->id, fs->id); goto exit; } - vbuf = &vpu_buf->m2m_buf.vb; - if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_DECODED) { + if (vdec->slots[fs->id].state == VPU_BUF_STATE_DECODED) { dev_dbg(inst->dev, "[%d] frame skip\n", inst->id); vdec->sequence++; } - vdec_response_fs_release(inst, fs->id, vpu_buf->tag); - if (vpu_get_buffer_state(vbuf) != VPU_BUF_STATE_READY) - vdec_recycle_buffer(inst, vbuf); - - vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE); + vdec_release_curr_frame_store(inst, fs->id); vpu_process_capture_buffer(inst); exit: @@ -1582,6 +1677,11 @@ static void vdec_cleanup(struct vpu_inst *inst) return; vdec = inst->priv; + if (vdec) { + kfree(vdec->slots); + vdec->slots = NULL; + vdec->slot_count = 0; + } vfree(vdec); inst->priv = NULL; vfree(inst); @@ -1713,11 +1813,43 @@ static int vdec_stop_session(struct vpu_inst *inst, u32 type) return 0; } -static int vdec_get_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i) +static int vdec_get_slot_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i) { struct vdec_t *vdec = inst->priv; + struct vpu_vb2_buffer *vpu_buf; int num = -1; + vpu_inst_lock(inst); + if (i >= vdec->slot_count || !vdec->slots[i].addr) + goto exit; + + vpu_buf = vdec->slots[i].curr; + + num = scnprintf(str, size, "slot[%2d] :", i); + if (vpu_buf) { + num += scnprintf(str + num, size - num, " %2d", + vpu_buf->m2m_buf.vb.vb2_buf.index); + num += scnprintf(str + num, size - num, "; state = %d", vdec->slots[i].state); + } else { + num += scnprintf(str + num, size - num, " -1"); + } + + if (vdec->slots[i].pend) + num += scnprintf(str + num, size - num, "; %d", + vdec->slots[i].pend->m2m_buf.vb.vb2_buf.index); + + num += scnprintf(str + num, size - num, "\n"); +exit: + vpu_inst_unlock(inst); + + return num; +} + +static int vdec_get_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i) +{ + struct vdec_t *vdec = inst->priv; + int num; + switch (i) { case 0: num = scnprintf(str, size, @@ -1771,6 +1903,7 @@ static int vdec_get_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i vdec->codec_info.vui_present); break; default: + num = vdec_get_slot_debug_info(inst, str, size, i - 10); break; } @@ -1794,6 +1927,8 @@ static struct vpu_inst_ops vdec_inst_ops = { .get_debug_info = vdec_get_debug_info, .wait_prepare = vpu_inst_unlock, .wait_finish = vpu_inst_lock, + .attach_frame_store = vdec_attach_frame_store, + .reset_frame_store = vdec_reset_frame_store, }; static void vdec_init(struct file *file) @@ -1834,6 +1969,16 @@ static int vdec_open(struct file *file) return -ENOMEM; } + vdec->slots = kmalloc_array(VDEC_SLOT_CNT_DFT, + sizeof(*vdec->slots), + GFP_KERNEL | __GFP_ZERO); + if (!vdec->slots) { + vfree(vdec); + vfree(inst); + return -ENOMEM; + } + vdec->slot_count = VDEC_SLOT_CNT_DFT; + inst->ops = &vdec_inst_ops; inst->formats = vdec_formats; inst->type = VPU_CORE_TYPE_DEC; diff --git a/drivers/media/platform/amphion/vpu.h b/drivers/media/platform/amphion/vpu.h index 978971712742..d8100da160d1 100644 --- a/drivers/media/platform/amphion/vpu.h +++ b/drivers/media/platform/amphion/vpu.h @@ -223,6 +223,8 @@ struct vpu_inst_ops { int (*get_debug_info)(struct vpu_inst *inst, char *str, u32 size, u32 i); void (*wait_prepare)(struct vpu_inst *inst); void (*wait_finish)(struct vpu_inst *inst); + void (*attach_frame_store)(struct vpu_inst *inst, struct vb2_buffer *vb); + void (*reset_frame_store)(struct vpu_inst *inst); }; struct vpu_inst { @@ -297,7 +299,8 @@ enum { VPU_BUF_STATE_DECODED, VPU_BUF_STATE_READY, VPU_BUF_STATE_SKIP, - VPU_BUF_STATE_ERROR + VPU_BUF_STATE_ERROR, + VPU_BUF_STATE_CHANGED }; struct vpu_vb2_buffer { @@ -306,8 +309,8 @@ struct vpu_vb2_buffer { dma_addr_t chroma_u; dma_addr_t chroma_v; unsigned int state; - u32 tag; u32 average_qp; + s32 fs_id; }; void vpu_writel(struct vpu_dev *vpu, u32 reg, u32 val); diff --git a/drivers/media/platform/amphion/vpu_dbg.c b/drivers/media/platform/amphion/vpu_dbg.c index 940e5bda5fa3..497ae4e8a229 100644 --- a/drivers/media/platform/amphion/vpu_dbg.c +++ b/drivers/media/platform/amphion/vpu_dbg.c @@ -48,6 +48,7 @@ static char *vpu_stat_name[] = { [VPU_BUF_STATE_READY] = "ready", [VPU_BUF_STATE_SKIP] = "skip", [VPU_BUF_STATE_ERROR] = "error", + [VPU_BUF_STATE_CHANGED] = "changed", }; static inline const char *to_vpu_stat_name(int state) @@ -164,6 +165,7 @@ static int vpu_dbg_instance(struct seq_file *s, void *data) for (i = 0; i < vb2_get_num_buffers(vq); i++) { struct vb2_buffer *vb; struct vb2_v4l2_buffer *vbuf; + struct vpu_vb2_buffer *vpu_buf; vb = vb2_get_buffer(vq, i); if (!vb) @@ -173,13 +175,24 @@ static int vpu_dbg_instance(struct seq_file *s, void *data) continue; vbuf = to_vb2_v4l2_buffer(vb); + vpu_buf = to_vpu_vb2_buffer(vbuf); num = scnprintf(str, sizeof(str), - "capture[%2d] state = %10s, %8s\n", + "capture[%2d] state = %10s, %8s", i, vb2_stat_name[vb->state], to_vpu_stat_name(vpu_get_buffer_state(vbuf))); if (seq_write(s, str, num)) return 0; + + if (vpu_buf->fs_id >= 0) { + num = scnprintf(str, sizeof(str), "; fs %d", vpu_buf->fs_id); + if (seq_write(s, str, num)) + return 0; + } + + num = scnprintf(str, sizeof(str), "\n"); + if (seq_write(s, str, num)) + return 0; } num = scnprintf(str, sizeof(str), "sequence = %d\n", inst->sequence); diff --git a/drivers/media/platform/amphion/vpu_v4l2.c b/drivers/media/platform/amphion/vpu_v4l2.c index 50aeb69d2c66..37ef706c29dd 100644 --- a/drivers/media/platform/amphion/vpu_v4l2.c +++ b/drivers/media/platform/amphion/vpu_v4l2.c @@ -500,14 +500,25 @@ static int vpu_vb2_queue_setup(struct vb2_queue *vq, call_void_vop(inst, release); } + if (V4L2_TYPE_IS_CAPTURE(vq->type)) + call_void_vop(inst, reset_frame_store); + return 0; } static int vpu_vb2_buf_init(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); + struct vpu_vb2_buffer *vpu_buf = to_vpu_vb2_buffer(vbuf); + struct vpu_inst *inst = vb2_get_drv_priv(vb->vb2_queue); + vpu_buf->fs_id = -1; vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE); + + if (!inst->ops->attach_frame_store || V4L2_TYPE_IS_OUTPUT(vb->type)) + return 0; + + call_void_vop(inst, attach_frame_store, vb); return 0; } -- 2.43.0-rc1