Le vendredi 13 décembre 2024 à 18:10 +0900, Ming Qian a écrit : > From: Ming Qian <ming.qian@xxxxxxx> > > When using VB2_DMABUF, the relationship between dma-buf and v4l2 buffer > may not one-to-one, a single dma-buf may be queued via different > v4l2 buffers, and different dma-bufs may be queued via the same > v4l2 buffer, so it's not appropriate to use the v4l2 buffer index > as the frame store id. > > We can generate a frame store id according to the dma address. > Then for a given dma-buf, the id is fixed. > > Driver now manages the frame store and vb2-buffer states independently. > > When a dmabuf is queued via another v4l2 buffer before the buffer is > released by firmware, need to pend it until firmware release it. > > Signed-off-by: Ming Qian <ming.qian@xxxxxxx> > --- > v2 > -- fix an uninitialized issue reported by media-ci > > drivers/media/platform/amphion/vdec.c | 232 ++++++++++++++++++---- > drivers/media/platform/amphion/vpu.h | 7 +- > drivers/media/platform/amphion/vpu_dbg.c | 15 +- > drivers/media/platform/amphion/vpu_v4l2.c | 11 + > 4 files changed, 218 insertions(+), 47 deletions(-) > > diff --git a/drivers/media/platform/amphion/vdec.c b/drivers/media/platform/amphion/vdec.c > index 61d5598ee6a1..a26cb0c264c9 100644 > --- a/drivers/media/platform/amphion/vdec.c > +++ b/drivers/media/platform/amphion/vdec.c > @@ -26,6 +26,7 @@ > #include "vpu_cmds.h" > #include "vpu_rpc.h" > > +#define VDEC_SLOT_CNT_DFT 32 > #define VDEC_MIN_BUFFER_CAP 8 > #define VDEC_MIN_BUFFER_OUT 8 > > @@ -41,6 +42,14 @@ struct vdec_fs_info { > u32 tag; > }; > > +struct vdec_frame_store_t { > + struct vpu_vb2_buffer *curr; > + struct vpu_vb2_buffer *pend; > + dma_addr_t addr; > + unsigned int state; > + u32 tag; > +}; > + > struct vdec_t { > u32 seq_hdr_found; > struct vpu_buffer udata; > @@ -48,7 +57,8 @@ struct vdec_t { > struct vpu_dec_codec_info codec_info; > enum vpu_codec_state state; > > - struct vpu_vb2_buffer *slots[VB2_MAX_FRAME]; > + struct vdec_frame_store_t *slots; > + u32 slot_count; > u32 req_frame_count; > struct vdec_fs_info mbi; > struct vdec_fs_info dcp; > @@ -289,6 +299,64 @@ static int vdec_ctrl_init(struct vpu_inst *inst) > return 0; > } > > +static void vdec_attach_frame_store(struct vpu_inst *inst, struct vb2_buffer *vb) > +{ > + struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); > + struct vpu_vb2_buffer *vpu_buf = to_vpu_vb2_buffer(vbuf); > + struct vdec_t *vdec = inst->priv; > + struct vdec_frame_store_t *new_slots = NULL; > + dma_addr_t addr; > + int i; > + > + addr = vpu_get_vb_phy_addr(vb, 0); > + for (i = 0; i < vdec->slot_count; i++) { > + if (addr == vdec->slots[i].addr) { > + if (vdec->slots[i].curr && vdec->slots[i].curr != vpu_buf) { > + vpu_set_buffer_state(vbuf, VPU_BUF_STATE_CHANGED); > + vdec->slots[i].pend = vpu_buf; > + } else { > + vpu_set_buffer_state(vbuf, vdec->slots[i].state); > + } > + vpu_buf->fs_id = i; > + return; > + } > + } > + > + for (i = 0; i < vdec->slot_count; i++) { > + if (!vdec->slots[i].addr) { > + vdec->slots[i].addr = addr; > + vpu_buf->fs_id = i; > + return; > + } > + } > + > + new_slots = vzalloc(sizeof(*vdec->slots) * vdec->slot_count * 2); To avoid open coding arithmetics (see Documentation/process/deprecated.rst) you may be able to use flex_array_size(vdec, slots, vdec->slot_count * 2) > + if (!vdec->slots) { > + dev_err(inst->dev, "fail to alloc frame store\n"); > + vpu_set_buffer_state(vbuf, VPU_BUF_STATE_ERROR); > + return; > + } > + > + memcpy(new_slots, vdec->slots, sizeof(*vdec->slots) * vdec->slot_count); > + vfree(vdec->slots); > + vdec->slots = new_slots; > + vdec->slot_count *= 2; > + > + vdec->slots[i].addr = addr; > + vpu_buf->fs_id = i; > +} > + > +static void vdec_reset_frame_store(struct vpu_inst *inst) > +{ > + struct vdec_t *vdec = inst->priv; > + > + if (!vdec->slots || !vdec->slot_count) > + return; > + > + vpu_trace(inst->dev, "inst[%d] reset slots\n", inst->id); > + memset(vdec->slots, 0, sizeof(*vdec->slots) * vdec->slot_count); Same here, flex_array_size() would calculate the size for you. > +} > + > static void vdec_handle_resolution_change(struct vpu_inst *inst) > { > struct vdec_t *vdec = inst->priv; > @@ -745,11 +813,11 @@ static int vdec_frame_decoded(struct vpu_inst *inst, void *arg) > struct vb2_v4l2_buffer *src_buf; > int ret = 0; > > - if (!info || info->id >= ARRAY_SIZE(vdec->slots)) > + if (!info || info->id >= vdec->slot_count) > return -EINVAL; > > vpu_inst_lock(inst); > - vpu_buf = vdec->slots[info->id]; > + vpu_buf = vdec->slots[info->id].curr; > if (!vpu_buf) { > dev_err(inst->dev, "[%d] decoded invalid frame[%d]\n", inst->id, info->id); > ret = -EINVAL; > @@ -770,11 +838,13 @@ static int vdec_frame_decoded(struct vpu_inst *inst, void *arg) > if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_DECODED) > dev_info(inst->dev, "[%d] buf[%d] has been decoded\n", inst->id, info->id); > vpu_set_buffer_state(vbuf, VPU_BUF_STATE_DECODED); > + vdec->slots[info->id].state = VPU_BUF_STATE_DECODED; > vdec->decoded_frame_count++; > if (vdec->params.display_delay_enable) { > struct vpu_format *cur_fmt; > > cur_fmt = vpu_get_format(inst, inst->cap_format.type); > + vdec->slots[info->id].state = VPU_BUF_STATE_READY; > vpu_set_buffer_state(vbuf, VPU_BUF_STATE_READY); > for (int i = 0; i < vbuf->vb2_buf.num_planes; i++) > vb2_set_plane_payload(&vbuf->vb2_buf, > @@ -797,11 +867,11 @@ static struct vpu_vb2_buffer *vdec_find_buffer(struct vpu_inst *inst, u32 luma) > struct vdec_t *vdec = inst->priv; > int i; > > - for (i = 0; i < ARRAY_SIZE(vdec->slots); i++) { > - if (!vdec->slots[i]) > + for (i = 0; i < vdec->slot_count; i++) { > + if (!vdec->slots[i].curr) > continue; > - if (luma == vdec->slots[i]->luma) > - return vdec->slots[i]; > + if (luma == vdec->slots[i].addr) > + return vdec->slots[i].curr; > } > > return NULL; > @@ -835,11 +905,11 @@ static void vdec_buf_done(struct vpu_inst *inst, struct vpu_frame_info *frame) > > cur_fmt = vpu_get_format(inst, inst->cap_format.type); > vbuf = &vpu_buf->m2m_buf.vb; > - if (vbuf->vb2_buf.index != frame->id) > - dev_err(inst->dev, "[%d] buffer id(%d, %d) dismatch\n", > - inst->id, vbuf->vb2_buf.index, frame->id); > + if (vpu_buf->fs_id != frame->id) > + dev_err(inst->dev, "[%d] buffer id(%d(%d), %d) dismatch\n", > + inst->id, vpu_buf->fs_id, vbuf->vb2_buf.index, frame->id); > > - if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_READY && vdec->params.display_delay_enable) > + if (vdec->params.display_delay_enable) > return; > > if (vpu_get_buffer_state(vbuf) != VPU_BUF_STATE_DECODED) > @@ -852,10 +922,11 @@ static void vdec_buf_done(struct vpu_inst *inst, struct vpu_frame_info *frame) > vbuf->sequence = vdec->sequence; > dev_dbg(inst->dev, "[%d][OUTPUT TS]%32lld\n", inst->id, vbuf->vb2_buf.timestamp); > > - v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_DONE); > vpu_inst_lock(inst); > + vdec->slots[vpu_buf->fs_id].state = VPU_BUF_STATE_READY; > vdec->display_frame_count++; > vpu_inst_unlock(inst); > + v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_DONE); > dev_dbg(inst->dev, "[%d] decoded : %d, display : %d, sequence : %d\n", > inst->id, vdec->decoded_frame_count, vdec->display_frame_count, vdec->sequence); > } > @@ -1083,18 +1154,30 @@ static int vdec_response_frame(struct vpu_inst *inst, struct vb2_v4l2_buffer *vb > if (!vbuf) > return -EINVAL; > > - if (vdec->slots[vbuf->vb2_buf.index]) { > - dev_err(inst->dev, "[%d] repeat alloc fs %d\n", > - inst->id, vbuf->vb2_buf.index); > + vpu_buf = to_vpu_vb2_buffer(vbuf); > + if (vpu_buf->fs_id < 0 || vpu_buf->fs_id >= vdec->slot_count) { > + dev_err(inst->dev, "invalid fs %d for v4l2 buffer %d\n", > + vpu_buf->fs_id, vbuf->vb2_buf.index); > return -EINVAL; > } > > + if (vdec->slots[vpu_buf->fs_id].curr) { > + if (vdec->slots[vpu_buf->fs_id].curr != vpu_buf) { > + vpu_set_buffer_state(vbuf, VPU_BUF_STATE_CHANGED); > + vdec->slots[vpu_buf->fs_id].pend = vpu_buf; > + } else { > + vpu_set_buffer_state(vbuf, vdec->slots[vpu_buf->fs_id].state); > + } > + dev_err(inst->dev, "[%d] repeat alloc fs %d (v4l2 index %d)\n", > + inst->id, vpu_buf->fs_id, vbuf->vb2_buf.index); > + return -EAGAIN; > + } > + > dev_dbg(inst->dev, "[%d] state = %s, alloc fs %d, tag = 0x%x\n", > inst->id, vpu_codec_state_name(inst->state), vbuf->vb2_buf.index, vdec->seq_tag); > - vpu_buf = to_vpu_vb2_buffer(vbuf); > > memset(&info, 0, sizeof(info)); > - info.id = vbuf->vb2_buf.index; > + info.id = vpu_buf->fs_id; > info.type = MEM_RES_FRAME; > info.tag = vdec->seq_tag; > info.luma_addr = vpu_get_vb_phy_addr(&vbuf->vb2_buf, 0); > @@ -1109,12 +1192,13 @@ static int vdec_response_frame(struct vpu_inst *inst, struct vb2_v4l2_buffer *vb > if (ret) > return ret; > > - vpu_buf->tag = info.tag; > vpu_buf->luma = info.luma_addr; > vpu_buf->chroma_u = info.chroma_addr; > vpu_buf->chroma_v = 0; > vpu_set_buffer_state(vbuf, VPU_BUF_STATE_INUSE); > - vdec->slots[info.id] = vpu_buf; > + vdec->slots[info.id].tag = info.tag; > + vdec->slots[info.id].curr = vpu_buf; > + vdec->slots[info.id].state = VPU_BUF_STATE_INUSE; > vdec->req_frame_count--; > > return 0; > @@ -1175,25 +1259,47 @@ static void vdec_recycle_buffer(struct vpu_inst *inst, struct vb2_v4l2_buffer *v > v4l2_m2m_buf_queue(inst->fh.m2m_ctx, vbuf); > } > > -static void vdec_clear_slots(struct vpu_inst *inst) > +static void vdec_release_curr_frame_store(struct vpu_inst *inst, u32 id) > { > struct vdec_t *vdec = inst->priv; > struct vpu_vb2_buffer *vpu_buf; > struct vb2_v4l2_buffer *vbuf; > + > + if (id >= vdec->slot_count) > + return; > + if (!vdec->slots[id].curr) > + return; > + > + vpu_buf = vdec->slots[id].curr; > + vbuf = &vpu_buf->m2m_buf.vb; > + > + vdec_response_fs_release(inst, id, vdec->slots[id].tag); > + if (vpu_buf->fs_id == id) { > + if (vpu_buf->state != VPU_BUF_STATE_READY) > + vdec_recycle_buffer(inst, vbuf); > + vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE); > + } > + > + vdec->slots[id].curr = NULL; > + vdec->slots[id].state = VPU_BUF_STATE_IDLE; > + > + if (vdec->slots[id].pend) { > + vpu_set_buffer_state(&vdec->slots[id].pend->m2m_buf.vb, VPU_BUF_STATE_IDLE); > + vdec->slots[id].pend = NULL; > + } > +} > + > +static void vdec_clear_slots(struct vpu_inst *inst) > +{ > + struct vdec_t *vdec = inst->priv; > int i; > > - for (i = 0; i < ARRAY_SIZE(vdec->slots); i++) { > - if (!vdec->slots[i]) > + for (i = 0; i < vdec->slot_count; i++) { > + if (!vdec->slots[i].curr) > continue; > > - vpu_buf = vdec->slots[i]; > - vbuf = &vpu_buf->m2m_buf.vb; > - > vpu_trace(inst->dev, "clear slot %d\n", i); > - vdec_response_fs_release(inst, i, vpu_buf->tag); > - vdec_recycle_buffer(inst, vbuf); > - vdec->slots[i]->state = VPU_BUF_STATE_IDLE; > - vdec->slots[i] = NULL; > + vdec_release_curr_frame_store(inst, i); > } > } > > @@ -1324,39 +1430,29 @@ static void vdec_event_req_fs(struct vpu_inst *inst, struct vpu_fs_info *fs) > static void vdec_evnet_rel_fs(struct vpu_inst *inst, struct vpu_fs_info *fs) > { > struct vdec_t *vdec = inst->priv; > - struct vpu_vb2_buffer *vpu_buf; > - struct vb2_v4l2_buffer *vbuf; > > - if (!fs || fs->id >= ARRAY_SIZE(vdec->slots)) > + if (!fs || fs->id >= vdec->slot_count) > return; > if (fs->type != MEM_RES_FRAME) > return; > > - if (fs->id >= vpu_get_num_buffers(inst, inst->cap_format.type)) { > + if (fs->id >= vdec->slot_count) { > dev_err(inst->dev, "[%d] invalid fs(%d) to release\n", inst->id, fs->id); > return; > } > > vpu_inst_lock(inst); > - vpu_buf = vdec->slots[fs->id]; > - vdec->slots[fs->id] = NULL; > - > - if (!vpu_buf) { > + if (!vdec->slots[fs->id].curr) { > dev_dbg(inst->dev, "[%d] fs[%d] has bee released\n", inst->id, fs->id); > goto exit; > } > > - vbuf = &vpu_buf->m2m_buf.vb; > - if (vpu_get_buffer_state(vbuf) == VPU_BUF_STATE_DECODED) { > + if (vdec->slots[fs->id].state == VPU_BUF_STATE_DECODED) { > dev_dbg(inst->dev, "[%d] frame skip\n", inst->id); > vdec->sequence++; > } > > - vdec_response_fs_release(inst, fs->id, vpu_buf->tag); > - if (vpu_get_buffer_state(vbuf) != VPU_BUF_STATE_READY) > - vdec_recycle_buffer(inst, vbuf); > - > - vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE); > + vdec_release_curr_frame_store(inst, fs->id); > vpu_process_capture_buffer(inst); > > exit: > @@ -1552,6 +1648,11 @@ static void vdec_cleanup(struct vpu_inst *inst) > return; > > vdec = inst->priv; > + if (vdec) { > + vfree(vdec->slots); > + vdec->slots = NULL; > + vdec->slot_count = 0; > + } > vfree(vdec); > inst->priv = NULL; > vfree(inst); > @@ -1683,6 +1784,38 @@ static int vdec_stop_session(struct vpu_inst *inst, u32 type) > return 0; > } > > +static int vdec_get_slot_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i) > +{ > + struct vdec_t *vdec = inst->priv; > + struct vpu_vb2_buffer *vpu_buf; > + int num = -1; > + > + vpu_inst_lock(inst); > + if (i >= vdec->slot_count || !vdec->slots[i].addr) > + goto exit; > + > + vpu_buf = vdec->slots[i].curr; > + > + num = scnprintf(str, size, "slot[%2d] :", i); > + if (vpu_buf) { > + num += scnprintf(str + num, size - num, " %2d", > + vpu_buf->m2m_buf.vb.vb2_buf.index); > + num += scnprintf(str + num, size - num, "; state = %d", vdec->slots[i].state); > + } else { > + num += scnprintf(str + num, size - num, " -1"); > + } > + > + if (vdec->slots[i].pend) > + num += scnprintf(str + num, size - num, "; %d", > + vdec->slots[i].pend->m2m_buf.vb.vb2_buf.index); > + > + num += scnprintf(str + num, size - num, "\n"); > +exit: > + vpu_inst_unlock(inst); > + > + return num; > +} > + > static int vdec_get_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i) > { > struct vdec_t *vdec = inst->priv; > @@ -1741,6 +1874,7 @@ static int vdec_get_debug_info(struct vpu_inst *inst, char *str, u32 size, u32 i > vdec->codec_info.vui_present); > break; > default: > + num = vdec_get_slot_debug_info(inst, str, size, i - 10); > break; > } > > @@ -1764,6 +1898,8 @@ static struct vpu_inst_ops vdec_inst_ops = { > .get_debug_info = vdec_get_debug_info, > .wait_prepare = vpu_inst_unlock, > .wait_finish = vpu_inst_lock, > + .attach_frame_store = vdec_attach_frame_store, > + .reset_frame_store = vdec_reset_frame_store, > }; > > static void vdec_init(struct file *file) > @@ -1804,6 +1940,14 @@ static int vdec_open(struct file *file) > return -ENOMEM; > } > > + vdec->slots = vzalloc(sizeof(*vdec->slots) * VDEC_SLOT_CNT_DFT); > + if (!vdec->slots) { > + vfree(vdec); > + vfree(inst); > + return -ENOMEM; > + } > + vdec->slot_count = VDEC_SLOT_CNT_DFT; > + > inst->ops = &vdec_inst_ops; > inst->formats = vdec_formats; > inst->type = VPU_CORE_TYPE_DEC; > diff --git a/drivers/media/platform/amphion/vpu.h b/drivers/media/platform/amphion/vpu.h > index 22f0da26ccec..76bfd6b26170 100644 > --- a/drivers/media/platform/amphion/vpu.h > +++ b/drivers/media/platform/amphion/vpu.h > @@ -223,6 +223,8 @@ struct vpu_inst_ops { > int (*get_debug_info)(struct vpu_inst *inst, char *str, u32 size, u32 i); > void (*wait_prepare)(struct vpu_inst *inst); > void (*wait_finish)(struct vpu_inst *inst); > + void (*attach_frame_store)(struct vpu_inst *inst, struct vb2_buffer *vb); > + void (*reset_frame_store)(struct vpu_inst *inst); > }; > > struct vpu_inst { > @@ -296,7 +298,8 @@ enum { > VPU_BUF_STATE_DECODED, > VPU_BUF_STATE_READY, > VPU_BUF_STATE_SKIP, > - VPU_BUF_STATE_ERROR > + VPU_BUF_STATE_ERROR, > + VPU_BUF_STATE_CHANGED > }; > > struct vpu_vb2_buffer { > @@ -305,8 +308,8 @@ struct vpu_vb2_buffer { > dma_addr_t chroma_u; > dma_addr_t chroma_v; > unsigned int state; > - u32 tag; > u32 average_qp; > + s32 fs_id; > }; > > void vpu_writel(struct vpu_dev *vpu, u32 reg, u32 val); > diff --git a/drivers/media/platform/amphion/vpu_dbg.c b/drivers/media/platform/amphion/vpu_dbg.c > index 940e5bda5fa3..497ae4e8a229 100644 > --- a/drivers/media/platform/amphion/vpu_dbg.c > +++ b/drivers/media/platform/amphion/vpu_dbg.c > @@ -48,6 +48,7 @@ static char *vpu_stat_name[] = { > [VPU_BUF_STATE_READY] = "ready", > [VPU_BUF_STATE_SKIP] = "skip", > [VPU_BUF_STATE_ERROR] = "error", > + [VPU_BUF_STATE_CHANGED] = "changed", > }; > > static inline const char *to_vpu_stat_name(int state) > @@ -164,6 +165,7 @@ static int vpu_dbg_instance(struct seq_file *s, void *data) > for (i = 0; i < vb2_get_num_buffers(vq); i++) { > struct vb2_buffer *vb; > struct vb2_v4l2_buffer *vbuf; > + struct vpu_vb2_buffer *vpu_buf; > > vb = vb2_get_buffer(vq, i); > if (!vb) > @@ -173,13 +175,24 @@ static int vpu_dbg_instance(struct seq_file *s, void *data) > continue; > > vbuf = to_vb2_v4l2_buffer(vb); > + vpu_buf = to_vpu_vb2_buffer(vbuf); > > num = scnprintf(str, sizeof(str), > - "capture[%2d] state = %10s, %8s\n", > + "capture[%2d] state = %10s, %8s", > i, vb2_stat_name[vb->state], > to_vpu_stat_name(vpu_get_buffer_state(vbuf))); > if (seq_write(s, str, num)) > return 0; > + > + if (vpu_buf->fs_id >= 0) { > + num = scnprintf(str, sizeof(str), "; fs %d", vpu_buf->fs_id); > + if (seq_write(s, str, num)) > + return 0; > + } > + > + num = scnprintf(str, sizeof(str), "\n"); > + if (seq_write(s, str, num)) > + return 0; > } > > num = scnprintf(str, sizeof(str), "sequence = %d\n", inst->sequence); > diff --git a/drivers/media/platform/amphion/vpu_v4l2.c b/drivers/media/platform/amphion/vpu_v4l2.c > index 45707931bc4f..74668fa362e2 100644 > --- a/drivers/media/platform/amphion/vpu_v4l2.c > +++ b/drivers/media/platform/amphion/vpu_v4l2.c > @@ -501,14 +501,25 @@ static int vpu_vb2_queue_setup(struct vb2_queue *vq, > call_void_vop(inst, release); > } > > + if (V4L2_TYPE_IS_CAPTURE(vq->type)) > + call_void_vop(inst, reset_frame_store); > + > return 0; > } > > static int vpu_vb2_buf_init(struct vb2_buffer *vb) > { > struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); > + struct vpu_vb2_buffer *vpu_buf = to_vpu_vb2_buffer(vbuf); > + struct vpu_inst *inst = vb2_get_drv_priv(vb->vb2_queue); > > + vpu_buf->fs_id = -1; > vpu_set_buffer_state(vbuf, VPU_BUF_STATE_IDLE); > + > + if (!inst->ops->attach_frame_store || V4L2_TYPE_IS_OUTPUT(vb->type)) > + return 0; > + > + call_void_vop(inst, attach_frame_store, vb); > return 0; > } > Just a general question, was the choice for a flexible array because the IP does not provide enough per-codec information to calculate the number of needed slots or to actually avoid needing to do per codec array sizing ? Nicolas