Hi Umang, On Mon, Oct 28, 2024 at 07:47:46PM +0530, Umang Jain wrote: > On 27/10/24 8:10 pm, Laurent Pinchart wrote: > > On Sat, Oct 26, 2024 at 09:52:43PM +0200, Xavier Roumegue wrote: > >> On 10/22/24 8:31 AM, Umang Jain wrote: > >>> Currently, vertex maps cannot be updated dynamically while dw100 > >>> is streaming. This patch enables the support to update the vertex > >>> map dynamically at runtime. > >>> > >>> To support this functionality, we need to allocate and track two > >>> sets of DMA-allocated vertex maps, one for the currently applied map > >>> and another for the updated (pending) map. Before the start of next > >>> frame, if a new user-supplied vertex map is available, the hardware > >>> mapping is changed to use new vertex map, thus enabling the user to > >>> update the vertex map at runtime. > > > > How do you synchronize the new map with the jobs ? That doesn't seem to > > be supported by the patch, is it a feature that you don't need ? > > > >>> We should ensure no race occurs when the vertex map is updated multiple > >>> times when a frame is processing. Hence, vertex map is never updated to > >>> the applied vertex map index in .s_ctrl(). It is always updated on the > >>> pending vertex map slot, with `maps_mutex` lock held. `maps_mutex` lock > >>> is also held when the pending vertex map is applied to the hardware in > >>> dw100_start(). > >>> > >>> Ability to update the vertex map at runtime, enables abritrary rotation > > > > s/abritrary/arbitrary/ > > > >>> and digital zoom features for the input frames, through the dw100 > >>> hardware. > >>> > >>> Signed-off-by: Umang Jain <umang.jain@xxxxxxxxxxxxxxxx> > >>> --- > >>> drivers/media/platform/nxp/dw100/dw100.c | 73 ++++++++++++++++++------ > >>> 1 file changed, 56 insertions(+), 17 deletions(-) > >>> > >>> diff --git a/drivers/media/platform/nxp/dw100/dw100.c b/drivers/media/platform/nxp/dw100/dw100.c > >>> index 54ebf59df682..42712ccff754 100644 > >>> --- a/drivers/media/platform/nxp/dw100/dw100.c > >>> +++ b/drivers/media/platform/nxp/dw100/dw100.c > >>> @@ -83,6 +83,11 @@ struct dw100_q_data { > >>> struct v4l2_rect crop; > >>> }; > >>> > >>> +struct dw100_map { > >>> + unsigned int *map; > >>> + dma_addr_t map_dma; > > > > I would have called the field just 'dma' as it's already qualified by > > the structure name or the field name in dw100_ctx. > > > >>> +}; > >>> + > >>> struct dw100_ctx { > >>> struct v4l2_fh fh; > >>> struct dw100_device *dw_dev; > >>> @@ -92,12 +97,14 @@ struct dw100_ctx { > >>> struct mutex vq_mutex; > >>> > >>> /* Look Up Table for pixel remapping */ > >>> - unsigned int *map; > >>> - dma_addr_t map_dma; > >>> + struct dw100_map maps[2]; > >>> + unsigned int applied_map_id; > >>> size_t map_size; > >>> unsigned int map_width; > >>> unsigned int map_height; > >>> bool user_map_is_set; > >>> + bool user_map_is_updated; > >>> + struct mutex maps_mutex; > >>> > >>> /* Source and destination queue data */ > >>> struct dw100_q_data q_data[2]; > >>> @@ -308,24 +315,31 @@ static int dw100_create_mapping(struct dw100_ctx *ctx) > >>> { > >>> u32 *user_map; > >>> > >>> - if (ctx->map) > >>> - dma_free_coherent(&ctx->dw_dev->pdev->dev, ctx->map_size, > >>> - ctx->map, ctx->map_dma); > >>> + for (unsigned int i = 0; i < 2; i++) { > > > > for (unsigned int i = 0; i < ARRAY_SIZE(ctx->maps); i++) { > > struct dw100_map *map = &ctx->maps[i]; > > > > and use map below. > > > > > >>> + if (ctx->maps[i].map) > >>> + dma_free_coherent(&ctx->dw_dev->pdev->dev, ctx->map_size, > >>> + ctx->maps[i].map, ctx->maps[i].map_dma); > >>> > >>> - ctx->map = dma_alloc_coherent(&ctx->dw_dev->pdev->dev, ctx->map_size, > >>> - &ctx->map_dma, GFP_KERNEL); > >>> + ctx->maps[i].map = dma_alloc_coherent(&ctx->dw_dev->pdev->dev, ctx->map_size, > >>> + &ctx->maps[i].map_dma, GFP_KERNEL); > >>> > >>> - if (!ctx->map) > >>> - return -ENOMEM; > >>> + if (!ctx->maps[i].map) > >>> + return -ENOMEM; > >>> + } > >>> > >>> user_map = dw100_get_user_map(ctx); > >>> - memcpy(ctx->map, user_map, ctx->map_size); > >>> + > >>> + mutex_lock(&ctx->maps_mutex); > >>> + ctx->applied_map_id = 0; > >>> + memcpy(ctx->maps[ctx->applied_map_id].map, user_map, ctx->map_size); > >>> + mutex_unlock(&ctx->maps_mutex); > >>> > >>> dev_dbg(&ctx->dw_dev->pdev->dev, > >>> "%ux%u %s mapping created (d:%pad-c:%p) for stream %ux%u->%ux%u\n", > >>> ctx->map_width, ctx->map_height, > >>> ctx->user_map_is_set ? "user" : "identity", > >>> - &ctx->map_dma, ctx->map, > >>> + &ctx->maps[ctx->applied_map_id].map_dma, > >>> + ctx->maps[ctx->applied_map_id].map, > >>> ctx->q_data[DW100_QUEUE_SRC].pix_fmt.width, > >>> ctx->q_data[DW100_QUEUE_DST].pix_fmt.height, > >>> ctx->q_data[DW100_QUEUE_SRC].pix_fmt.width, > >>> @@ -336,10 +350,12 @@ static int dw100_create_mapping(struct dw100_ctx *ctx) > >>> > >>> static void dw100_destroy_mapping(struct dw100_ctx *ctx) > >>> { > >>> - if (ctx->map) { > >>> - dma_free_coherent(&ctx->dw_dev->pdev->dev, ctx->map_size, > >>> - ctx->map, ctx->map_dma); > >>> - ctx->map = NULL; > >>> + for (unsigned int i = 0; i < 2; i++) { > > > > for (unsigned int i = 0; i < ARRAY_SIZE(ctx->maps); i++) { > > struct dw100_map *map = &ctx->maps[i]; > > > > and use map below. > > > >>> + if (ctx->maps[i].map) > >>> + dma_free_coherent(&ctx->dw_dev->pdev->dev, ctx->map_size, > >>> + ctx->maps[i].map, ctx->maps[i].map_dma); > >>> + > >>> + ctx->maps[i].map = NULL; > >>> } > >>> } > >>> > >>> @@ -350,6 +366,15 @@ static int dw100_s_ctrl(struct v4l2_ctrl *ctrl) > >>> > >>> switch (ctrl->id) { > >>> case V4L2_CID_DW100_DEWARPING_16x16_VERTEX_MAP: > >>> + u32 *user_map = ctrl->p_new.p_u32; > >> > >> A warning to fix here. > >> > >>> + unsigned int id; > >>> + > >>> + mutex_lock(&ctx->maps_mutex); > >>> + id = ctx->applied_map_id ? 0 : 1; > >>> + memcpy(ctx->maps[id].map, user_map, ctx->map_size); > >>> + ctx->user_map_is_updated = true; > >> > >> If you call the control before to start the stream, the dma mapping is > >> not yet done(dw100_create_mapping not yet called). Hence, copying the > >> user map to a NULL pointer. > > > > The maps could be allocated in dw100_open() when creating the context. > > That would likely require moving the initialization of ctx->map_width, > > ctx->map_height and ctx->map_size as well. The handling of the identity > > map would probably need to be rewritten too. > > The ctx->map_width, ctx->map_height and ctx->map_size would be updated > on s_fmt(). I saw that ctx->map_width, ctx->map_height and ctx->map_size are set in dw100_ctrl_dewarping_map_init(), with mw = ctrl->dims[0]; mh = ctrl->dims[1]; [...] ctx->map_width = mw; ctx->map_height = mh; ctx->map_size = mh * mw * sizeof(u32); but overlooked the fact that the dimensions are set in dw100_s_fmt(). > I think we can solve the NULL pointer issue by allocating when creating > the context (open()), however, it would require updating (re-allocation) > again before the map can be memcpy()ed before streaming. Because the map > dimensions would have changed. If the map dimensions change, that invalidates the map contents set by userspace. This is currently handled by dw100_ctrl_dewarping_map_init(). You won't be able to just memcpy() the previous control to the new one. > See dw100_s_fmt() > > ... > dims[0] = dw100_get_n_vertices_from_length(q_data->pix_fmt.width); > dims[1] = dw100_get_n_vertices_from_length(q_data->pix_fmt.height); > > ret = v4l2_ctrl_modify_dimensions(ctrl, dims); > ``` > > I checked the v4l2_ctrl_modify_dimensions definition to check if it > issues a call v4l2_ctrl_type_ops.init (where the map dimensions are > updated for dw100) and it does. > > So, I think I will have to introduce allocations in dw100_open() so that > NULL pointer issue doesn't occur and let the dma allocation get > re-allocated with new dimensions just before stream start. That seems a bit pointless, if the map will be invalidated by a call to VIDIOC_S_FMT anyway. The only case where it would be useful is if userspace sets the control before starting streaming and doesn't call VIDIOC_S_FMT. I'm increasingly thinking the driver should use the request API to synchronize the control with the jobs. > Also, we do not have to move the ctx->map_width, ctx->height abd > ctx->map_size inititlisation, since they are already gets initialised to > defaults, on the open() path when v4l2_ctrl_new_custom() is done. > > >>> + mutex_unlock(&ctx->maps_mutex); > >>> + > >>> ctx->user_map_is_set = true; > >>> break; > >>> } > >>> @@ -655,6 +680,8 @@ static int dw100_open(struct file *file) > >>> > >>> v4l2_fh_add(&ctx->fh); > >>> > >>> + mutex_init(&ctx->maps_mutex); > >>> + > >>> return 0; > >>> > >>> err: > >>> @@ -675,6 +702,7 @@ static int dw100_release(struct file *file) > >>> v4l2_ctrl_handler_free(&ctx->hdl); > >>> v4l2_m2m_ctx_release(ctx->fh.m2m_ctx); > >>> mutex_destroy(&ctx->vq_mutex); > >>> + mutex_destroy(&ctx->maps_mutex); > >>> kfree(ctx); > >>> > >>> return 0; > >>> @@ -1453,8 +1481,19 @@ static void dw100_start(struct dw100_ctx *ctx, struct vb2_v4l2_buffer *in_vb, > >>> dw100_hw_set_destination(dw_dev, &ctx->q_data[DW100_QUEUE_DST], > >>> ctx->q_data[DW100_QUEUE_SRC].fmt, > >>> &out_vb->vb2_buf); > >>> - dw100_hw_set_mapping(dw_dev, ctx->map_dma, > >>> - ctx->map_width, ctx->map_height); > >>> + > >>> + > >>> + mutex_lock(&ctx->maps_mutex); > >>> + if (ctx->user_map_is_updated) { > >> > >> The hardware register must unconditionally be updated while starting a > >> new context, as a v4l2 m2m supports multi context operations. Otherwise, > >> you may be running with the user mapping used by the previous context. > >> > >> Moreover, the hardware mapping will not be set in case you use the > >> driver as a simple scaler without user mapping, which causes the process > >> to hang as the run does not start and never completes. > >> > >>> + unsigned int id = ctx->applied_map_id ? 0 : 1; > >>> + > >>> + dw100_hw_set_mapping(dw_dev, ctx->maps[id].map_dma, > >>> + ctx->map_width, ctx->map_height); > >>> + ctx->applied_map_id = id; > >>> + ctx->user_map_is_updated = false; > >>> + } > >>> + mutex_unlock(&ctx->maps_mutex); > >>> + > >>> dw100_hw_enable_irq(dw_dev); > >>> dw100_hw_dewarp_start(dw_dev); > >>> > >> > >> It sounds as this patch requires a collaborative application for running > >> well. All my simple tests failed. > >> > >> You can test a simple scaler/pixfmt conversion operation with v4l2 utils: > >> > >> > >> v4l2-ctl \ > >> -d 0 \ > >> --set-fmt-video-out width=640,height=480,pixelformat=NV12,field=none \ > >> --set-fmt-video width=640,height=480,pixelformat=NV21,field=none \ > >> --stream-out-pattern 3 \ > >> --set-selection-output\ > >> target=crop,top=0,left=0,width=640,height=480,flags= \ > >> --stream-count 100 \ > >> --stream-mmap \ > >> --stream-out-mmap -- Regards, Laurent Pinchart