Hi Xavier, Thank you for the patch. On Tue, May 03, 2022 at 11:39:24AM +0200, Xavier Roumegue wrote: > Add a V4L2 mem-to-mem driver for the Vivante DW100 Dewarp Processor IP > core found on i.MX8MP SoC. > > The processor core applies a programmable geometrical transformation on > input image to correct distorsion introduced by lenses. s/input image/input images/ > The transformation function is exposed as a grid map with 16x16 pixel > macroblocks indexed using X, Y vertex coordinates. > > The dewarping map can be set from application through dedicated a v4l2 s/dedicated a v4l2/a dedicated V4L2/ > control. If not set or invalid, the driver computes an identity map > prior to start the processing engine. s/start/starting/ > > The driver supports scaling, cropping and pixel format conversion. > > Signed-off-by: Xavier Roumegue <xavier.roumegue@xxxxxxxxxxx> > --- > drivers/media/platform/nxp/Kconfig | 1 + > drivers/media/platform/nxp/Makefile | 1 + > drivers/media/platform/nxp/dw100/Kconfig | 16 + > drivers/media/platform/nxp/dw100/Makefile | 3 + > drivers/media/platform/nxp/dw100/dw100.c | 1782 +++++++++++++++++ > drivers/media/platform/nxp/dw100/dw100_regs.h | 118 ++ > 6 files changed, 1921 insertions(+) > create mode 100644 drivers/media/platform/nxp/dw100/Kconfig > create mode 100644 drivers/media/platform/nxp/dw100/Makefile > create mode 100644 drivers/media/platform/nxp/dw100/dw100.c > create mode 100644 drivers/media/platform/nxp/dw100/dw100_regs.h > > diff --git a/drivers/media/platform/nxp/Kconfig b/drivers/media/platform/nxp/Kconfig > index a0810df751dc..e5085e5e585b 100644 > --- a/drivers/media/platform/nxp/Kconfig > +++ b/drivers/media/platform/nxp/Kconfig > @@ -52,4 +52,5 @@ config VIDEO_MX2_EMMAPRP > memory to memory. Operations include resizing and format > conversion. > > +source "drivers/media/platform/nxp/dw100/Kconfig" > source "drivers/media/platform/nxp/imx-jpeg/Kconfig" > diff --git a/drivers/media/platform/nxp/Makefile b/drivers/media/platform/nxp/Makefile > index efc38c6578ce..22ba28ac6d63 100644 > --- a/drivers/media/platform/nxp/Makefile > +++ b/drivers/media/platform/nxp/Makefile > @@ -1,5 +1,6 @@ > # SPDX-License-Identifier: GPL-2.0-only > > +obj-y += dw100/ > obj-y += imx-jpeg/ > > obj-$(CONFIG_VIDEO_IMX_MIPI_CSIS) += imx-mipi-csis.o > diff --git a/drivers/media/platform/nxp/dw100/Kconfig b/drivers/media/platform/nxp/dw100/Kconfig > new file mode 100644 > index 000000000000..86ccac6fd120 > --- /dev/null > +++ b/drivers/media/platform/nxp/dw100/Kconfig > @@ -0,0 +1,16 @@ > +# SPDX-License-Identifier: GPL-2.0-only > + > +config VIDEO_DW100 > + tristate "NXP i.MX DW100 dewarper" > + depends on V4L_MEM2MEM_DRIVERS > + depends on VIDEO_DEV > + depends on ARCH_MXC || COMPILE_TEST > + select VIDEOBUF2_DMA_CONTIG > + select V4L2_MEM2MEM_DEV > + help > + DW100 is a memory-to-memory engine performing geometrical > + transformation on source image through a programmable dewarping map. s/source image/source images/ > + > + To compile this driver as a module, choose M here: the module > + will be called dw100. > + > diff --git a/drivers/media/platform/nxp/dw100/Makefile b/drivers/media/platform/nxp/dw100/Makefile > new file mode 100644 > index 000000000000..49db80589e9a > --- /dev/null > +++ b/drivers/media/platform/nxp/dw100/Makefile > @@ -0,0 +1,3 @@ > +# SPDX-License-Identifier: GPL-2.0+ > + > +obj-$(CONFIG_VIDEO_DW100) += dw100.o > diff --git a/drivers/media/platform/nxp/dw100/dw100.c b/drivers/media/platform/nxp/dw100/dw100.c > new file mode 100644 > index 000000000000..869ca12e0bcd > --- /dev/null > +++ b/drivers/media/platform/nxp/dw100/dw100.c > @@ -0,0 +1,1782 @@ > +// SPDX-License-Identifier: GPL-2.0+ > +/* > + * DW100 Hardware dewarper > + * > + * Copyright 2022 NXP > + * Author: Xavier Roumegue (xavier.roumegue@xxxxxxxxxxx) > + * > + */ > + > +#include <linux/clk.h> > +#include <linux/debugfs.h> > +#include <linux/interrupt.h> > +#include <linux/minmax.h> > +#include <linux/module.h> > +#include <linux/of.h> > +#include <linux/platform_device.h> > +#include <linux/pm_runtime.h> > + > +#include <media/v4l2-ctrls.h> > +#include <media/v4l2-device.h> > +#include <media/v4l2-event.h> > +#include <media/v4l2-ioctl.h> > +#include <media/v4l2-mem2mem.h> > +#include <media/videobuf2-dma-contig.h> > + > +#include <uapi/linux/dw100.h> > + > +#include "dw100_regs.h" > + > +#define DRV_NAME "dw100" > + > +#define MIN_W 176 > +#define MIN_H 144 > +#define MAX_W 4096 > +#define MAX_H 3072 > +#define ALIGN_W 3 > +#define ALIGN_H 3 Please add a DW100_ prefix to these macros to avoid possible future namespace clashes. > + > +#define DW100_BLOCK_SIZE 16 > + > +#define DW100_MIN_LUT_NELEMS (((MIN_W / DW100_BLOCK_SIZE) + 1) \ > + * ((MIN_H / DW100_BLOCK_SIZE) + 1)) > + > +#define DW100_MAX_LUT_NELEMS (((MAX_W / DW100_BLOCK_SIZE) + 1) \ > + * ((MAX_H / DW100_BLOCK_SIZE) + 1)) > + > +/* > + * 16 controls have been reserved for this driver for future extension, but > + * let's limit the related driver allocation to the effective number of controls > + * in use. > + */ > +#define DW100_MAX_CTRLS 1 > +#define DW100_CTRL_DEWARPING_MAP 0 Aligning all macros could make this more readable. > + > +static unsigned int debug; > +module_param(debug, uint, 0644); > +MODULE_PARM_DESC(debug, "Activate debug info"); > + > +#define dprintk(lvl, dev, fmt, arg...) \ > + v4l2_dbg(lvl, debug, &(dev)->v4l2_dev, "%s: " fmt, __func__, ## arg) I'd just use dev_dbg() and drop the debug module parameter. Up to you. > + > +enum { > + V4L2_M2M_SRC = 0, > + V4L2_M2M_DST = 1, > +}; > + > +enum { > + V4L2_M2M_CAPTURE = 1, BIT(0) > + V4L2_M2M_OUTPUT = 2, and BIT(1) to emphesize they're bitflags. > +}; Those names are also too generic, a DW100_ prefix would fix that. I'd name the first two DW100_QUEUE_SRC and DW100_QUEUE_DST, and the last two DW100_FMT_CAPTURE and DW100_FMT_OUTPUT. > + > +static struct dw100_fmt { static const > + u32 fourcc; > + int depth; > + u32 types; > + u32 reg_format; > + bool reg_swap_uv; > +} formats[] = { > + { > + .fourcc = V4L2_PIX_FMT_NV16, > + .depth = 16, > + .types = V4L2_M2M_OUTPUT | V4L2_M2M_CAPTURE, > + .reg_format = DW100_DEWARP_CTRL_FORMAT_YUV422_SP, > + .reg_swap_uv = false, > + }, { > + .fourcc = V4L2_PIX_FMT_NV61, > + .depth = 16, > + .types = V4L2_M2M_CAPTURE, > + .reg_format = DW100_DEWARP_CTRL_FORMAT_YUV422_SP, > + .reg_swap_uv = true, > + }, { > + .fourcc = V4L2_PIX_FMT_YUYV, > + .depth = 16, > + .types = V4L2_M2M_OUTPUT | V4L2_M2M_CAPTURE, > + .reg_format = DW100_DEWARP_CTRL_FORMAT_YUV422_PACKED, > + .reg_swap_uv = false, > + }, { > + .fourcc = V4L2_PIX_FMT_UYVY, > + .depth = 16, > + .types = V4L2_M2M_OUTPUT | V4L2_M2M_CAPTURE, > + .reg_format = DW100_DEWARP_CTRL_FORMAT_YUV422_PACKED, > + .reg_swap_uv = true, > + }, { > + .fourcc = V4L2_PIX_FMT_NV12, > + .depth = 12, > + .types = V4L2_M2M_OUTPUT | V4L2_M2M_CAPTURE, > + .reg_format = DW100_DEWARP_CTRL_FORMAT_YUV420_SP, > + .reg_swap_uv = false, > + }, { > + .fourcc = V4L2_PIX_FMT_NV21, > + .depth = 12, > + .types = V4L2_M2M_OUTPUT | V4L2_M2M_CAPTURE, > + .reg_format = DW100_DEWARP_CTRL_FORMAT_YUV420_SP, > + .reg_swap_uv = true, > + }, > +}; > + > +static inline int to_dw100_fmt_type(enum v4l2_buf_type type) > +{ > + if (V4L2_TYPE_IS_OUTPUT(type)) > + return V4L2_M2M_OUTPUT; > + else > + return V4L2_M2M_CAPTURE; > +} > + > +#define NUM_FORMATS ARRAY_SIZE(formats) I'd drop the macro and use ARRAY_SIZE(formats) directly in the two locations where NUM_FORMATS is used. Same for NUM_CTRLS. > + > +static struct dw100_fmt *find_pixel_format(u32 pixel_format, int fmt_type) static const struct dw100_ prefix for the function name. Same below where applicable. > +{ > + struct dw100_fmt *fmt; const (and where applicable as indicated by the compiler in the rest of the code) It's often a good practice to move declaration of variables to the scope they're used in. > + unsigned int k; Any reason not to use the customary "i" variable ? > + > + for (k = 0; k < NUM_FORMATS; k++) { > + fmt = &formats[k]; > + if (fmt->fourcc == pixel_format && fmt->types & fmt_type) > + return fmt; > + } > + > + return NULL; > +} > + > +static struct dw100_fmt *find_format(struct v4l2_format *f) > +{ > + return find_pixel_format(f->fmt.pix.pixelformat, > + to_dw100_fmt_type(f->type)); > +} > + > +static inline u32 dw100_bytesperline(struct dw100_fmt *fmt, u32 width) You can drop the inline and let the compiler decide. > +{ > + switch (fmt->reg_format) { > + case DW100_DEWARP_CTRL_FORMAT_YUV422_SP: > + case DW100_DEWARP_CTRL_FORMAT_YUV420_SP: > + return width; > + case DW100_DEWARP_CTRL_FORMAT_YUV422_PACKED: > + default: > + return (fmt->depth * width) >> 3; > + } > +} > + > +static inline u32 dw100_sizeimage(struct dw100_fmt *fmt, u32 width, u32 height) > +{ > + return (fmt->depth * width * height) >> 3; > +} > + > +struct dw100_device { > + struct platform_device *pdev; > + struct v4l2_m2m_dev *m2m_dev; > + struct v4l2_device v4l2_dev; > + struct video_device vfd; > +#ifdef CONFIG_MEDIA_CONTROLLER > + struct media_device mdev; > +#endif > + struct mutex vfd_mutex; > + spinlock_t irqlock; > + void __iomem *mmio; > + struct clk_bulk_data *clks; > + int num_clks; > + struct dentry *debugfs_root; > +}; Could you please move all structure definitions before the functions ? > + > +static int dw100_dump_regs(struct dw100_device *dw_dev) > +{ > +#define __DECLARE_REG(x) { #x, x } > + int i; > + struct reg_desc { > + const char * const name; > + unsigned int addr; > + } dw100_regs[] = { > + __DECLARE_REG(DW100_DEWARP_ID), > + __DECLARE_REG(DW100_DEWARP_CTRL), > + __DECLARE_REG(DW100_MAP_LUT_ADDR), > + __DECLARE_REG(DW100_MAP_LUT_SIZE), > + __DECLARE_REG(DW100_MAP_LUT_ADDR2), > + __DECLARE_REG(DW100_MAP_LUT_SIZE2), > + __DECLARE_REG(DW100_SRC_IMG_Y_BASE), > + __DECLARE_REG(DW100_SRC_IMG_UV_BASE), > + __DECLARE_REG(DW100_SRC_IMG_SIZE), > + __DECLARE_REG(DW100_SRC_IMG_STRIDE), > + __DECLARE_REG(DW100_DST_IMG_Y_BASE), > + __DECLARE_REG(DW100_DST_IMG_UV_BASE), > + __DECLARE_REG(DW100_DST_IMG_SIZE), > + __DECLARE_REG(DW100_DST_IMG_STRIDE), > + __DECLARE_REG(DW100_DST_IMG_Y_SIZE1), > + __DECLARE_REG(DW100_DST_IMG_UV_SIZE1), > + __DECLARE_REG(DW100_SRC_IMG_Y_BASE2), > + __DECLARE_REG(DW100_SRC_IMG_UV_BASE2), > + __DECLARE_REG(DW100_SRC_IMG_SIZE2), > + __DECLARE_REG(DW100_SRC_IMG_STRIDE2), > + __DECLARE_REG(DW100_DST_IMG_Y_BASE2), > + __DECLARE_REG(DW100_DST_IMG_UV_BASE2), > + __DECLARE_REG(DW100_DST_IMG_SIZE2), > + __DECLARE_REG(DW100_DST_IMG_STRIDE2), > + __DECLARE_REG(DW100_DST_IMG_Y_SIZE2), > + __DECLARE_REG(DW100_DST_IMG_UV_SIZE2), > + __DECLARE_REG(DW100_SWAP_CONTROL), > + __DECLARE_REG(DW100_VERTICAL_SPLIT_LINE), > + __DECLARE_REG(DW100_HORIZON_SPLIT_LINE), > + __DECLARE_REG(DW100_SCALE_FACTOR), > + __DECLARE_REG(DW100_ROI_START), > + __DECLARE_REG(DW100_BOUNDARY_PIXEL), > + __DECLARE_REG(DW100_INTERRUPT_STATUS), > + __DECLARE_REG(DW100_BUS_CTRL), > + __DECLARE_REG(DW100_BUS_CTRL1), > + __DECLARE_REG(DW100_BUS_TIME_OUT_CYCLE), > + }; > + > + for (i = 0; i < ARRAY_SIZE(dw100_regs); i++) { > + dev_info(&dw_dev->pdev->dev, "%s: %#x\n", > + dw100_regs[i].name, > + readl(dw_dev->mmio + dw100_regs[i].addr)); readl()/writel() wrappers would be nice, something along the lines of static inline u32 dw100_read(struct dw100_device *dw_dev, u32 reg) { return readl(dw_dev->mmio + reg); } static inline void dw100_write(struct dw100_device *dw_dev, u32 reg, u32 val) { write(val, dw_dev->mmio + reg); } > + } As this is read through debugfs, I recommend using the seq_file API to dump the registers to the debugfs file instead of the kernel log. > + > + return 0; > +} > + > +struct dw100_q_data { > + unsigned int width; > + unsigned int height; > + unsigned int bytesperline; > + unsigned int sizeimage; > + unsigned int sequence; > + struct dw100_fmt *fmt; > + struct v4l2_rect crop; > +}; > + > +struct dw100_ctx { > + struct v4l2_fh fh; > + struct dw100_device *dw_dev; > + struct v4l2_ctrl_handler hdl; > + struct v4l2_ctrl *ctrls[DW100_MAX_CTRLS]; > + > + /* Look Up Table for pixel remapping */ > + unsigned int *map; > + dma_addr_t map_dma; > + size_t map_size; > + unsigned int map_width; > + unsigned int map_height; > + > + /* Related colorspace properties propagated from input to output */ > + enum v4l2_colorspace colorspace; > + enum v4l2_xfer_func xfer_func; > + enum v4l2_ycbcr_encoding ycbcr_enc; > + enum v4l2_quantization quant; > + > + /* Source and destination queue data */ > + struct dw100_q_data q_data[2]; > +}; > + > +static inline struct dw100_ctx *file2ctx(struct file *file) > +{ > + return container_of(file->private_data, struct dw100_ctx, fh); > +} > + > +static struct dw100_q_data *get_q_data(struct dw100_ctx *ctx, > + enum v4l2_buf_type type) > +{ > + if (type == V4L2_BUF_TYPE_VIDEO_OUTPUT) > + return &ctx->q_data[V4L2_M2M_SRC]; > + else > + return &ctx->q_data[V4L2_M2M_DST]; > +} > + > +static u32 dw100_get_n_vertices_from_length(u32 length) > +{ > + return DIV_ROUND_UP(length, DW100_BLOCK_SIZE) + 1; > +} > + > +static u16 dw_map_convert_to_UQ12_4(u32 a) s/dw_/dw100_/ (and where applicable below) s/UQ/uq/ > +{ > + return (u16)((a & 0xfff) << 4); > +} > + > +static u32 dw_map_format_coordinates(u16 xq, u16 yq) > +{ > + return (u32)((yq << 16) | xq); > +} > + > +static u32 *dw100_get_user_map(struct dw100_ctx *ctx) > +{ > + struct v4l2_ctrl *ctrl = ctx->ctrls[DW100_CTRL_DEWARPING_MAP]; > + No need for a blank line. > + size_t user_map_size; > + > + if (!ctrl) { > + v4l2_err(&ctx->dw_dev->v4l2_dev, "Mapping control not found !"); > + return NULL; > + } I don't think this can ever happen. > + > + if (ctrl->elems < DW100_MIN_LUT_NELEMS || > + ctrl->elems > DW100_MAX_LUT_NELEMS) > + return NULL; > + > + user_map_size = ctrl->elems * ctrl->elem_size; > + > + if ((ctrl->elems * ctrl->elem_size) == ctx->map_size) if (user_map_size == ctx->map_size) > + return ctrl->p_cur.p_u32; > + > + v4l2_warn(&ctx->dw_dev->v4l2_dev, > + "Skipping invalid user map (%zu != %zu)\n", > + user_map_size, > + ctx->map_size); The last two lines hold in a single line. This should be a dev_dbg(), to avoid flooding the kernel log with userspace-triggered messages. I'd also write if (user_map_size != ctx->map_size) { dev_dbg(...); return NULL; } return ctrl->p_cur.p_u32; to match the pattern of the previous error check. > + > + return NULL; > +} > + > +/* > + * Create an identity map if not set by the application This comment is a bit misleading, I initially thought the function's purpose was to create an identity mapping. You can write * Create the dewarp map from the V4L2 control. If the control hasn't been set * by the application, generate an identity mapping. > + * > + * A 16 pixels cell size grid is mapped on the destination image. > + * The last cells width/height might be lesser than 16 if the destination image > + * width/height is not divisible by 16. This dewarping grid map specifies the > + * source image pixel location (x, y) on each grid intersection point. > + * Bilinear interpolation is used to compute inner cell points locations. > + * > + * The coordinates are saved in UQ12.4 fixed point format. > + * Extra blank line. > + */ > +static int dw100_create_mapping(struct dw100_ctx *ctx) > +{ > + u32 sw, sh, dw, dh, mw, mh, i, j; > + u16 qx, qy, qdx, qdy, qsh, qsw; > + bool is_user_map = false; > + u32 *user_map; > + > + sw = ctx->q_data[V4L2_M2M_SRC].width; > + dw = ctx->q_data[V4L2_M2M_DST].width; > + sh = ctx->q_data[V4L2_M2M_SRC].height; > + dh = ctx->q_data[V4L2_M2M_DST].height; > + > + mw = dw100_get_n_vertices_from_length(dw); > + mh = dw100_get_n_vertices_from_length(dh); > + > + qdx = dw_map_convert_to_UQ12_4(sw) / (mw - 1); > + qdy = dw_map_convert_to_UQ12_4(sh) / (mh - 1); These two lines can be moved just before the for loop as you don't need those variables when the user has supplied a mapping. You can then write them qdx = qsw / (mw - 1); qdy = qsh / (mh - 1); > + qsh = dw_map_convert_to_UQ12_4(sh); > + qsw = dw_map_convert_to_UQ12_4(sw); Swap the last two lines. > + > + if (ctx->map) > + dma_free_coherent(&ctx->dw_dev->pdev->dev, > + ctx->map_size, > + ctx->map, > + ctx->map_dma); This holds on two liens. > + > + ctx->map_width = mw; > + ctx->map_height = mh; > + ctx->map_size = mh * mw * sizeof(u32); > + > + ctx->map = dma_alloc_coherent(&ctx->dw_dev->pdev->dev, > + ctx->map_size, > + &ctx->map_dma, > + GFP_KERNEL); Same here. > + > + if (!ctx->map) > + return -ENOMEM; > + > + user_map = dw100_get_user_map(ctx); > + if (user_map) { > + is_user_map = true; > + memcpy(ctx->map, user_map, ctx->map_size); > + goto out; > + } > + > + for (i = 0, qy = 0, qx = 0; i < mh; i++, qy += qdy, qx = 0) { > + if (qy > qsh) > + qy = qsh; Can this happen ? > + for (j = 0; j < mw; j++, qx += qdx) { for (j = 0, qx = 0; j < mw; j++, qx += qdx) { and drop qw handling from the outer loop. > + if (qx > qsw) > + qx = qsw; > + ctx->map[i * mw + j] = dw_map_format_coordinates(qx, qy); This could be made a bit more efficient by declaring a u32 *map; variable, setting it to map = ctx->map; just before the loop, and writing *map++ = dw_map_format_coordinates(qx, qy); here. > + } > + } > +out: > + dprintk(1, ctx->dw_dev, > + "%dx%d %s mapping created (d:%pa-c:%p) for stream %dx%d->%dx%d\n", %u instead of %d for unsigned integers. > + mw, mh, is_user_map ? "user" : "identity", > + &ctx->map_dma, ctx->map, sw, sh, dw, dh); > + > + return 0; > +} > + > +static const struct v4l2_ctrl_config controls[] = { > + [DW100_CTRL_DEWARPING_MAP] = { > + .id = V4L2_CID_DW100_DEWARPING_16x16_VERTEX_MAP, > + .name = "Look-Up Table", > + .type = V4L2_CTRL_TYPE_U32, > + .min = 0x00000000, > + .max = 0xffffffff, > + .step = 1, > + .def = 0, > + .dims = { DW100_MAX_LUT_NELEMS }, > + .flags = V4L2_CTRL_FLAG_DYNAMIC_ARRAY, > + }, > +}; > + > +#define NUM_CTRLS ARRAY_SIZE(controls) > + > +static int dw100_queue_setup(struct vb2_queue *vq, > + unsigned int *nbuffers, unsigned int *nplanes, > + unsigned int sizes[], struct device *alloc_devs[]) > +{ > + struct dw100_ctx *ctx = vb2_get_drv_priv(vq); > + struct dw100_q_data *q_data; > + unsigned int size, count = *nbuffers; > + > + q_data = get_q_data(ctx, vq->type); > + > + size = q_data->sizeimage; > + > + *nbuffers = count; > + > + if (*nplanes) > + return sizes[0] < size ? -EINVAL : 0; > + > + *nplanes = 1; > + sizes[0] = size; > + > + dprintk(1, ctx->dw_dev, "Queue %p: get %d buffer(s) of size %d each.\n", > + vq, count, size); I think I'd drop this message, or have you found it useful to debug application issues ? > + > + return 0; > +} > + > +static int dw100_buf_prepare(struct vb2_buffer *vb) > +{ > + struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); > + struct dw100_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); > + struct dw100_device *dw_dev = ctx->dw_dev; > + struct dw100_q_data *q_data; > + > + dprintk(1, dw_dev, "Queue %p: Preparing buffer %p, type: %d\n", > + vb->vb2_queue, vb, vb->vb2_queue->type); Same here. > + > + q_data = get_q_data(ctx, vb->vb2_queue->type); > + if (V4L2_TYPE_IS_OUTPUT(vb->vb2_queue->type)) { > + if (vbuf->field == V4L2_FIELD_ANY) This can't happen, https://linuxtv.org/downloads/v4l-dvb-apis/userspace-api/v4l/field-order.html#c.v4l2_field states "struct v4l2_buffer field can never be V4L2_FIELD_ANY". > + vbuf->field = V4L2_FIELD_NONE; > + if (vbuf->field != V4L2_FIELD_NONE) { > + v4l2_err(&dw_dev->v4l2_dev, "%x field isn't supported\n", > + vbuf->field); dev_dbg() > + return -EINVAL; > + } > + } > + > + if (vb2_plane_size(vb, 0) < q_data->sizeimage) { > + v4l2_err(&dw_dev->v4l2_dev, > + "%s data will not fit into plane (%lu < %lu)\n", > + __func__, vb2_plane_size(vb, 0), > + (long)q_data->sizeimage); dev_dbg() > + return -EINVAL; > + } > + > + vb2_set_plane_payload(vb, 0, q_data->sizeimage); > + > + return 0; > +} > + > +static void dw100_buf_queue(struct vb2_buffer *vb) > +{ > + struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); > + struct dw100_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); > + > + dprintk(2, ctx->dw_dev, "Queue %p: Queuing buffer %p.\n", > + vb->vb2_queue, vb); > + v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vbuf); > +} > + > +static int dw100_start_streaming(struct vb2_queue *q, unsigned int count) > +{ > + struct dw100_ctx *ctx = vb2_get_drv_priv(q); > + struct dw100_q_data *q_data = get_q_data(ctx, q->type); > + > + dprintk(1, ctx->dw_dev, "Queue %p: Start Streaming.\n", q); > + > + q_data->sequence = 0; > + > + return pm_runtime_resume_and_get(&ctx->dw_dev->pdev->dev); > +} > + > +static void dw100_stop_streaming(struct vb2_queue *q) > +{ > + struct dw100_ctx *ctx = vb2_get_drv_priv(q); > + struct vb2_v4l2_buffer *vbuf; > + unsigned long flags; > + > + dprintk(1, ctx->dw_dev, "Queue %p: Stop Streaming.\n", q); > + for (;;) { > + if (V4L2_TYPE_IS_OUTPUT(q->type)) > + vbuf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); > + else > + vbuf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); > + if (!vbuf) > + break; > + spin_lock_irqsave(&ctx->dw_dev->irqlock, flags); I don't think you need the lock here, as this can't race with the IRQ, can it ? > + v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_ERROR); > + spin_unlock_irqrestore(&ctx->dw_dev->irqlock, flags); > + } > + > + pm_runtime_put_sync(&ctx->dw_dev->pdev->dev); > + > + if (ctx->map) { > + dma_free_coherent(&ctx->dw_dev->pdev->dev, > + ctx->map_size, > + ctx->map, > + ctx->map_dma); > + ctx->map = NULL; There's a mismatch between start and stop, could we create the mapping in dw100_start_streaming() instead of dw100_start() ? > + } > +} > + > +static const struct vb2_ops dw100_qops = { > + .queue_setup = dw100_queue_setup, > + .buf_prepare = dw100_buf_prepare, > + .buf_queue = dw100_buf_queue, > + .start_streaming = dw100_start_streaming, > + .stop_streaming = dw100_stop_streaming, > + .wait_prepare = vb2_ops_wait_prepare, > + .wait_finish = vb2_ops_wait_finish, > +}; > + > +static int dw100_m2m_queue_init(void *priv, struct vb2_queue *src_vq, > + struct vb2_queue *dst_vq) > +{ > + struct dw100_ctx *ctx = priv; > + int ret; > + > + src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT; > + src_vq->io_modes = VB2_MMAP | VB2_DMABUF; > + src_vq->drv_priv = ctx; > + src_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer); > + src_vq->ops = &dw100_qops; > + src_vq->mem_ops = &vb2_dma_contig_memops; > + src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; > + src_vq->lock = &ctx->dw_dev->vfd_mutex; This will serialize queue operations across all contexts. Wouldn't it be better to have a per-context lock ? > + src_vq->dev = ctx->dw_dev->v4l2_dev.dev; > + > + ret = vb2_queue_init(src_vq); > + if (ret) > + return ret; > + > + dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; > + dst_vq->io_modes = VB2_MMAP | VB2_DMABUF; > + dst_vq->drv_priv = ctx; > + dst_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer); > + dst_vq->ops = &dw100_qops; > + dst_vq->mem_ops = &vb2_dma_contig_memops; > + dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; > + dst_vq->lock = &ctx->dw_dev->vfd_mutex; > + dst_vq->dev = ctx->dw_dev->v4l2_dev.dev; > + > + return vb2_queue_init(dst_vq); > +} > + > +static int dw100_open(struct file *file) > +{ > + struct dw100_device *dw_dev = video_drvdata(file); > + struct dw100_ctx *ctx; > + struct v4l2_ctrl_handler *hdl; > + int ret = 0, i; > + > + if (mutex_lock_interruptible(&dw_dev->vfd_mutex)) > + return -ERESTARTSYS; > + > + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); > + if (!ctx) { > + ret = -ENOMEM; > + goto open_unlock; > + } This can be moved before the mutex_lock call (with a return -ENOMEM instead of the goto). Actually I don't think you need the lock at all in this function. > + > + v4l2_fh_init(&ctx->fh, video_devdata(file)); > + file->private_data = &ctx->fh; > + ctx->dw_dev = dw_dev; > + > + hdl = &ctx->hdl; > + v4l2_ctrl_handler_init(hdl, NUM_CTRLS); > + for (i = 0; i < NUM_CTRLS; i++) { > + ctx->ctrls[i] = v4l2_ctrl_new_custom(hdl, &controls[i], NULL); > + if (hdl->error) { > + v4l2_err(&ctx->dw_dev->v4l2_dev, > + "Adding control (%d) failed\n", i); > + ret = hdl->error; > + v4l2_ctrl_handler_free(hdl); > + v4l2_fh_exit(&ctx->fh); > + kfree(ctx); > + goto open_unlock; > + } > + } > + ctx->fh.ctrl_handler = hdl; > + > + ctx->q_data[V4L2_M2M_SRC].fmt = &formats[0]; > + ctx->q_data[V4L2_M2M_SRC].width = 640; > + ctx->q_data[V4L2_M2M_SRC].height = 480; > + ctx->q_data[V4L2_M2M_SRC].bytesperline = > + dw100_bytesperline(&formats[0], 640); > + ctx->q_data[V4L2_M2M_SRC].sizeimage = > + dw100_sizeimage(&formats[0], 640, 480); > + > + ctx->q_data[V4L2_M2M_SRC].crop.top = 0; > + ctx->q_data[V4L2_M2M_SRC].crop.left = 0; > + ctx->q_data[V4L2_M2M_SRC].crop.width = 640; > + ctx->q_data[V4L2_M2M_SRC].crop.height = 480; > + > + ctx->q_data[V4L2_M2M_DST] = ctx->q_data[V4L2_M2M_SRC]; > + > + ctx->colorspace = V4L2_COLORSPACE_REC709; > + ctx->xfer_func = V4L2_MAP_XFER_FUNC_DEFAULT(ctx->colorspace); > + ctx->ycbcr_enc = V4L2_MAP_YCBCR_ENC_DEFAULT(ctx->colorspace); > + ctx->quant = V4L2_MAP_QUANTIZATION_DEFAULT(false, > + ctx->colorspace, > + ctx->ycbcr_enc); > + > + ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(dw_dev->m2m_dev, > + ctx, &dw100_m2m_queue_init); > + > + if (IS_ERR(ctx->fh.m2m_ctx)) { > + ret = PTR_ERR(ctx->fh.m2m_ctx); > + > + v4l2_ctrl_handler_free(hdl); > + v4l2_fh_exit(&ctx->fh); > + kfree(ctx); > + goto open_unlock; > + } > + > + v4l2_fh_add(&ctx->fh); > + > + dprintk(1, dw_dev, "M2M instance created: %p", ctx->fh.m2m_ctx); I'd drop this message too. > + > +open_unlock: > + mutex_unlock(&dw_dev->vfd_mutex); > + return ret; > +} > + > +static int dw100_release(struct file *file) > +{ > + struct dw100_device *dw_dev = video_drvdata(file); > + struct dw100_ctx *ctx = file2ctx(file); > + > + dprintk(1, dw_dev, "Releasing M2M instance: %p", ctx->fh.m2m_ctx); > + > + v4l2_fh_del(&ctx->fh); > + v4l2_fh_exit(&ctx->fh); > + v4l2_ctrl_handler_free(&ctx->hdl); > + > + mutex_lock(&dw_dev->vfd_mutex); > + v4l2_m2m_ctx_release(ctx->fh.m2m_ctx); > + mutex_unlock(&dw_dev->vfd_mutex); > + > + if (ctx->map) > + dma_free_coherent(&ctx->dw_dev->pdev->dev, > + ctx->map_size, > + ctx->map, > + ctx->map_dma); Can this happen, doesn't the M2M framework guarantee that streaming is stopped, which then guarantees that the .stop_streaming() handler has freed the mapping ? > + > + kfree(ctx); > + > + return 0; > +} > + > +static const struct v4l2_file_operations dw100_fops = { > + .owner = THIS_MODULE, > + .open = dw100_open, > + .release = dw100_release, > + .poll = v4l2_m2m_fop_poll, > + .unlocked_ioctl = video_ioctl2, > + .mmap = v4l2_m2m_fop_mmap, > +}; > + > +static int dw100_querycap(struct file *file, void *priv, > + struct v4l2_capability *cap) > +{ > + strscpy(cap->driver, DRV_NAME, sizeof(cap->driver)); > + strscpy(cap->card, "DW100 dewarper", sizeof(cap->card)); > + snprintf(cap->bus_info, sizeof(cap->bus_info), "platform:%s", DRV_NAME); Not needed anymore, the core handles it in v4l_querycap() (and the value here is incorrect, it should use the device name, not hardcode the driver name). > + > + return 0; > +} > + > +static int dw100_enum_fmt(struct v4l2_fmtdesc *f) > +{ > + int i, num = 0; > + > + for (i = 0; i < NUM_FORMATS; i++) { > + if (formats[i].types & to_dw100_fmt_type(f->type)) { > + if (num == f->index) { > + f->pixelformat = formats[i].fourcc; > + return 0; > + } > + ++num; > + } > + } > + > + return -EINVAL; > +} > + > +static int dw100_enum_fmt_vid_cap(struct file *file, void *priv, > + struct v4l2_fmtdesc *f) > +{ > + return dw100_enum_fmt(f); > +} > + > +static int dw100_enum_fmt_vid_out(struct file *file, void *priv, > + struct v4l2_fmtdesc *f) > +{ > + return dw100_enum_fmt(f); > +} You can merge those three functions into a single one. > + > +static int dw100_enum_framesizes(struct file *file, void *priv, > + struct v4l2_frmsizeenum *fsize) > +{ > + struct dw100_fmt *fmt; > + > + if (fsize->index) > + return -EINVAL; > + > + fmt = find_pixel_format(fsize->pixel_format, > + V4L2_M2M_OUTPUT | V4L2_M2M_CAPTURE); I think you should restrict the format type based on fsize->type. > + if (!fmt) > + return -EINVAL; > + > + fsize->type = V4L2_FRMSIZE_TYPE_STEPWISE; > + fsize->stepwise.min_width = MIN_W; > + fsize->stepwise.min_height = MIN_H; > + fsize->stepwise.max_width = MAX_W; > + fsize->stepwise.max_height = MAX_H; > + fsize->stepwise.step_width = (1UL << ALIGN_W); > + fsize->stepwise.step_height = (1UL << ALIGN_H); No need for the parentheses. > + > + return 0; > +} > + > +static int dw100_g_fmt(struct dw100_ctx *ctx, struct v4l2_format *f) > +{ > + struct vb2_queue *vq; > + struct dw100_q_data *q_data; > + > + vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type); > + if (!vq) > + return -EINVAL; > + > + q_data = get_q_data(ctx, f->type); > + > + f->fmt.pix.width = q_data->width; > + f->fmt.pix.height = q_data->height; > + f->fmt.pix.field = V4L2_FIELD_NONE; > + f->fmt.pix.pixelformat = q_data->fmt->fourcc; > + f->fmt.pix.bytesperline = q_data->bytesperline; > + f->fmt.pix.sizeimage = q_data->sizeimage; > + f->fmt.pix.colorspace = ctx->colorspace; > + f->fmt.pix.xfer_func = ctx->xfer_func; > + f->fmt.pix.ycbcr_enc = ctx->ycbcr_enc; > + f->fmt.pix.quantization = ctx->quant; > + > + return 0; > +} > + > +static int dw100_g_fmt_vid_out(struct file *file, void *priv, > + struct v4l2_format *f) > +{ > + return dw100_g_fmt(file2ctx(file), f); > +} > + > +static int dw100_g_fmt_vid_cap(struct file *file, void *priv, > + struct v4l2_format *f) > +{ > + return dw100_g_fmt(file2ctx(file), f); > +} You can also merge those three functions into a single one. > + > +static int dw100_try_fmt(struct v4l2_format *f, struct dw100_fmt *fmt) > +{ > + v4l_bound_align_image(&f->fmt.pix.width, MIN_W, MAX_W, ALIGN_W, > + &f->fmt.pix.height, MIN_H, MAX_H, ALIGN_H, 0); > + > + f->fmt.pix.bytesperline = dw100_bytesperline(fmt, f->fmt.pix.width); > + f->fmt.pix.sizeimage = dw100_sizeimage(fmt, f->fmt.pix.width, > + f->fmt.pix.height); No need to support a custom line stride ? > + f->fmt.pix.field = V4L2_FIELD_NONE; > + > + return 0; > +} > + > +static int dw100_s_fmt(struct dw100_ctx *ctx, struct v4l2_format *f) > +{ > + struct dw100_q_data *q_data; > + struct vb2_queue *vq; > + > + vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type); > + if (!vq) > + return -EINVAL; > + > + q_data = get_q_data(ctx, f->type); > + if (!q_data) > + return -EINVAL; > + > + if (vb2_is_busy(vq)) { > + v4l2_err(&ctx->dw_dev->v4l2_dev, "%s queue busy\n", __func__); > + return -EBUSY; > + } > + > + q_data->fmt = find_format(f); > + q_data->width = f->fmt.pix.width; > + q_data->height = f->fmt.pix.height; > + q_data->bytesperline = f->fmt.pix.bytesperline; > + q_data->sizeimage = f->fmt.pix.sizeimage; It could be easier to store an instance of v4l2_pix_format in dw100_q_data. > + > + q_data->crop.top = 0; > + q_data->crop.left = 0; > + q_data->crop.width = f->fmt.pix.width; > + q_data->crop.height = f->fmt.pix.height; > + > + dprintk(1, ctx->dw_dev, > + "Setting format for type %d, wxh: %dx%d, fmt: %d\n", > + f->type, q_data->width, q_data->height, q_data->fmt->fourcc); > + > + return 0; > +} > + > +static int dw100_try_fmt_vid_cap(struct file *file, void *priv, > + struct v4l2_format *f) > +{ > + struct dw100_fmt *fmt; > + struct dw100_ctx *ctx = file2ctx(file); > + > + if (f->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) > + return -EINVAL; > + > + fmt = find_format(f); > + if (!fmt) { > + f->fmt.pix.pixelformat = formats[0].fourcc; > + fmt = find_format(f); > + } > + > + f->fmt.pix.colorspace = ctx->colorspace; > + f->fmt.pix.xfer_func = ctx->xfer_func; > + f->fmt.pix.ycbcr_enc = ctx->ycbcr_enc; > + f->fmt.pix.quantization = ctx->quant; > + > + return dw100_try_fmt(f, fmt); > +} > + > +static int dw100_s_fmt_vid_cap(struct file *file, void *priv, > + struct v4l2_format *f) > +{ > + struct dw100_ctx *ctx = file2ctx(file); > + int ret; > + > + ret = dw100_try_fmt_vid_cap(file, priv, f); > + if (ret) > + return ret; > + > + ret = dw100_s_fmt(ctx, f); > + if (ret) > + return ret; > + > + return 0; > +} > + > +static int dw100_try_fmt_vid_out(struct file *file, void *priv, > + struct v4l2_format *f) > +{ > + struct dw100_fmt *fmt; > + > + if (f->type != V4L2_BUF_TYPE_VIDEO_OUTPUT) > + return -EINVAL; > + > + fmt = find_format(f); > + if (!fmt) { > + f->fmt.pix.pixelformat = formats[0].fourcc; > + fmt = find_format(f); > + } > + > + if (!f->fmt.pix.colorspace) > + f->fmt.pix.colorspace = V4L2_COLORSPACE_REC709; > + > + return dw100_try_fmt(f, fmt); > +} > + > +static int dw100_s_fmt_vid_out(struct file *file, void *priv, > + struct v4l2_format *f) > +{ > + struct dw100_ctx *ctx = file2ctx(file); > + int ret; > + > + ret = dw100_try_fmt_vid_out(file, priv, f); > + if (ret) > + return ret; > + > + ret = dw100_s_fmt(ctx, f); > + if (ret) > + return ret; > + > + ctx->colorspace = f->fmt.pix.colorspace; > + ctx->xfer_func = f->fmt.pix.xfer_func; > + ctx->ycbcr_enc = f->fmt.pix.ycbcr_enc; > + ctx->quant = f->fmt.pix.quantization; > + > + return 0; > +} > + > +static int dw100_g_selection(struct file *file, void *fh, > + struct v4l2_selection *sel) > +{ > + struct dw100_ctx *ctx = file2ctx(file); > + struct dw100_q_data *src_q_data, *dst_q_data; > + > + if (sel->type != V4L2_BUF_TYPE_VIDEO_OUTPUT && > + sel->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) > + return -EINVAL; > + > + src_q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); > + dst_q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); > + > + switch (sel->target) { > + case V4L2_SEL_TGT_CROP_DEFAULT: > + case V4L2_SEL_TGT_CROP_BOUNDS: > + sel->r.top = 0; > + sel->r.left = 0; > + sel->r.width = src_q_data->width; > + sel->r.height = src_q_data->height; > + break; > + case V4L2_SEL_TGT_CROP: > + sel->r.top = src_q_data->crop.top; > + sel->r.left = src_q_data->crop.left; > + sel->r.width = src_q_data->crop.width; > + sel->r.height = src_q_data->crop.height; > + break; > + case V4L2_SEL_TGT_COMPOSE_DEFAULT: > + case V4L2_SEL_TGT_COMPOSE_BOUNDS: > + case V4L2_SEL_TGT_COMPOSE: > + sel->r.top = 0; > + sel->r.left = 0; > + sel->r.width = dst_q_data->width; > + sel->r.height = dst_q_data->height; > + break; As far as I understand the driver doesn't support composition, maybe you could just return -EINVAL ? > + default: > + return -EINVAL; > + } > + > + dprintk(1, ctx->dw_dev, > + "<<< Buffer Type: %d Target: %d Rect: %dx%d@%d.%d\n", > + sel->type, sel->target, > + sel->r.width, sel->r.height, sel->r.left, sel->r.top); > + > + return 0; > +} > + > +static int dw100_s_selection(struct file *file, void *fh, Replace tab with space. > + struct v4l2_selection *sel) > +{ > + struct dw100_ctx *ctx = file2ctx(file); > + struct dw100_q_data *src_q_data, *dst_q_data; > + u32 qscalex, qscaley, qscale; > + int x, y, w, h; > + > + if (sel->type != V4L2_BUF_TYPE_VIDEO_OUTPUT && > + sel->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) > + return -EINVAL; > + > + src_q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT); > + dst_q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE); > + > + dprintk(1, ctx->dw_dev, > + ">>> Buffer Type: %d Target: %d Rect: %dx%d@%d.%d\n", > + sel->type, sel->target, > + sel->r.width, sel->r.height, sel->r.left, sel->r.top); > + > + if (sel->r.top < 0 || sel->r.left < 0) > + return -EINVAL; Clamp the top and left values to the supported range, .s_selection() must adjust unsupported values, not return an error. > + > + switch (sel->target) { > + case V4L2_SEL_TGT_CROP: > + /* UQ16.16 for float operations */ > + if ((sel->r.left + sel->r.width > src_q_data->width) || > + (sel->r.top + sel->r.height > src_q_data->height)) > + return -EINVAL; Same. > + qscalex = (sel->r.width << 16) / src_q_data->width; > + qscaley = (sel->r.height << 16) / src_q_data->height; > + y = sel->r.top; > + x = sel->r.left; > + if (qscalex == qscaley) { > + qscale = qscalex; > + } else { > + switch (sel->flags) { > + case 0: > + qscale = (qscalex + qscaley) / 2; > + break; > + case V4L2_SEL_FLAG_GE: > + qscale = max(qscaley, qscalex); > + break; > + case V4L2_SEL_FLAG_LE: > + qscale = min(qscaley, qscalex); > + break; > + case V4L2_SEL_FLAG_LE | V4L2_SEL_FLAG_GE: > + return -ERANGE; > + default: > + return -EINVAL; > + } > + } > + > + w = (u32)((((u64)src_q_data->width << 16) * qscale) >> 32); > + h = (u32)((((u64)src_q_data->height << 16) * qscale) >> 32); > + x = x + (sel->r.width - w) / 2; > + y = y + (sel->r.height - h) / 2; > + x = min(src_q_data->width - w, (unsigned int)max(0, x)); > + y = min(src_q_data->height - h, (unsigned int)max(0, y)); > + > + sel->r.top = y; > + sel->r.left = x; > + sel->r.width = w; > + sel->r.height = h; > + > + src_q_data->crop.top = sel->r.top; > + src_q_data->crop.left = sel->r.left; > + src_q_data->crop.width = sel->r.width; > + src_q_data->crop.height = sel->r.height; > + break; > + case V4L2_SEL_TGT_COMPOSE: > + if ((sel->r.left + sel->r.width > dst_q_data->width) || > + (sel->r.top + sel->r.height > dst_q_data->height)) > + return -EINVAL; > + sel->r.top = 0; > + sel->r.left = 0; > + sel->r.width = dst_q_data->width; > + sel->r.height = dst_q_data->height; > + break; > + case V4L2_SEL_TGT_CROP_DEFAULT: > + case V4L2_SEL_TGT_CROP_BOUNDS: > + case V4L2_SEL_TGT_COMPOSE_DEFAULT: > + case V4L2_SEL_TGT_COMPOSE_BOUNDS: > + default: > + return -EINVAL; > + } > + > + dprintk(1, ctx->dw_dev, > + "<<< Buffer Type: %d Target: %d Rect: %dx%d@%d.%d\n", > + sel->type, sel->target, > + sel->r.width, sel->r.height, sel->r.left, sel->r.top); > + > + return 0; > +} > + > +static const struct v4l2_ioctl_ops dw100_ioctl_ops = { > + .vidioc_querycap = dw100_querycap, > + > + .vidioc_enum_fmt_vid_cap = dw100_enum_fmt_vid_cap, > + .vidioc_enum_framesizes = dw100_enum_framesizes, > + .vidioc_g_fmt_vid_cap = dw100_g_fmt_vid_cap, > + .vidioc_try_fmt_vid_cap = dw100_try_fmt_vid_cap, > + .vidioc_s_fmt_vid_cap = dw100_s_fmt_vid_cap, > + > + .vidioc_enum_fmt_vid_out = dw100_enum_fmt_vid_out, > + .vidioc_g_fmt_vid_out = dw100_g_fmt_vid_out, > + .vidioc_try_fmt_vid_out = dw100_try_fmt_vid_out, > + .vidioc_s_fmt_vid_out = dw100_s_fmt_vid_out, > + > + .vidioc_g_selection = dw100_g_selection, > + .vidioc_s_selection = dw100_s_selection, > + .vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs, > + .vidioc_querybuf = v4l2_m2m_ioctl_querybuf, > + .vidioc_qbuf = v4l2_m2m_ioctl_qbuf, > + .vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf, > + .vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf, > + .vidioc_create_bufs = v4l2_m2m_ioctl_create_bufs, > + .vidioc_expbuf = v4l2_m2m_ioctl_expbuf, > + > + .vidioc_streamon = v4l2_m2m_ioctl_streamon, > + .vidioc_streamoff = v4l2_m2m_ioctl_streamoff, > + > + .vidioc_subscribe_event = v4l2_ctrl_subscribe_event, > + .vidioc_unsubscribe_event = v4l2_event_unsubscribe, > +}; > + > +static void dw100_job_finish(struct dw100_device *dw_dev, bool with_error) > +{ > + struct dw100_ctx *curr_ctx; > + struct vb2_v4l2_buffer *src_vb, *dst_vb; > + unsigned long flags; > + enum vb2_buffer_state buf_state; > + > + curr_ctx = v4l2_m2m_get_curr_priv(dw_dev->m2m_dev); > + > + if (!curr_ctx) { > + v4l2_err(&dw_dev->v4l2_dev, > + "Instance released before the end of transaction\n"); > + return; > + } > + > + src_vb = v4l2_m2m_src_buf_remove(curr_ctx->fh.m2m_ctx); > + dst_vb = v4l2_m2m_dst_buf_remove(curr_ctx->fh.m2m_ctx); > + > + if (likely(!with_error)) > + buf_state = VB2_BUF_STATE_DONE; > + else > + buf_state = VB2_BUF_STATE_ERROR; > + > + spin_lock_irqsave(&dw_dev->irqlock, flags); > + v4l2_m2m_buf_done(src_vb, buf_state); > + v4l2_m2m_buf_done(dst_vb, buf_state); > + spin_unlock_irqrestore(&dw_dev->irqlock, flags); And the lock could be droppped from here if it's dropped from dw100_stop_streaming(). > + > + dprintk(2, dw_dev, "Finishing transaction with%s error(s)\n", > + with_error ? "" : "out"); > + > + v4l2_m2m_job_finish(dw_dev->m2m_dev, curr_ctx->fh.m2m_ctx); > +} > + > +static void dw100_hw_reset(struct dw100_device *dw_dev) > +{ > + u32 val; > + > + val = readl(dw_dev->mmio + DW100_DEWARP_CTRL); > + val |= DW100_DEWARP_CTRL_ENABLE; > + val |= DW100_DEWARP_CTRL_SOFT_RESET; > + writel(val, dw_dev->mmio + DW100_DEWARP_CTRL); > + val &= ~DW100_DEWARP_CTRL_SOFT_RESET; > + writel(val, dw_dev->mmio + DW100_DEWARP_CTRL); > +} > + > +static void _dw100_hw_set_master_bus_enable(struct dw100_device *dw_dev, > + unsigned int enable) > +{ > + u32 val; > + void __iomem *addr = dw_dev->mmio + DW100_BUS_CTRL; > + > + dprintk(3, dw_dev, "%sable master bus\n", enable ? "En" : "Dis"); > + > + val = readl(addr); > + > + if (enable) > + val |= DW100_BUS_CTRL_AXI_MASTER_ENABLE; > + else > + val &= ~DW100_BUS_CTRL_AXI_MASTER_ENABLE; > + > + writel(val, addr); > +} > + > +static void dw100_hw_master_bus_enable(struct dw100_device *dw_dev) > +{ > + _dw100_hw_set_master_bus_enable(dw_dev, 1); > +} > + > +static void dw100_hw_master_bus_disable(struct dw100_device *dw_dev) > +{ > + _dw100_hw_set_master_bus_enable(dw_dev, 0); > +} > + > +static void dw100_hw_dewarp_start(struct dw100_device *dw_dev) > +{ > + u32 val; > + void __iomem *addr = dw_dev->mmio + DW100_DEWARP_CTRL; > + > + val = readl(addr); > + > + dprintk(3, dw_dev, "Starting Hardware CTRL:%x\n", val); > + writel(val | DW100_DEWARP_CTRL_START, addr); > + writel(val, addr); > +} > + > +static void dw100_hw_init_ctrl(struct dw100_device *dw_dev) > +{ > + u32 val; > + void __iomem *addr = dw_dev->mmio + DW100_DEWARP_CTRL; > + /* > + * Input format YUV422_SP > + * Output format YUV422_SP > + * No hardware handshake (SW) > + * No automatic double src buffering (Single) > + * No automatic double dst buffering (Single) > + * No Black Line > + * Prefetch image pixel traversal > + */ > + > + val = DW100_DEWARP_CTRL_ENABLE > + /* Valid only for auto prefetch mode*/ > + | DW100_DEWARP_CTRL_PREFETCH_THRESHOLD(32); Align | below the =. > + > + /* > + * Calculation mode required to support any scaling factor, > + * but x4 slower than traversal mode. > + * > + * DW100_DEWARP_CTRL_PREFETCH_MODE_TRAVERSAL > + * DW100_DEWARP_CTRL_PREFETCH_MODE_CALCULATION > + * DW100_DEWARP_CTRL_PREFETCH_MODE_AUTO > + * > + * TODO: Find heuristics requiring calculation mode > + * Extra blank line. > + */ > + val |= DW100_DEWARP_CTRL_PREFETCH_MODE_CALCULATION; > + > + writel(val, addr); > +} > + > +static void dw100_hw_set_pixel_boundary(struct dw100_device *dw_dev) > +{ > + u32 val; > + void __iomem *addr = dw_dev->mmio + DW100_BOUNDARY_PIXEL; > + > + val = DW100_BOUNDARY_PIXEL_V(128) > + | DW100_BOUNDARY_PIXEL_U(128) > + | DW100_BOUNDARY_PIXEL_Y(0); > + > + writel(val, addr); > +} > + > +static void dw100_hw_set_scale(struct dw100_device *dw_dev, u8 scale) > +{ > + void __iomem *addr = dw_dev->mmio + DW100_SCALE_FACTOR; > + > + dprintk(1, dw_dev, "Setting scale factor to %d\n", scale); > + > + writel(scale, addr); > +} > + > +static void dw100_hw_set_roi(struct dw100_device *dw_dev, u32 x, u32 y) > +{ > + u32 val; > + void __iomem *addr = dw_dev->mmio + DW100_ROI_START; > + > + dprintk(1, dw_dev, "Setting ROI region to %d.%d\n", x, y); > + > + val = DW100_ROI_START_X(x) | DW100_ROI_START_Y(y); > + > + writel(val, addr); > +} > + > +static void dw100_hw_set_src_crop(struct dw100_device *dw_dev, > + struct dw100_q_data *src_q_data, > + struct dw100_q_data *dst_q_data) const > +{ > + struct v4l2_rect *rect = &src_q_data->crop; > + u32 src_scale, qscale, left_scale, top_scale; > + > + /* HW Scale is UQ1.7 encoded */ > + src_scale = (rect->width << 7) / src_q_data->width; > + dw100_hw_set_scale(dw_dev, src_scale); > + > + qscale = (dst_q_data->width << 7) / src_q_data->width; > + > + left_scale = (((rect->left << 7) * qscale) >> 14); > + top_scale = (((rect->top << 7) * qscale) >> 14); No need for the outer parentheses. > + > + dw100_hw_set_roi(dw_dev, left_scale, top_scale); > +} > + > +static void dw100_hw_set_source(struct dw100_device *dw_dev, > + struct dw100_q_data *q_data, > + dma_addr_t addr) > +{ > + u32 width, height, stride, fourcc, val; > + struct dw100_fmt *fmt = q_data->fmt; > + > + width = q_data->width; > + height = q_data->height; > + stride = q_data->bytesperline; > + fourcc = q_data->fmt->fourcc; > + > + dprintk(3, dw_dev, "Set HW source registers for %dx%d - stride %d, pixfmt: %x, dma:%pa\n", > + width, height, stride, fourcc, &addr); > + > + /* Pixel Format */ > + val = readl(dw_dev->mmio + DW100_DEWARP_CTRL); > + > + val &= ~DW100_DEWARP_CTRL_INPUT_FORMAT_MASK; > + val |= DW100_DEWARP_CTRL_INPUT_FORMAT(fmt->reg_format); > + > + writel(val, dw_dev->mmio + DW100_DEWARP_CTRL); > + > + /* Swap */ > + val = readl(dw_dev->mmio + DW100_SWAP_CONTROL); > + > + val &= ~DW100_SWAP_CONTROL_SRC_MASK; > + /* > + * Data swapping is performed only on Y plane for source image. > + */ > + if (fmt->reg_swap_uv && > + fmt->reg_format == DW100_DEWARP_CTRL_FORMAT_YUV422_PACKED) > + val |= DW100_SWAP_CONTROL_SRC(DW100_SWAP_CONTROL_Y > + (DW100_SWAP_CONTROL_BYTE)); > + > + writel(val, dw_dev->mmio + DW100_SWAP_CONTROL); > + > + /* Image resolution */ > + writel(DW100_IMG_SIZE_WIDTH(width) | DW100_IMG_SIZE_HEIGHT(height), > + dw_dev->mmio + DW100_SRC_IMG_SIZE); > + > + writel(stride, dw_dev->mmio + DW100_SRC_IMG_STRIDE); > + > + /* Buffers */ > + writel(DW100_IMG_Y_BASE(addr), dw_dev->mmio + DW100_SRC_IMG_Y_BASE); > + writel(DW100_IMG_UV_BASE((addr + (stride * height))), > + dw_dev->mmio + DW100_SRC_IMG_UV_BASE); As the hardware supports non-contiguous planes, we should use the mplane API already, even if we support the single planar formats only for now. > +} > + > +static void dw100_hw_set_destination(struct dw100_device *dw_dev, > + struct dw100_q_data *q_data, > + struct dw100_fmt *ifmt, > + dma_addr_t addr) > +{ > + u32 width, height, stride, fourcc, val; > + struct dw100_fmt *fmt = q_data->fmt; > + > + width = q_data->width; > + height = q_data->height; > + stride = q_data->bytesperline; > + fourcc = q_data->fmt->fourcc; > + > + dprintk(3, dw_dev, "Set HW source registers for %dx%d - stride %d, pixfmt: %x, dma:%pa\n", > + width, height, stride, fourcc, &addr); > + > + /* Pixel Format */ > + val = readl(dw_dev->mmio + DW100_DEWARP_CTRL); > + > + val &= ~DW100_DEWARP_CTRL_OUTPUT_FORMAT_MASK; > + val |= DW100_DEWARP_CTRL_OUTPUT_FORMAT(fmt->reg_format); > + > + writel(val, dw_dev->mmio + DW100_DEWARP_CTRL); > + > + /* Swap */ > + val = readl(dw_dev->mmio + DW100_SWAP_CONTROL); > + > + val &= ~DW100_SWAP_CONTROL_DST_MASK; > + > + /* > + * Avoid to swap twice > + */ > + if (fmt->reg_swap_uv ^ > + (ifmt->reg_swap_uv && ifmt->reg_format != > + DW100_DEWARP_CTRL_FORMAT_YUV422_PACKED)) { > + if (fmt->reg_format == DW100_DEWARP_CTRL_FORMAT_YUV422_PACKED) > + val |= DW100_SWAP_CONTROL_DST(DW100_SWAP_CONTROL_Y > + (DW100_SWAP_CONTROL_BYTE)); > + else > + val |= DW100_SWAP_CONTROL_DST(DW100_SWAP_CONTROL_UV > + (DW100_SWAP_CONTROL_BYTE)); > + } > + > + writel(val, dw_dev->mmio + DW100_SWAP_CONTROL); > + > + /* Image resolution */ > + writel(DW100_IMG_SIZE_WIDTH(width) | DW100_IMG_SIZE_HEIGHT(height), > + dw_dev->mmio + DW100_DST_IMG_SIZE); > + writel(stride, dw_dev->mmio + DW100_DST_IMG_STRIDE); > + > + val = ALIGN(stride * height, 16); > + writel(DW100_IMG_Y_BASE(addr), dw_dev->mmio + DW100_DST_IMG_Y_BASE); > + writel(DW100_IMG_UV_BASE(addr + val), > + dw_dev->mmio + DW100_DST_IMG_UV_BASE); > + writel(DW100_DST_IMG_Y_SIZE(val), dw_dev->mmio + DW100_DST_IMG_Y_SIZE1); > + > + if (fmt->reg_format == DW100_DEWARP_CTRL_FORMAT_YUV420_SP) > + val /= 2; > + > + writel(DW100_DST_IMG_UV_SIZE(val), > + dw_dev->mmio + DW100_DST_IMG_UV_SIZE1); > +} > + > +static void dw100_hw_set_mapping(struct dw100_device *dw_dev, dma_addr_t addr, > + u32 width, u32 height) > +{ > + dprintk(1, dw_dev, "Set HW mapping registers for %dx%d addr:%pa", width, > + height, &addr); > + > + writel(DW100_MAP_LUT_ADDR_ADDR(addr), dw_dev->mmio + DW100_MAP_LUT_ADDR); > + writel(DW100_MAP_LUT_SIZE_WIDTH(width) > + | DW100_MAP_LUT_SIZE_HEIGHT(height), > + dw_dev->mmio + DW100_MAP_LUT_SIZE); > +} > + > +static void dw100_hw_clear_irq(struct dw100_device *dw_dev, unsigned int irq) > +{ > + writel(DW100_INTERRUPT_STATUS_INT_CLEAR(irq), > + dw_dev->mmio + DW100_INTERRUPT_STATUS); > +} > + > +static void dw100_hw_enable_irq(struct dw100_device *dw_dev) > +{ > + writel(DW100_INTERRUPT_STATUS_INT_ENABLE_MASK, > + dw_dev->mmio + DW100_INTERRUPT_STATUS); > +} > + > +static void dw100_hw_disable_irq(struct dw100_device *dw_dev) > +{ > + writel(0, dw_dev->mmio + DW100_INTERRUPT_STATUS); > +} > + > +static bool dw100_hw_is_frame_running(struct dw100_device *dw_dev) > +{ > + bool is_running = readl(dw_dev->mmio + DW100_INTERRUPT_STATUS) > + & DW100_INTERRUPT_STATUS_FRAME_BUSY; > + > + return is_running; > +} > + > +static u32 dw_hw_get_pending_irqs(struct dw100_device *dw_dev) > +{ > + u32 val; > + > + val = readl(dw_dev->mmio + DW100_INTERRUPT_STATUS); > + > + return DW100_INTERRUPT_STATUS_INT_STATUS(val); > +} > + > +static irqreturn_t dw100_irq_handler(int irq, void *dev_id) > +{ > + struct dw100_device *dw_dev = dev_id; > + u32 pending_irqs, err_irqs, frame_done_irq; > + bool with_error = true; > + > + pending_irqs = dw_hw_get_pending_irqs(dw_dev); > + frame_done_irq = pending_irqs & DW100_INTERRUPT_STATUS_INT_FRAME_DONE; > + err_irqs = DW100_INTERRUPT_STATUS_INT_ERR_STATUS(pending_irqs); > + > + if (frame_done_irq) { > + dprintk(3, dw_dev, "Frame done interrupt\n"); > + with_error = false; > + err_irqs &= ~DW100_INTERRUPT_STATUS_INT_ERR_STATUS > + (DW100_INTERRUPT_STATUS_INT_ERR_FRAME_DONE); > + } > + > + if (err_irqs) > + v4l2_err(&dw_dev->v4l2_dev, "Interrupt error: %#x\n", err_irqs); > + > + dw100_hw_disable_irq(dw_dev); > + dw100_hw_master_bus_disable(dw_dev); > + dw100_hw_clear_irq(dw_dev, pending_irqs | > + DW100_INTERRUPT_STATUS_INT_ERR_TIME_OUT); > + > + dw100_job_finish(dw_dev, with_error); > + > + return IRQ_HANDLED; > +} > + > +static void dw100_start(struct dw100_ctx *ctx, struct vb2_v4l2_buffer *in_vb, > + struct vb2_v4l2_buffer *out_vb) > +{ > + struct dw100_device *dw_dev = ctx->dw_dev; > + dma_addr_t p_in, p_out; > + > + p_in = vb2_dma_contig_plane_dma_addr(&in_vb->vb2_buf, 0); > + p_out = vb2_dma_contig_plane_dma_addr(&out_vb->vb2_buf, 0); > + if (!p_in || !p_out) { > + v4l2_err(&dw_dev->v4l2_dev, > + "Acquiring DMA addresses of buffers failed\n"); > + return; > + } > + > + out_vb->sequence = > + get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE)->sequence++; > + in_vb->sequence = > + get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT)->sequence++; > + > + dprintk(1, ctx->dw_dev, > + "Starting queues %p->%p buffers d:%pa->d:%pa, sequence %d->%d\n", > + v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT), > + v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE), > + &p_in, &p_out, in_vb->sequence, out_vb->sequence); > + > + out_vb->vb2_buf.timestamp = in_vb->vb2_buf.timestamp; > + out_vb->field = in_vb->field; > + if (in_vb->flags & V4L2_BUF_FLAG_TIMECODE) > + out_vb->timecode = in_vb->timecode; > + out_vb->flags = in_vb->flags & > + (V4L2_BUF_FLAG_TIMECODE | > + V4L2_BUF_FLAG_KEYFRAME | > + V4L2_BUF_FLAG_PFRAME | > + V4L2_BUF_FLAG_BFRAME | > + V4L2_BUF_FLAG_TIMESTAMP_MASK | > + V4L2_BUF_FLAG_TSTAMP_SRC_MASK); v4l2_m2m_buf_copy_metadata() will handle this for you. I think you can call it here, or in dw100_job_finish(). > + > + /* Now, let's deal with hardware ... */ > + dw100_hw_master_bus_disable(dw_dev); > + if (!ctx->map) > + dw100_create_mapping(ctx); > + > + dw100_hw_init_ctrl(dw_dev); > + dw100_hw_set_pixel_boundary(dw_dev); > + dw100_hw_set_src_crop(dw_dev, &ctx->q_data[V4L2_M2M_SRC], > + &ctx->q_data[V4L2_M2M_DST]); > + dw100_hw_set_source(dw_dev, &ctx->q_data[V4L2_M2M_SRC], p_in); > + dw100_hw_set_destination(dw_dev, &ctx->q_data[V4L2_M2M_DST], > + ctx->q_data[V4L2_M2M_SRC].fmt, p_out); > + dw100_hw_set_mapping(dw_dev, ctx->map_dma, > + ctx->map_width, ctx->map_height); > + dw100_hw_enable_irq(dw_dev); > + dw100_hw_dewarp_start(dw_dev); > + > + /* Enable Bus */ > + dw100_hw_master_bus_enable(dw_dev); > +} > + > +static void dw100_device_run(void *priv) > +{ > + struct dw100_ctx *ctx = priv; > + struct vb2_v4l2_buffer *src_buf, *dst_buf; > + > + src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); > + dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); > + > + dw100_start(ctx, src_buf, dst_buf); > +} > + > +static int dw100_job_ready(void *priv) > +{ > + struct dw100_ctx *ctx = priv; > + > + if (v4l2_m2m_num_src_bufs_ready(ctx->fh.m2m_ctx) < 1 || > + v4l2_m2m_num_dst_bufs_ready(ctx->fh.m2m_ctx) < 1) { > + dprintk(1, ctx->dw_dev, "Not enough buffers available\n"); > + return 0; > + } > + > + if (dw100_hw_is_frame_running(ctx->dw_dev)) { > + dprintk(1, ctx->dw_dev, "HW still running a frame\n"); > + return 0; > + } > + > + return 1; > +} As far as I understand you could drop this function, as the default implementation when not provided is to assume that one source buffer and one destination buffer are all that is required. > + > +static const struct v4l2_m2m_ops dw100_m2m_ops = { > + .device_run = dw100_device_run, > + .job_ready = dw100_job_ready, > +}; > + > +#ifdef CONFIG_MEDIA_CONTROLLER > +static const struct media_device_ops dw100_media_ops = { > + .req_validate = vb2_request_validate, > + .req_queue = v4l2_m2m_request_queue, > +}; Do we need the request API ? I think you can drop this. > +#endif > + > +static struct video_device *dw100_init_video_device(struct dw100_device *dw_dev) > +{ > + struct video_device *vfd = &dw_dev->vfd; > + > + vfd->vfl_dir = VFL_DIR_M2M; > + vfd->fops = &dw100_fops; > + vfd->device_caps = V4L2_CAP_VIDEO_M2M | V4L2_CAP_STREAMING; > + vfd->ioctl_ops = &dw100_ioctl_ops; > + vfd->minor = -1; > + vfd->release = video_device_release_empty; > + vfd->v4l2_dev = &dw_dev->v4l2_dev; > + vfd->lock = &dw_dev->vfd_mutex; > + > + strscpy(vfd->name, DRV_NAME, sizeof(vfd->name)); > + mutex_init(vfd->lock); Missing corresponding mutex_destroy(). > + video_set_drvdata(vfd, dw_dev); > + > + return vfd; > +} > + > +static int dw100_dump_regs_show(struct seq_file *m, void *private) > +{ > + struct dw100_device *dw_dev = m->private; > + int ret; > + > + ret = pm_runtime_resume_and_get(&dw_dev->pdev->dev); > + if (ret < 0) > + return ret; > + > + ret = dw100_dump_regs(dw_dev); I'd inline the function here as there's a single caller. > + > + pm_runtime_put_sync(&dw_dev->pdev->dev); > + > + return ret; > +} > +DEFINE_SHOW_ATTRIBUTE(dw100_dump_regs); > + > +static void dw100_debugfs_init(struct dw100_device *dw_dev) > +{ > + dw_dev->debugfs_root = > + debugfs_create_dir(dev_name(&dw_dev->pdev->dev), NULL); > + > + debugfs_create_file("dump_regs", 0600, dw_dev->debugfs_root, dw_dev, > + &dw100_dump_regs_fops); > +} > + > +static void dw100_debugfs_exit(struct dw100_device *dw_dev) > +{ > + debugfs_remove_recursive(dw_dev->debugfs_root); > +} > + > +static int dw100_probe(struct platform_device *pdev) > +{ > + struct dw100_device *dw_dev; > + struct video_device *vfd; > + struct resource *res; > + int ret = 0; No need to initialize ret to 0. > + int irq; > + > + dw_dev = devm_kzalloc(&pdev->dev, sizeof(*dw_dev), GFP_KERNEL); > + if (!dw_dev) > + return -ENOMEM; > + dw_dev->pdev = pdev; > + > + ret = devm_clk_bulk_get_all(&pdev->dev, &dw_dev->clks); > + if (ret < 0) { > + dev_err(&pdev->dev, "Unable to get clocks: %d\n", ret); > + return ret; > + } > + dw_dev->num_clks = ret; > + > + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); > + dw_dev->mmio = devm_ioremap_resource(&pdev->dev, res); > + if (IS_ERR(dw_dev->mmio)) > + return PTR_ERR(dw_dev->mmio); > + > + irq = platform_get_irq(pdev, 0); > + if (irq < 0) > + return irq; > + > + ret = devm_request_threaded_irq(&pdev->dev, irq, NULL, dw100_irq_handler, Why do we need a threaded IRQ handler ? > + IRQF_ONESHOT, dev_name(&pdev->dev), > + dw_dev); > + if (ret < 0) { > + dev_err(&pdev->dev, "Failed to request irq: %d\n", ret); > + return ret; > + } I'm always a bit worried when requesting an IRQ before we ensure the device won't generate an interrupt. > + > + platform_set_drvdata(pdev, dw_dev); > + > + pm_runtime_enable(&pdev->dev); > + ret = pm_runtime_resume_and_get(&pdev->dev); > + if (ret < 0) { > + dev_err(&pdev->dev, "Unable to enable clks: %d\n", ret); > + goto err_clk; > + } > + > + dw100_hw_reset(dw_dev); > + > + pm_runtime_put_sync(&pdev->dev); I wonder if the device should be reset every time runtime PM resumes. The dw100_hw_reset() call could move to a runtime PM resume handler, and this could then be dropped. > + > + spin_lock_init(&dw_dev->irqlock); > + > + ret = v4l2_device_register(&pdev->dev, &dw_dev->v4l2_dev); > + if (ret) > + goto err_clk; > + > + vfd = dw100_init_video_device(dw_dev); > + > + dw_dev->m2m_dev = v4l2_m2m_init(&dw100_m2m_ops); > + if (IS_ERR(dw_dev->m2m_dev)) { > + v4l2_err(&dw_dev->v4l2_dev, "Failed to init mem2mem device\n"); > + ret = PTR_ERR(dw_dev->m2m_dev); > + goto err_v4l2; > + } > + > +#ifdef CONFIG_MEDIA_CONTROLLER Let's make this unconditional as the MC API is needed by libcamera to enumerate media devices. > + dw_dev->mdev.dev = &pdev->dev; > + strscpy(dw_dev->mdev.model, "dw100", sizeof(dw_dev->mdev.model)); > + strscpy(dw_dev->mdev.bus_info, "platform:dw100", > + sizeof(dw_dev->mdev.bus_info)); No needed anymore either, it's handled by the core. > + media_device_init(&dw_dev->mdev); > + dw_dev->mdev.ops = &dw100_media_ops; > + dw_dev->v4l2_dev.mdev = &dw_dev->mdev; > +#endif > + > + ret = video_register_device(vfd, VFL_TYPE_VIDEO, -1); > + if (ret) { > + v4l2_err(&dw_dev->v4l2_dev, "Failed to register video device\n"); > + goto err_m2m; > + } > + > +#ifdef CONFIG_MEDIA_CONTROLLER > + ret = v4l2_m2m_register_media_controller(dw_dev->m2m_dev, vfd, > + MEDIA_ENT_F_PROC_VIDEO_SCALER); > + if (ret) { > + v4l2_err(&dw_dev->v4l2_dev, "Failed to init mem2mem media controller\n"); I'd use dev_err() (and dev_info(), ...) through the driver. > + goto error_v4l2; > + } > + > + ret = media_device_register(&dw_dev->mdev); > + if (ret) { > + v4l2_err(&dw_dev->v4l2_dev, "Failed to register mem2mem media device\n"); > + goto error_m2m_mc; > + } > +#endif > + > + dw100_debugfs_init(dw_dev); > + > + v4l2_info(&dw_dev->v4l2_dev, > + "dw100 v4l2 m2m registered as /dev/video%d\n", vfd->num); > + > + return 0; A blank line would be nice. > +#ifdef CONFIG_MEDIA_CONTROLLER > +error_m2m_mc: > + v4l2_m2m_unregister_media_controller(dw_dev->m2m_dev); > +error_v4l2: > + video_unregister_device(vfd); > +#endif > +err_m2m: > + v4l2_m2m_release(dw_dev->m2m_dev); > +err_v4l2: > + v4l2_device_unregister(&dw_dev->v4l2_dev); > +err_clk: > + pm_runtime_disable(&pdev->dev); > + > + return ret; > +} > + > +static int dw100_remove(struct platform_device *pdev) > +{ > + struct dw100_device *dw_dev = platform_get_drvdata(pdev); > + > + dw100_debugfs_exit(dw_dev); > + > + pm_runtime_disable(&pdev->dev); > + > +#ifdef CONFIG_MEDIA_CONTROLLER > + media_device_unregister(&dw_dev->mdev); > + v4l2_m2m_unregister_media_controller(dw_dev->m2m_dev); > + media_device_cleanup(&dw_dev->mdev); > +#endif > + > + video_unregister_device(&dw_dev->vfd); > + v4l2_m2m_release(dw_dev->m2m_dev); > + v4l2_device_unregister(&dw_dev->v4l2_dev); > + > + return 0; > +} > + > +static int __maybe_unused dw100_runtime_suspend(struct device *dev) > +{ > + struct dw100_device *dw_dev = dev_get_drvdata(dev); > + > + clk_bulk_disable_unprepare(dw_dev->num_clks, dw_dev->clks); > + > + return 0; > +} > + > +static int __maybe_unused dw100_runtime_resume(struct device *dev) > +{ > + struct dw100_device *dw_dev = dev_get_drvdata(dev); > + > + return clk_bulk_prepare_enable(dw_dev->num_clks, dw_dev->clks); > +} > + > +static const struct dev_pm_ops dw100_pm = { > + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, > + pm_runtime_force_resume) > + SET_RUNTIME_PM_OPS(dw100_runtime_suspend, > + dw100_runtime_resume, NULL) > +}; > + > +static const struct of_device_id dw100_dt_ids[] = { > + { .compatible = "nxp,imx8mp-dw100", .data = NULL }, > + { }, > +}; > +MODULE_DEVICE_TABLE(of, dw100_dt_ids); > + > +static struct platform_driver dw100_driver = { > + .probe = dw100_probe, > + .remove = dw100_remove, > + .driver = { > + .name = DRV_NAME, > + .pm = &dw100_pm, > + .of_match_table = dw100_dt_ids, > + }, > +}; > + > +module_platform_driver(dw100_driver); > + > +MODULE_DESCRIPTION("DW100 Hardware dewarper"); > +MODULE_AUTHOR("Xavier Roumegue <Xavier.Roumegue@xxxxxxxxxxx>"); > +MODULE_LICENSE("GPL"); > diff --git a/drivers/media/platform/nxp/dw100/dw100_regs.h b/drivers/media/platform/nxp/dw100/dw100_regs.h > new file mode 100644 > index 000000000000..c540f6c638db > --- /dev/null > +++ b/drivers/media/platform/nxp/dw100/dw100_regs.h > @@ -0,0 +1,118 @@ > +/* SPDX-License-Identifier: GPL-2.0+ */ > +/* > + * DW100 Hardware dewarper > + * > + * Copyright 2022 NXP > + * Author: Xavier Roumegue (xavier.roumegue@xxxxxxxxxxx) > + * Extra blank line. > + */ > + > +#ifndef _DW100_REGS_H_ > +#define _DW100_REGS_H_ > + > +/* AHB register offset */ > +#define DW100_DEWARP_ID 0x00 > +#define DW100_DEWARP_CTRL 0x04 > +#define DW100_DEWARP_CTRL_ENABLE BIT(0) > +#define DW100_DEWARP_CTRL_START BIT(1) > +#define DW100_DEWARP_CTRL_SOFT_RESET BIT(2) > +#define DW100_DEWARP_CTRL_FORMAT_YUV422_SP (0UL) No need for parentheses. Same below where applicable. > +#define DW100_DEWARP_CTRL_FORMAT_YUV422_PACKED (1UL) > +#define DW100_DEWARP_CTRL_FORMAT_YUV420_SP (2UL) > +#define DW100_DEWARP_CTRL_INPUT_FORMAT_MASK GENMASK(5, 4) > +#define DW100_DEWARP_CTRL_INPUT_FORMAT(x) ((x) << 4) > +#define DW100_DEWARP_CTRL_OUTPUT_FORMAT(x) ((x) << 6) > +#define DW100_DEWARP_CTRL_OUTPUT_FORMAT_MASK GENMASK(7, 6) > +#define DW100_DEWARP_CTRL_SRC_AUTO_SHADOW BIT(8) > +#define DW100_DEWARP_CTRL_HW_HANDSHAKE BIT(9) > +#define DW100_DEWARP_CTRL_DST_AUTO_SHADOW BIT(10) > +#define DW100_DEWARP_CTRL_SPLIT_LINE BIT(11) > +#define DW100_DEWARP_CTRL_PREFETCH_MODE_MASK GENMASK(17, 16) > +#define DW100_DEWARP_CTRL_PREFETCH_MODE_TRAVERSAL (0UL << 16) > +#define DW100_DEWARP_CTRL_PREFETCH_MODE_CALCULATION (1UL << 16) > +#define DW100_DEWARP_CTRL_PREFETCH_MODE_AUTO (2UL << 16) > +#define DW100_DEWARP_CTRL_PREFETCH_THRESHOLD_MASK GENMASK(24, 18) > +#define DW100_DEWARP_CTRL_PREFETCH_THRESHOLD(x) ((x) << 18) > + > +#define DW100_MAP_LUT_ADDR 0x08 > +#define DW100_MAP_LUT_ADDR_ADDR(addr) (((addr) >> 4) & GENMASK(29, 0)) > +#define DW100_MAP_LUT_SIZE 0x0C I usually ask for lower-case hex constants as that's what is often done in V4L2, but maybe it's too much nit-picking :-) > +#define DW100_MAP_LUT_SIZE_WIDTH(w) (((w) & GENMASK(10, 0)) << 0) > +#define DW100_MAP_LUT_SIZE_HEIGHT(h) (((h) & GENMASK(10, 0)) << 16) > +#define DW100_SRC_IMG_Y_BASE 0x10 > +#define DW100_IMG_Y_BASE(base) (((base) >> 4) & GENMASK(29, 0)) > +#define DW100_SRC_IMG_UV_BASE 0x14 > +#define DW100_IMG_UV_BASE(base) (((base) >> 4) & GENMASK(29, 0)) > +#define DW100_SRC_IMG_SIZE 0x18 > +#define DW100_IMG_SIZE_WIDTH(w) (((w) & GENMASK(12, 0)) << 0) > +#define DW100_IMG_SIZE_HEIGHT(h) (((h) & GENMASK(12, 0)) << 16) > + > +#define DW100_SRC_IMG_STRIDE 0x1C > +#define DW100_MAP_LUT_ADDR2 0x20 > +#define DW100_MAP_LUT_SIZE2 0x24 > +#define DW100_SRC_IMG_Y_BASE2 0x28 > +#define DW100_SRC_IMG_UV_BASE2 0x2C > +#define DW100_SRC_IMG_SIZE2 0x30 > +#define DW100_SRC_IMG_STRIDE2 0x34 > +#define DW100_DST_IMG_Y_BASE 0x38 > +#define DW100_DST_IMG_UV_BASE 0x3C > +#define DW100_DST_IMG_SIZE 0x40 > +#define DW100_DST_IMG_STRIDE 0x44 > +#define DW100_DST_IMG_Y_BASE2 0x48 > +#define DW100_DST_IMG_UV_BASE2 0x4C > +#define DW100_DST_IMG_SIZE2 0x50 > +#define DW100_DST_IMG_STRIDE2 0x54 > +#define DW100_SWAP_CONTROL 0x58 > +#define DW100_SWAP_CONTROL_BYTE BIT(0) > +#define DW100_SWAP_CONTROL_SHORT BIT(1) > +#define DW100_SWAP_CONTROL_WORD BIT(2) > +#define DW100_SWAP_CONTROL_LONG BIT(3) > +#define DW100_SWAP_CONTROL_Y(x) (((x) & GENMASK(3, 0)) << 0) > +#define DW100_SWAP_CONTROL_UV(x) (((x) & GENMASK(3, 0)) << 4) > +#define DW100_SWAP_CONTROL_SRC(x) (((x) & GENMASK(7, 0)) << 0) > +#define DW100_SWAP_CONTROL_DST(x) (((x) & GENMASK(7, 0)) << 8) > +#define DW100_SWAP_CONTROL_SRC2(x) (((x) & GENMASK(7, 0)) << 16) > +#define DW100_SWAP_CONTROL_DST2(x) (((x) & GENMASK(7, 0)) << 24) > +#define DW100_SWAP_CONTROL_SRC_MASK GENMASK(7, 0) > +#define DW100_SWAP_CONTROL_DST_MASK GENMASK(15, 8) > +#define DW100_SWAP_CONTROL_SRC2_MASK GENMASK(23, 16) > +#define DW100_SWAP_CONTROL_DST2_MASK GENMASK(31, 24) > +#define DW100_VERTICAL_SPLIT_LINE 0x5C > +#define DW100_HORIZON_SPLIT_LINE 0x60 > +#define DW100_SCALE_FACTOR 0x64 > +#define DW100_ROI_START 0x68 > +#define DW100_ROI_START_X(x) (((x) & GENMASK(12, 0)) << 0) > +#define DW100_ROI_START_Y(y) (((y) & GENMASK(12, 0)) << 16) > +#define DW100_BOUNDARY_PIXEL 0x6C > +#define DW100_BOUNDARY_PIXEL_V(v) (((v) & GENMASK(7, 0)) << 0) > +#define DW100_BOUNDARY_PIXEL_U(u) (((u) & GENMASK(7, 0)) << 8) > +#define DW100_BOUNDARY_PIXEL_Y(y) (((y) & GENMASK(7, 0)) << 16) > + > +#define DW100_INTERRUPT_STATUS 0x70 > +#define DW100_INTERRUPT_STATUS_INT_FRAME_DONE BIT(0) > +#define DW100_INTERRUPT_STATUS_INT_ERR_TIME_OUT BIT(1) > +#define DW100_INTERRUPT_STATUS_INT_ERR_AXI_RESP BIT(2) > +#define DW100_INTERRUPT_STATUS_INT_ERR_X BIT(3) > +#define DW100_INTERRUPT_STATUS_INT_ERR_MB_FETCH BIT(4) > +#define DW100_INTERRUPT_STATUS_INT_ERR_FRAME2 BIT(5) > +#define DW100_INTERRUPT_STATUS_INT_ERR_FRAME3 BIT(6) > +#define DW100_INTERRUPT_STATUS_INT_ERR_FRAME_DONE BIT(7) > +#define DW100_INTERRUPT_STATUS_INT_ERR_STATUS(x) (((x) >> 1) & 0x7f) > +#define DW100_INTERRUPT_STATUS_INT_STATUS(x) ((x) & 0xff) > + > +#define DW100_INTERRUPT_STATUS_INT_ENABLE_MASK GENMASK(15, 8) > +#define DW100_INTERRUPT_STATUS_INT_ENABLE(x) (((x) & GENMASK(7, 0)) << 8) > +#define DW100_INTERRUPT_STATUS_FRAME_BUSY BIT(16) > +#define DW100_INTERRUPT_STATUS_INT_CLEAR(x) (((x) & GENMASK(7, 0)) << 24) > +#define DW100_BUS_CTRL 0x74 > +#define DW100_BUS_CTRL_AXI_MASTER_ENABLE BIT(31) > +#define DW100_BUS_CTRL1 0x78 > +#define DW100_BUS_TIME_OUT_CYCLE 0x7C > +#define DW100_DST_IMG_Y_SIZE1 0x80 > +#define DW100_DST_IMG_Y_SIZE(sz) (((sz) >> 4) & GENMASK(29, 0)) > +#define DW100_DST_IMG_UV_SIZE(sz) (((sz) >> 4) & GENMASK(29, 0)) > +#define DW100_DST_IMG_UV_SIZE1 0x84 > +#define DW100_DST_IMG_Y_SIZE2 0x88 > +#define DW100_DST_IMG_UV_SIZE2 0x8C > + > +#endif /* _DW100_REGS_H_ */ -- Regards, Laurent Pinchart