Hi Jacopo, Thank you for the patch. On Tue, Aug 27, 2024 at 09:40:17AM +0200, Jacopo Mondi wrote: > Currently the 'pispbe_schedule()' function does two things: > > 1) Tries to assemble a job by inspecting all the video node queues > to make sure all the required buffers are available > 2) Submit the job to the hardware > > The pispbe_schedule() function is called at: > > - video device start_streaming() time > - video device qbuf() time > - irq handler > > As assembling a job requires inspecting all queues, it is a rather > time consuming operation which is better not run in IRQ context. > > To avoid the executing the time consuming job creation in interrupt > context split the job creation and job scheduling in two distinct > operations. When a well-formed job is created, append it to the > newly introduced 'pispbe->job_queue' where it will be dequeued from > by the scheduling routine. > > At start_streaming() and qbuf() time immediately try to schedule a job > if one has been created as the irq handler routing is only called when > a job has completed, and we can't solely rely on it for scheduling new > jobs. > > Signed-off-by: Jacopo Mondi <jacopo.mondi@xxxxxxxxxxxxxxxx> > --- > .../platform/raspberrypi/pisp_be/pisp_be.c | 137 +++++++++++------- > 1 file changed, 83 insertions(+), 54 deletions(-) > > diff --git a/drivers/media/platform/raspberrypi/pisp_be/pisp_be.c b/drivers/media/platform/raspberrypi/pisp_be/pisp_be.c > index 73a5c88e25d0..f42541bb4827 100644 > --- a/drivers/media/platform/raspberrypi/pisp_be/pisp_be.c > +++ b/drivers/media/platform/raspberrypi/pisp_be/pisp_be.c > @@ -190,6 +190,8 @@ struct pispbe_hw_enables { > > /* Records a job configuration and memory addresses. */ > struct pispbe_job_descriptor { > + struct list_head queue; > + struct pispbe_buffer *buffers[PISPBE_NUM_NODES]; > dma_addr_t hw_dma_addrs[N_HW_ADDRESSES]; > struct pisp_be_tiles_config *config; A candidate for a separate patch, I think storage for pisp_be_tiles_config should be moved from pisbbe_dev.config to pispbe_buffer, like we do for the rkisp1 driver. The memory would be allocated at buffer allocation time (for config node buffers only, of course). > struct pispbe_hw_enables hw_enables; > @@ -215,8 +217,10 @@ struct pispbe_dev { > unsigned int sequence; > u32 streaming_map; > struct pispbe_job queued_job, running_job; > - spinlock_t hw_lock; /* protects "hw_busy" flag and streaming_map */ > + /* protects "hw_busy" flag, streaming_map and job_queue*/ s/job_queue/job_queue / I don't think streaming_map needs to be protected by the spinlock anymore. It is now accessed only from the qbuf, start_streaming and stop_streaming operations, which are all protected by the vb2 queue mutex... scratch that, we use a different mutex for different video devices. > + spinlock_t hw_lock; > bool hw_busy; /* non-zero if a job is queued or is being started */ > + struct list_head job_queue; > int irq; > u32 hw_version; > u8 done, started; > @@ -440,41 +444,50 @@ static void pispbe_xlate_addrs(struct pispbe_dev *pispbe, > * For Output0, Output1, Tdn and Stitch, a buffer only needs to be > * available if the blocks are enabled in the config. > * > - * Needs to be called with hw_lock held. > + * If all the buffers required to form a job are available, append the > + * job descriptor to the job queue to be later queued to the HW. > * > * Returns 0 if a job has been successfully prepared, < 0 otherwise. > */ > -static int pispbe_prepare_job(struct pispbe_dev *pispbe, > - struct pispbe_job_descriptor *job) > +static int pispbe_prepare_job(struct pispbe_dev *pispbe) > { > struct pispbe_buffer *buf[PISPBE_NUM_NODES] = {}; > + struct pispbe_job_descriptor *job; > + unsigned int streaming_map; > unsigned int config_index; > struct pispbe_node *node; > - unsigned long flags; > > - lockdep_assert_held(&pispbe->hw_lock); > + scoped_guard(spinlock_irqsave, &pispbe->hw_lock) { Do you need the irqsave version ? > + static const u32 mask = BIT(CONFIG_NODE) | BIT(MAIN_INPUT_NODE); > > - memset(job, 0, sizeof(struct pispbe_job_descriptor)); > + if ((pispbe->streaming_map & mask) != mask) > + return -ENODEV; > > - if (((BIT(CONFIG_NODE) | BIT(MAIN_INPUT_NODE)) & > - pispbe->streaming_map) != > - (BIT(CONFIG_NODE) | BIT(MAIN_INPUT_NODE))) > - return -ENODEV; > + /* > + * Take a copy of streaming_map: nodes activated after this > + * point are ignored when preparing this job. > + */ > + streaming_map = pispbe->streaming_map; > + } > + > + job = kzalloc(sizeof(*job), GFP_KERNEL); > + if (!job) > + return -ENOMEM; > > node = &pispbe->node[CONFIG_NODE]; > - spin_lock_irqsave(&node->ready_lock, flags); > - buf[CONFIG_NODE] = list_first_entry_or_null(&node->ready_queue, > - struct pispbe_buffer, > - ready_list); > - if (buf[CONFIG_NODE]) { > + > + scoped_guard(spinlock_irqsave, &node->ready_lock) { Same here. > + buf[CONFIG_NODE] = list_first_entry_or_null(&node->ready_queue, > + struct pispbe_buffer, > + ready_list); > + if (!buf[CONFIG_NODE]) { > + kfree(job); > + return -ENODEV; > + } > + > list_del(&buf[CONFIG_NODE]->ready_list); > - pispbe->queued_job.buf[CONFIG_NODE] = buf[CONFIG_NODE]; > + job->buffers[CONFIG_NODE] = buf[CONFIG_NODE]; > } > - spin_unlock_irqrestore(&node->ready_lock, flags); > - > - /* Exit early if no config buffer has been queued. */ > - if (!buf[CONFIG_NODE]) > - return -ENODEV; > > config_index = buf[CONFIG_NODE]->vb.vb2_buf.index; > job->config = &pispbe->config[config_index]; > @@ -495,7 +508,7 @@ static int pispbe_prepare_job(struct pispbe_dev *pispbe, > continue; > > buf[i] = NULL; > - if (!(pispbe->streaming_map & BIT(i))) > + if (!(streaming_map & BIT(i))) > continue; > > if ((!(rgb_en & PISP_BE_RGB_ENABLE_OUTPUT0) && > @@ -522,25 +535,27 @@ static int pispbe_prepare_job(struct pispbe_dev *pispbe, > node = &pispbe->node[i]; > > /* Pull a buffer from each V4L2 queue to form the queued job */ > - spin_lock_irqsave(&node->ready_lock, flags); > + spin_lock(&node->ready_lock); > buf[i] = list_first_entry_or_null(&node->ready_queue, > struct pispbe_buffer, > ready_list); > if (buf[i]) { > list_del(&buf[i]->ready_list); > - pispbe->queued_job.buf[i] = buf[i]; > + job->buffers[i] = buf[i]; > } > - spin_unlock_irqrestore(&node->ready_lock, flags); > + spin_unlock(&node->ready_lock); > > if (!buf[i] && !ignore_buffers) The ignore_buffers logic seems weird to me. I don't understand why we take buffers out of the ready_queue for the nodes that are not used by this job. The problem isn't related to this patch, so it can be fixed separately. > goto err_return_buffers; > } > > - pispbe->queued_job.valid = true; > - > /* Convert buffers to DMA addresses for the hardware */ > pispbe_xlate_addrs(pispbe, job, buf); > > + spin_lock(&pispbe->hw_lock); > + list_add_tail(&job->queue, &pispbe->job_queue); > + spin_unlock(&pispbe->hw_lock); > + > return 0; > > err_return_buffers: > @@ -551,33 +566,41 @@ static int pispbe_prepare_job(struct pispbe_dev *pispbe, > continue; > > /* Return the buffer to the ready_list queue */ > - spin_lock_irqsave(&n->ready_lock, flags); > + spin_lock(&n->ready_lock); > list_add(&buf[i]->ready_list, &n->ready_queue); > - spin_unlock_irqrestore(&n->ready_lock, flags); > + spin_unlock(&n->ready_lock); Should pispbe_node_buffer_queue() and pispbe_node_stop_streaming() switch away from spin_lock_irqsave() too then ? Actually, can't we drop the lock completely, now that ready_queue is only accessed from vb2 ops all protected by the vb2 queue lock ? > } > > - memset(&pispbe->queued_job, 0, sizeof(pispbe->queued_job)); > + kfree(job); > > return -ENODEV; > } > > static void pispbe_schedule(struct pispbe_dev *pispbe, bool clear_hw_busy) > { > - struct pispbe_job_descriptor job; > - unsigned long flags; > - int ret; > + struct pispbe_job_descriptor *job; > > - spin_lock_irqsave(&pispbe->hw_lock, flags); > + scoped_guard(spinlock_irqsave, &pispbe->hw_lock) { > + if (clear_hw_busy) > + pispbe->hw_busy = false; > > - if (clear_hw_busy) > - pispbe->hw_busy = false; > + if (pispbe->hw_busy) > + return; > > - if (pispbe->hw_busy) > - goto unlock_and_return; > + job = list_first_entry_or_null(&pispbe->job_queue, > + struct pispbe_job_descriptor, > + queue); > + if (!job) > + return; > > - ret = pispbe_prepare_job(pispbe, &job); > - if (ret) > - goto unlock_and_return; > + list_del(&job->queue); > + > + for (unsigned int i = 0; i < PISPBE_NUM_NODES; i++) > + pispbe->queued_job.buf[i] = job->buffers[i]; > + pispbe->queued_job.valid = true; Ideally this should be replaced with a pointer to the job descriptor, but it can be done in a second step. Ideally also, this should go to pispbe_queue_job(). I however don't understand the logic that handles queued_job in pispbe_isr() and why the interrupt handler can complete the queued job in some circumstances, so I'm not sure how it would interact with moving this code to the pispbe_queue_job() function. > + > + pispbe->hw_busy = true; > + } > > /* > * We can kick the job off without the hw_lock, as this can > @@ -585,16 +608,8 @@ static void pispbe_schedule(struct pispbe_dev *pispbe, bool clear_hw_busy) > * only when the following job has been queued and an interrupt > * is rised. > */ > - pispbe->hw_busy = true; > - spin_unlock_irqrestore(&pispbe->hw_lock, flags); > - > - pispbe_queue_job(pispbe, &job); > - > - return; > - > -unlock_and_return: > - /* No job has been queued, just release the lock and return. */ > - spin_unlock_irqrestore(&pispbe->hw_lock, flags); > + pispbe_queue_job(pispbe, job); > + kfree(job); > } > > static void pispbe_isr_jobdone(struct pispbe_dev *pispbe, > @@ -857,7 +872,8 @@ static void pispbe_node_buffer_queue(struct vb2_buffer *buf) > * Every time we add a buffer, check if there's now some work for the hw > * to do. > */ > - pispbe_schedule(pispbe, false); > + if (!pispbe_prepare_job(pispbe)) > + pispbe_schedule(pispbe, false); > } > > static int pispbe_node_start_streaming(struct vb2_queue *q, unsigned int count) > @@ -883,7 +899,8 @@ static int pispbe_node_start_streaming(struct vb2_queue *q, unsigned int count) > node->pispbe->streaming_map); > > /* Maybe we're ready to run. */ > - pispbe_schedule(pispbe, false); > + if (!pispbe_prepare_job(pispbe)) > + pispbe_schedule(pispbe, false); > > return 0; > > @@ -935,6 +952,16 @@ static void pispbe_node_stop_streaming(struct vb2_queue *q) > > spin_lock_irqsave(&pispbe->hw_lock, flags); > pispbe->streaming_map &= ~BIT(node->id); > + > + /* Release all jobs once all nodes have stopped streaming. */ > + if (pispbe->streaming_map == 0) { > + struct pispbe_job_descriptor *job, *temp; > + > + list_for_each_entry_safe(job, temp, &pispbe->job_queue, queue) { > + list_del(&job->queue); > + kfree(job); > + } > + } > spin_unlock_irqrestore(&pispbe->hw_lock, flags); > > pm_runtime_mark_last_busy(pispbe->dev); > @@ -1677,6 +1704,8 @@ static int pispbe_probe(struct platform_device *pdev) > if (!pispbe) > return -ENOMEM; > > + INIT_LIST_HEAD(&pispbe->job_queue); > + > dev_set_drvdata(&pdev->dev, pispbe); > pispbe->dev = &pdev->dev; > platform_set_drvdata(pdev, pispbe); -- Regards, Laurent Pinchart