On 12/12, Maíra Canal wrote:
> `bo->seqno`, `bo->write_seqno`, and `exec->bin_dep_seqno` are leftovers
> from a time when VC4 didn't support DMA Reservation Objects. When they
> were introduced, it made sense to think about tracking the correspondence
> between the BOs and the jobs through the job's seqno.
>
> This is no longer needed, as VC4 already has support for DMA Reservation
> Objects and attaches the "job done" fence to the BOs.
>
> Signed-off-by: Maíra Canal <mcanal@xxxxxxxxxx>
> ---
>  drivers/gpu/drm/vc4/vc4_crtc.c     | 22 +++++++++++-----------
>  drivers/gpu/drm/vc4/vc4_drv.h      | 13 -------------
>  drivers/gpu/drm/vc4/vc4_gem.c      | 18 ++----------------
>  drivers/gpu/drm/vc4/vc4_validate.c | 11 -----------
>  4 files changed, 13 insertions(+), 51 deletions(-)
>
> diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
> index cf40a53ad42e..3a5b5743cb2f 100644
> --- a/drivers/gpu/drm/vc4/vc4_crtc.c
> +++ b/drivers/gpu/drm/vc4/vc4_crtc.c
> @@ -919,10 +919,11 @@ vc4_async_page_flip_complete(struct vc4_async_flip_state *flip_state)
>  	kfree(flip_state);
>  }
>
> -static void vc4_async_page_flip_seqno_complete(struct vc4_seqno_cb *cb)
> +static void vc4_async_page_flip_seqno_complete(struct dma_fence *fence,
> +						struct dma_fence_cb *cb)
>  {
>  	struct vc4_async_flip_state *flip_state =
> -		container_of(cb, struct vc4_async_flip_state, cb.seqno);
> +		container_of(cb, struct vc4_async_flip_state, cb.fence);

hmm... I don't get why this function is still called
vc4_async_page_flip_seqno_complete when it no longer uses a seqno.

On top of that, can't we just use vc4_async_page_flip_fence_complete
instead? It looks like we no longer need two different async page-flip
paths depending on the hardware generation.
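
Something along these lines is what I had in mind (rough, untested
sketch; it assumes the old_fb/bo cleanup currently done in the seqno
variant can be folded into vc4_async_page_flip_fence_complete, or turns
out not to be needed):

	ret = dma_resv_get_singleton(dma_bo->base.resv, DMA_RESV_USAGE_READ, &fence);
	if (ret)
		return ret;

	/* If there's no fence, complete the page flip immediately */
	if (!fence) {
		vc4_async_page_flip_fence_complete(fence, &flip_state->cb.fence);
		return 0;
	}

	/* If the fence has already signaled, complete the page flip now */
	if (dma_fence_add_callback(fence, &flip_state->cb.fence,
				   vc4_async_page_flip_fence_complete))
		vc4_async_page_flip_fence_complete(fence, &flip_state->cb.fence);

	return 0;

i.e. no VC4_GEN_4 branch and no function pointer at all.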

>  	struct vc4_bo *bo = NULL;
>
>  	if (flip_state->old_fb) {
> @@ -961,16 +962,15 @@ static int vc4_async_set_fence_cb(struct drm_device *dev,
>  {
>  	struct drm_framebuffer *fb = flip_state->fb;
>  	struct drm_gem_dma_object *dma_bo = drm_fb_dma_get_gem_obj(fb, 0);
> +	dma_fence_func_t async_page_flip_complete_function;
>  	struct vc4_dev *vc4 = to_vc4_dev(dev);
>  	struct dma_fence *fence;
>  	int ret;
>
> -	if (vc4->gen == VC4_GEN_4) {
> -		struct vc4_bo *bo = to_vc4_bo(&dma_bo->base);
> -
> -		return vc4_queue_seqno_cb(dev, &flip_state->cb.seqno, bo->seqno,
> -					  vc4_async_page_flip_seqno_complete);
> -	}
> +	if (vc4->gen == VC4_GEN_4)
> +		async_page_flip_complete_function = vc4_async_page_flip_seqno_complete;
> +	else
> +		async_page_flip_complete_function = vc4_async_page_flip_fence_complete;
>
>  	ret = dma_resv_get_singleton(dma_bo->base.resv, DMA_RESV_USAGE_READ, &fence);
>  	if (ret)
> @@ -978,14 +978,14 @@
>  	/* If there's no fence, complete the page flip immediately */
>  	if (!fence) {
> -		vc4_async_page_flip_fence_complete(fence, &flip_state->cb.fence);
> +		async_page_flip_complete_function(fence, &flip_state->cb.fence);
>  		return 0;
>  	}
>
>  	/* If the fence has already been completed, complete the page flip */
>  	if (dma_fence_add_callback(fence, &flip_state->cb.fence,
> -				   vc4_async_page_flip_fence_complete))
> -		vc4_async_page_flip_fence_complete(fence, &flip_state->cb.fence);
> +				   async_page_flip_complete_function))
> +		async_page_flip_complete_function(fence, &flip_state->cb.fence);
>
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
> index 03ed40ab9a93..ff8048991030 100644
> --- a/drivers/gpu/drm/vc4/vc4_drv.h
> +++ b/drivers/gpu/drm/vc4/vc4_drv.h
> @@ -247,16 +247,6 @@ struct vc4_dev {
>  struct vc4_bo {
>  	struct drm_gem_dma_object base;
>
> -	/* seqno of the last job to render using this BO. */
> -	uint64_t seqno;
> -
> -	/* seqno of the last job to use the RCL to write to this BO.
> -	 *
> -	 * Note that this doesn't include binner overflow memory
> -	 * writes.
> -	 */
> -	uint64_t write_seqno;
> -
>  	bool t_format;
>
>  	/* List entry for the BO's position in either
> @@ -695,9 +685,6 @@ struct vc4_exec_info {
>  	/* Sequence number for this bin/render job. */
>  	uint64_t seqno;
>
> -	/* Latest write_seqno of any BO that binning depends on. */
> -	uint64_t bin_dep_seqno;
> -
>  	struct dma_fence *fence;
>
>  	/* Last current addresses the hardware was processing when the
> diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
> index 4037c65eb269..1cbd95c4f9ef 100644
> --- a/drivers/gpu/drm/vc4/vc4_gem.c
> +++ b/drivers/gpu/drm/vc4/vc4_gem.c
> @@ -553,27 +553,19 @@ vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec)
>  }
>
>  static void
> -vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
> +vc4_attach_fences(struct vc4_exec_info *exec)
>  {
>  	struct vc4_bo *bo;
>  	unsigned i;
>
>  	for (i = 0; i < exec->bo_count; i++) {
>  		bo = to_vc4_bo(exec->bo[i]);
> -		bo->seqno = seqno;
> -
>  		dma_resv_add_fence(bo->base.base.resv, exec->fence,
>  				   DMA_RESV_USAGE_READ);
>  	}
>
> -	list_for_each_entry(bo, &exec->unref_list, unref_head) {
> -		bo->seqno = seqno;
> -	}
> -
>  	for (i = 0; i < exec->rcl_write_bo_count; i++) {
>  		bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
> -		bo->write_seqno = seqno;
> -
>  		dma_resv_add_fence(bo->base.base.resv, exec->fence,
>  				   DMA_RESV_USAGE_WRITE);
>  	}
> @@ -647,7 +639,7 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
>  	if (out_sync)
>  		drm_syncobj_replace_fence(out_sync, exec->fence);
>
> -	vc4_update_bo_seqnos(exec, seqno);
> +	vc4_attach_fences(exec);
>
>  	drm_exec_fini(exec_ctx);
>
> @@ -845,12 +837,6 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
>  		goto fail;
>  	}
>
> -	/* Block waiting on any previous rendering into the CS's VBO,
> -	 * IB, or textures, so that pixels are actually written by the
> -	 * time we try to read them.
> -	 */
> -	ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true);

Don't we still need to wait for the fences here? I tried to sketch what
I have in mind at the end of this mail.

> -
>  fail:
>  	kvfree(temp);
>  	return ret;
> diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c
> index 5bf134968ade..1e7bdda55698 100644
> --- a/drivers/gpu/drm/vc4/vc4_validate.c
> +++ b/drivers/gpu/drm/vc4/vc4_validate.c
> @@ -284,9 +284,6 @@ validate_indexed_prim_list(VALIDATE_ARGS)
>  	if (!ib)
>  		return -EINVAL;
>
> -	exec->bin_dep_seqno = max(exec->bin_dep_seqno,
> -				  to_vc4_bo(&ib->base)->write_seqno);
> -
>  	if (offset > ib->base.size ||
>  	    (ib->base.size - offset) / index_size < length) {
>  		DRM_DEBUG("IB access overflow (%d + %d*%d > %zd)\n",
> @@ -738,11 +735,6 @@ reloc_tex(struct vc4_exec_info *exec,
>
>  	*validated_p0 = tex->dma_addr + p0;
>
> -	if (is_cs) {
> -		exec->bin_dep_seqno = max(exec->bin_dep_seqno,
> -					  to_vc4_bo(&tex->base)->write_seqno);
> -	}
> -
>  	return true;
>  fail:
>  	DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
> @@ -904,9 +896,6 @@ validate_gl_shader_rec(struct drm_device *dev,
>  		uint32_t stride = *(uint8_t *)(pkt_u + o + 5);
>  		uint32_t max_index;
>
> -		exec->bin_dep_seqno = max(exec->bin_dep_seqno,
> -					  to_vc4_bo(&vbo->base)->write_seqno);
> -
>  		if (state->addr & 0x8)
>  			stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;
>
> --
> 2.47.1
>
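
To expand on my question about the removed vc4_wait_for_seqno() call in
vc4_get_bcl(): what I had in mind is something like the below (rough,
untested sketch; it waits on the write fences of every BO referenced by
the job, which is coarser than the old bin_dep_seqno tracking, and it
may well be unnecessary if those fences are guaranteed to have signaled
by the time the bin job actually runs):

	struct vc4_bo *bo;
	unsigned int i;
	long lret;

	for (i = 0; i < exec->bo_count; i++) {
		bo = to_vc4_bo(exec->bo[i]);

		/* Wait for any previous writer of this BO so the binner
		 * reads fully written data.
		 */
		lret = dma_resv_wait_timeout(bo->base.base.resv,
					     DMA_RESV_USAGE_WRITE,
					     true, MAX_SCHEDULE_TIMEOUT);
		if (lret < 0) {
			ret = lret;
			goto fail;
		}
	}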