On Thu, 2019-09-12 at 10:25 -0700, Eric Anholt wrote: > Iago Toral Quiroga <itoral@xxxxxxxxxx> writes: > > > Extends the user space ioctl for CL submissions so it can include a > > request > > to flush the cache once the CL execution has completed. Fixes > > memory > > write violation messages reported by the kernel in workloads > > involving > > shader memory writes (SSBOs, shader images, scratch, etc) which > > sometimes > > also lead to GPU resets during Piglit and CTS workloads. > > Some context for any other reviewers: This patch is the interface > change > necessary to expose GLES 3.1 on V3D. It turns out the HW packets for > flushing the caches were broken in multiple ways. > > > Signed-off-by: Iago Toral Quiroga <itoral@xxxxxxxxxx> > > --- > > drivers/gpu/drm/v3d/v3d_gem.c | 51 +++++++++++++++++++++++++++++ > > ------ > > include/uapi/drm/v3d_drm.h | 7 ++--- > > 2 files changed, 47 insertions(+), 11 deletions(-) > > > > diff --git a/drivers/gpu/drm/v3d/v3d_gem.c > > b/drivers/gpu/drm/v3d/v3d_gem.c > > index 5d80507b539b..530fe9d9d5bd 100644 > > --- a/drivers/gpu/drm/v3d/v3d_gem.c > > +++ b/drivers/gpu/drm/v3d/v3d_gem.c > > @@ -530,13 +530,16 @@ v3d_submit_cl_ioctl(struct drm_device *dev, > > void *data, > > struct drm_v3d_submit_cl *args = data; > > struct v3d_bin_job *bin = NULL; > > struct v3d_render_job *render; > > + struct v3d_job *clean_job = NULL; > > + struct v3d_job *last_job; > > struct ww_acquire_ctx acquire_ctx; > > int ret = 0; > > > > trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args- > > >rcl_end); > > > > - if (args->pad != 0) { > > - DRM_INFO("pad must be zero: %d\n", args->pad); > > + if (args->flags != 0 && > > + args->flags != DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG) { > > + DRM_INFO("invalid flags: %d\n", args->flags); > > return -EINVAL; > > } > > > > @@ -575,12 +578,28 @@ v3d_submit_cl_ioctl(struct drm_device *dev, > > void *data, > > bin->render = render; > > } > > > > - ret = v3d_lookup_bos(dev, file_priv, &render->base, > > + if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG) { > > + clean_job = kcalloc(1, sizeof(*clean_job), GFP_KERNEL); > > + if (!clean_job) { > > + ret = -ENOMEM; > > + goto fail; > > + } > > + > > + ret = v3d_job_init(v3d, file_priv, clean_job, > > v3d_job_free, 0); > > + if (ret) > > + goto fail; > > Only issue I see: If v3d_job_init() fails, we need to not > v3d_job_put() > it. I'm fine with either kfree() it and NULL the ptr before jumping > to > fail, or open code the bin/render puts. It seems we also call v3d_job_put() for the bin job when v3d_job_init() fails, which also returns immediately in that case instead of jumping to fail to v3d_job_put the render job, so I guess we need the same treatment there. Shall I fix that in this patch too or would you rather see a different patch sent separately for that? > With that, > > Reviewed-by: Eric Anholt <eric@xxxxxxxxxx> > > > + > > + last_job = clean_job; > > + } else { > > + last_job = &render->base; > > + } > > + > > + ret = v3d_lookup_bos(dev, file_priv, last_job, > > args->bo_handles, args->bo_handle_count); > > if (ret) > > goto fail; > > > > - ret = v3d_lock_bo_reservations(&render->base, &acquire_ctx); > > + ret = v3d_lock_bo_reservations(last_job, &acquire_ctx); > > if (ret) > > goto fail; > > > > @@ -599,28 +618,44 @@ v3d_submit_cl_ioctl(struct drm_device *dev, > > void *data, > > ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER); > > if (ret) > > goto fail_unreserve; > > + > > + if (clean_job) { > > + struct dma_fence *render_fence = > > + dma_fence_get(render->base.done_fence); > > + ret = drm_gem_fence_array_add(&clean_job->deps, > > render_fence); > > + if (ret) > > + goto fail_unreserve; > > + ret = v3d_push_job(v3d_priv, clean_job, > > V3D_CACHE_CLEAN); > > + if (ret) > > + goto fail_unreserve; > > + } > > + > > mutex_unlock(&v3d->sched_lock); > > > > v3d_attach_fences_and_unlock_reservation(file_priv, > > - &render->base, > > + last_job, > > &acquire_ctx, > > args->out_sync, > > - render- > > >base.done_fence); > > + last_job->done_fence); > > > > if (bin) > > v3d_job_put(&bin->base); > > v3d_job_put(&render->base); > > + if (clean_job) > > + v3d_job_put(clean_job); > > > > return 0; > > > > fail_unreserve: > > mutex_unlock(&v3d->sched_lock); > > - drm_gem_unlock_reservations(render->base.bo, > > - render->base.bo_count, > > &acquire_ctx); > > + drm_gem_unlock_reservations(last_job->bo, > > + last_job->bo_count, &acquire_ctx); > > fail: > > if (bin) > > v3d_job_put(&bin->base); > > v3d_job_put(&render->base); > > + if (clean_job) > > + v3d_job_put(clean_job); > > > > return ret; > > } > > diff --git a/include/uapi/drm/v3d_drm.h > > b/include/uapi/drm/v3d_drm.h > > index 58fbe48c91e9..58d2040ea48c 100644 > > --- a/include/uapi/drm/v3d_drm.h > > +++ b/include/uapi/drm/v3d_drm.h > > @@ -48,6 +48,8 @@ extern "C" { > > #define DRM_IOCTL_V3D_SUBMIT_TFU DRM_IOW(DRM_COMMAND_BASE > > + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu) > > #define DRM_IOCTL_V3D_SUBMIT_CSD DRM_IOW(DRM_COMMAND_BASE > > + DRM_V3D_SUBMIT_CSD, struct drm_v3d_submit_csd) > > > > +#define DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG 0x01 > > + > > /** > > * struct drm_v3d_submit_cl - ioctl argument for submitting > > commands to the 3D > > * engine. > > @@ -61,7 +63,7 @@ extern "C" { > > * flushed by the time the render done IRQ happens, which is the > > * trigger for out_sync. Any dirtying of cachelines by the job > > (only > > * possible using TMU writes) must be flushed by the caller using > > the > > - * CL's cache flush commands. > > + * DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG flag. > > */ > > struct drm_v3d_submit_cl { > > /* Pointer to the binner command list. > > @@ -124,8 +126,7 @@ struct drm_v3d_submit_cl { > > /* Number of BO handles passed in (size is that times 4). */ > > __u32 bo_handle_count; > > > > - /* Pad, must be zero-filled. */ > > - __u32 pad; > > + __u32 flags; > > }; > > > > /** > > -- > > 2.17.1 _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel