Iago Toral Quiroga <itoral@xxxxxxxxxx> writes: > Extends the user space ioctl for CL submissions so it can include a request > to flush the cache once the CL execution has completed. Fixes memory > write violation messages reported by the kernel in workloads involving > shader memory writes (SSBOs, shader images, scratch, etc) which sometimes > also lead to GPU resets during Piglit and CTS workloads. Some context for any other reviewers: This patch is the interface change necessary to expose GLES 3.1 on V3D. It turns out the HW packets for flushing the caches were broken in multiple ways. > Signed-off-by: Iago Toral Quiroga <itoral@xxxxxxxxxx> > --- > drivers/gpu/drm/v3d/v3d_gem.c | 51 +++++++++++++++++++++++++++++------ > include/uapi/drm/v3d_drm.h | 7 ++--- > 2 files changed, 47 insertions(+), 11 deletions(-) > > diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c > index 5d80507b539b..530fe9d9d5bd 100644 > --- a/drivers/gpu/drm/v3d/v3d_gem.c > +++ b/drivers/gpu/drm/v3d/v3d_gem.c > @@ -530,13 +530,16 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, > struct drm_v3d_submit_cl *args = data; > struct v3d_bin_job *bin = NULL; > struct v3d_render_job *render; > + struct v3d_job *clean_job = NULL; > + struct v3d_job *last_job; > struct ww_acquire_ctx acquire_ctx; > int ret = 0; > > trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end); > > - if (args->pad != 0) { > - DRM_INFO("pad must be zero: %d\n", args->pad); > + if (args->flags != 0 && > + args->flags != DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG) { > + DRM_INFO("invalid flags: %d\n", args->flags); > return -EINVAL; > } > > @@ -575,12 +578,28 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, > bin->render = render; > } > > - ret = v3d_lookup_bos(dev, file_priv, &render->base, > + if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG) { > + clean_job = kcalloc(1, sizeof(*clean_job), GFP_KERNEL); > + if (!clean_job) { > + ret = -ENOMEM; > + goto fail; > + } > + > + ret = v3d_job_init(v3d, file_priv, clean_job, v3d_job_free, 0); > + if (ret) > + goto fail; Only issue I see: If v3d_job_init() fails, we need to not v3d_job_put() it. I'm fine with either kfree() it and NULL the ptr before jumping to fail, or open code the bin/render puts. With that, Reviewed-by: Eric Anholt <eric@xxxxxxxxxx> > + > + last_job = clean_job; > + } else { > + last_job = &render->base; > + } > + > + ret = v3d_lookup_bos(dev, file_priv, last_job, > args->bo_handles, args->bo_handle_count); > if (ret) > goto fail; > > - ret = v3d_lock_bo_reservations(&render->base, &acquire_ctx); > + ret = v3d_lock_bo_reservations(last_job, &acquire_ctx); > if (ret) > goto fail; > > @@ -599,28 +618,44 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, > ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER); > if (ret) > goto fail_unreserve; > + > + if (clean_job) { > + struct dma_fence *render_fence = > + dma_fence_get(render->base.done_fence); > + ret = drm_gem_fence_array_add(&clean_job->deps, render_fence); > + if (ret) > + goto fail_unreserve; > + ret = v3d_push_job(v3d_priv, clean_job, V3D_CACHE_CLEAN); > + if (ret) > + goto fail_unreserve; > + } > + > mutex_unlock(&v3d->sched_lock); > > v3d_attach_fences_and_unlock_reservation(file_priv, > - &render->base, > + last_job, > &acquire_ctx, > args->out_sync, > - render->base.done_fence); > + last_job->done_fence); > > if (bin) > v3d_job_put(&bin->base); > v3d_job_put(&render->base); > + if (clean_job) > + v3d_job_put(clean_job); > > return 0; > > fail_unreserve: > mutex_unlock(&v3d->sched_lock); > - drm_gem_unlock_reservations(render->base.bo, > - render->base.bo_count, &acquire_ctx); > + drm_gem_unlock_reservations(last_job->bo, > + last_job->bo_count, &acquire_ctx); > fail: > if (bin) > v3d_job_put(&bin->base); > v3d_job_put(&render->base); > + if (clean_job) > + v3d_job_put(clean_job); > > return ret; > } > diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h > index 58fbe48c91e9..58d2040ea48c 100644 > --- a/include/uapi/drm/v3d_drm.h > +++ b/include/uapi/drm/v3d_drm.h > @@ -48,6 +48,8 @@ extern "C" { > #define DRM_IOCTL_V3D_SUBMIT_TFU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu) > #define DRM_IOCTL_V3D_SUBMIT_CSD DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CSD, struct drm_v3d_submit_csd) > > +#define DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG 0x01 > + > /** > * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D > * engine. > @@ -61,7 +63,7 @@ extern "C" { > * flushed by the time the render done IRQ happens, which is the > * trigger for out_sync. Any dirtying of cachelines by the job (only > * possible using TMU writes) must be flushed by the caller using the > - * CL's cache flush commands. > + * DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG flag. > */ > struct drm_v3d_submit_cl { > /* Pointer to the binner command list. > @@ -124,8 +126,7 @@ struct drm_v3d_submit_cl { > /* Number of BO handles passed in (size is that times 4). */ > __u32 bo_handle_count; > > - /* Pad, must be zero-filled. */ > - __u32 pad; > + __u32 flags; > }; > > /** > -- > 2.17.1
Attachment:
signature.asc
Description: PGP signature
_______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel