The from_cache flag was actually "the BO is invisible to userspace", so we can repurpose to just zero out a cached BO and return it to userspace. Improves wall time for a loop of 5 glsl-algebraic-add-add-1 by -1.44989% +/- 0.862891% (n=28, 1 outlier removed from each that appeared to be other system noise) Note that there's an intel-gpu-tools test to check for the proper zeroing behavior here, which we continue to pass. Signed-off-by: Eric Anholt <eric@xxxxxxxxxx> --- drivers/gpu/drm/vc4/vc4_bo.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 3f6704cf6608..5ec14f25625d 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -208,21 +208,22 @@ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size) } struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size, - bool from_cache) + bool allow_unzeroed) { size_t size = roundup(unaligned_size, PAGE_SIZE); struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_gem_cma_object *cma_obj; + struct vc4_bo *bo; if (size == 0) return ERR_PTR(-EINVAL); /* First, try to get a vc4_bo from the kernel BO cache. */ - if (from_cache) { - struct vc4_bo *bo = vc4_bo_get_from_cache(dev, size); - - if (bo) - return bo; + bo = vc4_bo_get_from_cache(dev, size); + if (bo) { + if (!allow_unzeroed) + memset(bo->base.vaddr, 0, bo->base.base.size); + return bo; } cma_obj = drm_gem_cma_create(dev, size); -- 2.11.0 _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel