In preparation for supporting huge gtt pages for the ppgtt, we introduce page size members for gem objects. We fill in the page sizes by scanning the sg table. v2: pass the sg_mask to set_pages v3: calculate the sg_mask inline with populating the sg_table where possible, and pass to set_pages along with the pages. v4: bunch of improvements from Joonas Signed-off-by: Matthew Auld <matthew.auld@xxxxxxxxx> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Daniel Vetter <daniel@xxxxxxxx> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 5 ++- drivers/gpu/drm/i915/i915_gem.c | 41 +++++++++++++++++++++--- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 9 +++++- drivers/gpu/drm/i915/i915_gem_internal.c | 5 ++- drivers/gpu/drm/i915/i915_gem_object.h | 17 ++++++++++ drivers/gpu/drm/i915/i915_gem_stolen.c | 2 +- drivers/gpu/drm/i915/i915_gem_userptr.c | 9 +++++- drivers/gpu/drm/i915/selftests/huge_gem_object.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 5 ++- 9 files changed, 83 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 869e46306c49..a3bd510c434d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3084,6 +3084,8 @@ intel_info(const struct drm_i915_private *dev_priv) #define USES_PPGTT(dev_priv) (i915_modparams.enable_ppgtt) #define USES_FULL_PPGTT(dev_priv) (i915_modparams.enable_ppgtt >= 2) #define USES_FULL_48BIT_PPGTT(dev_priv) (i915_modparams.enable_ppgtt == 3) +#define HAS_PAGE_SIZES(dev_priv, sizes) \ + ((sizes) && (((sizes) & ~(dev_priv)->info.page_sizes)) == 0) #define HAS_OVERLAY(dev_priv) ((dev_priv)->info.has_overlay) #define OVERLAY_NEEDS_PHYSICAL(dev_priv) \ @@ -3498,7 +3500,8 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, unsigned long n); void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, - struct sg_table 
*pages); + struct sg_table *pages, + unsigned int sg_mask); int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj); static inline int __must_check diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9c46f1f91f7d..ae7b683437f1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -228,7 +228,7 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) obj->phys_handle = phys; - __i915_gem_object_set_pages(obj, st); + __i915_gem_object_set_pages(obj, st, sg->length); return 0; @@ -2266,6 +2266,8 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, if (!IS_ERR(pages)) obj->ops->put_pages(obj, pages); + obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; + unlock: mutex_unlock(&obj->mm.lock); } @@ -2308,6 +2310,7 @@ static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) struct page *page; unsigned long last_pfn = 0; /* suppress gcc warning */ unsigned int max_segment = i915_sg_segment_size(); + unsigned int sg_mask; gfp_t noreclaim; int ret; @@ -2339,6 +2342,7 @@ static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) sg = st->sgl; st->nents = 0; + sg_mask = 0; for (i = 0; i < page_count; i++) { const unsigned int shrink[] = { I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, @@ -2391,8 +2395,10 @@ static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) if (!i || sg->length >= max_segment || page_to_pfn(page) != last_pfn + 1) { - if (i) + if (i) { + sg_mask |= sg->length; sg = sg_next(sg); + } st->nents++; sg_set_page(sg, page, PAGE_SIZE, 0); } else { @@ -2403,8 +2409,10 @@ static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) /* Check that the i965g/gm workaround works. 
*/ WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); } - if (sg) /* loop terminated early; short sg table */ + if (sg) { /* loop terminated early; short sg table */ + sg_mask |= sg->length; sg_mark_end(sg); + } /* Trim unused sg entries to avoid wasting memory. */ i915_sg_trim(st); @@ -2433,7 +2441,7 @@ static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_do_bit_17_swizzle(obj, st); - __i915_gem_object_set_pages(obj, st); + __i915_gem_object_set_pages(obj, st, sg_mask); return 0; @@ -2460,8 +2468,13 @@ static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) } void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages) + struct sg_table *pages, + unsigned int sg_mask) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned long supported_page_sizes = INTEL_INFO(i915)->page_sizes; + int i; + lockdep_assert_held(&obj->mm.lock); obj->mm.get_page.sg_pos = pages->sgl; @@ -2475,6 +2488,24 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, __i915_gem_object_pin_pages(obj); obj->mm.quirked = true; } + + GEM_BUG_ON(!sg_mask); + obj->mm.page_sizes.phys = sg_mask; + + /* Calculate the supported page-sizes which fit into the given sg_mask. + * This will give us the page-sizes which we may be able to use + * opportunistically when later inserting into the GTT. For example if + * phys=2G, then in theory we should be able to use 1G, 2M, 64K or 4K + * pages, although in practice this will depend on a number of other + * factors. 
+ */ + obj->mm.page_sizes.sg = 0; + for_each_set_bit(i, &supported_page_sizes, BITS_PER_LONG) { + if (obj->mm.page_sizes.phys & ~0u << i) + obj->mm.page_sizes.sg |= BIT(i); + } + + GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); } static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 4c4dc85159fb..2f80b07d7f61 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -259,13 +259,20 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev, static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) { struct sg_table *pages; + struct scatterlist *sg; + unsigned int sg_mask; + int n; pages = dma_buf_map_attachment(obj->base.import_attach, DMA_BIDIRECTIONAL); if (IS_ERR(pages)) return PTR_ERR(pages); - __i915_gem_object_set_pages(obj, pages); + sg_mask = 0; + for_each_sg(pages->sgl, sg, pages->nents, n) + sg_mask |= sg->length; + + __i915_gem_object_set_pages(obj, pages, sg_mask); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index f59764da4254..bdc23c4c8783 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -49,6 +49,7 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) struct drm_i915_private *i915 = to_i915(obj->base.dev); struct sg_table *st; struct scatterlist *sg; + unsigned int sg_mask; unsigned int npages; int max_order; gfp_t gfp; @@ -87,6 +88,7 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) sg = st->sgl; st->nents = 0; + sg_mask = 0; do { int order = min(fls(npages) - 1, max_order); @@ -104,6 +106,7 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) } while (1); sg_set_page(sg, page, PAGE_SIZE << order, 0); + sg_mask |= PAGE_SIZE << order; st->nents++; npages 
-= 1 << order; @@ -132,7 +135,7 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) */ obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, st); + __i915_gem_object_set_pages(obj, st, sg_mask); return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 036e847b27f0..110672952a1c 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -169,6 +169,23 @@ struct drm_i915_gem_object { struct sg_table *pages; void *mapping; + struct i915_page_sizes { + /** + * The sg mask of the pages sg_table. i.e. the mask of + * the lengths for each sg entry. + */ + unsigned int phys; + + /** + * The gtt page sizes we are allowed to use given the + * sg mask and the supported page sizes. This will + * express the smallest unit we can use for the whole + * object, as well as the larger sizes we may be able + * to use opportunistically. + */ + unsigned int sg; + } page_sizes; + struct i915_gem_object_page_iter { struct scatterlist *sg_pos; unsigned int sg_idx; /* in pages, but 32bit eek! 
*/ diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 537ecb224db0..54fd4cfa9d07 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -548,7 +548,7 @@ static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj) if (IS_ERR(pages)) return PTR_ERR(pages); - __i915_gem_object_set_pages(obj, pages); + __i915_gem_object_set_pages(obj, pages, obj->stolen->size); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 70ad7489827d..ad5abca1f794 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -405,6 +405,9 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj, { unsigned int max_segment = i915_sg_segment_size(); struct sg_table *st; + struct scatterlist *sg; + unsigned int sg_mask; + int n; int ret; st = kmalloc(sizeof(*st), GFP_KERNEL); @@ -434,7 +437,11 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj, return ERR_PTR(ret); } - __i915_gem_object_set_pages(obj, st); + sg_mask = 0; + for_each_sg(st->sgl, sg, num_pages, n) + sg_mask |= sg->length; + + __i915_gem_object_set_pages(obj, st, sg_mask); return st; } diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c index 41c15f3aa467..a2632df39173 100644 --- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c @@ -80,7 +80,7 @@ static int huge_get_pages(struct drm_i915_gem_object *obj) if (i915_gem_gtt_prepare_pages(obj, pages)) goto err; - __i915_gem_object_set_pages(obj, pages); + __i915_gem_object_set_pages(obj, pages, PAGE_SIZE); return 0; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index aa1db375d59a..883bc19e3aaf 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ 
b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -45,6 +45,7 @@ static int fake_get_pages(struct drm_i915_gem_object *obj) #define PFN_BIAS 0x1000 struct sg_table *pages; struct scatterlist *sg; + unsigned int sg_mask; typeof(obj->base.size) rem; pages = kmalloc(sizeof(*pages), GFP); @@ -57,6 +58,7 @@ static int fake_get_pages(struct drm_i915_gem_object *obj) return -ENOMEM; } + sg_mask = 0; rem = obj->base.size; for (sg = pages->sgl; sg; sg = sg_next(sg)) { unsigned long len = min_t(typeof(rem), rem, BIT(31)); @@ -65,6 +67,7 @@ static int fake_get_pages(struct drm_i915_gem_object *obj) sg_set_page(sg, pfn_to_page(PFN_BIAS), len, 0); sg_dma_address(sg) = page_to_phys(sg_page(sg)); sg_dma_len(sg) = len; + sg_mask |= len; rem -= len; } @@ -72,7 +75,7 @@ static int fake_get_pages(struct drm_i915_gem_object *obj) obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, pages); + __i915_gem_object_set_pages(obj, pages, sg_mask); return 0; #undef GFP -- 2.13.5 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx