On Fri, May 20, 2016 at 01:31:29AM +0100, Dave Gordon wrote: > The existing for_each_sg_page() iterator is somewhat heavyweight, and is > limiting i915 driver performance in a few benchmarks. So here we > introduce somewhat lighter weight iterators, primarily for use with GEM > objects or other case where we need only deal with whole aligned pages. > > Unlike the old iterator, the new iterators use an internal state > structure which is not intended to be accessed by the caller; instead > each takes as a parameter an output variable which is set before each > iteration. This makes them particularly simple to use :) > > One of the new iterators provides the caller with the DMA address of > each page in turn; the other provides the 'struct page' pointer required > by many memory management operations. > > Various uses of for_each_sg_page() are then converted to the new macros. > > Signed-off-by: Dave Gordon <david.s.gordon@xxxxxxxxx> > Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_drv.h | 62 +++++++++++++++++++++++++-- > drivers/gpu/drm/i915/i915_gem.c | 20 ++++----- > drivers/gpu/drm/i915/i915_gem_fence.c | 14 +++--- > drivers/gpu/drm/i915/i915_gem_gtt.c | 76 ++++++++++++++++----------------- > drivers/gpu/drm/i915/i915_gem_userptr.c | 7 ++- > 5 files changed, 116 insertions(+), 63 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 6894d8e..01cde0f 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2120,6 +2120,10 @@ struct drm_i915_gem_object_ops { > #define INTEL_FRONTBUFFER_ALL_MASK(pipe) \ > (0xff << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe))) > > +void i915_gem_track_fb(struct drm_i915_gem_object *old, > + struct drm_i915_gem_object *new, > + unsigned frontbuffer_bits); > + That's not a much better spot, since this is now before the object it acts upon. One day we'll get our types separated from their actors. > struct drm_i915_gem_object { > struct drm_gem_object base; > > @@ -2251,9 +2255,61 @@ struct drm_i915_gem_object { > }; > #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base) > > -void i915_gem_track_fb(struct drm_i915_gem_object *old, > - struct drm_i915_gem_object *new, > - unsigned frontbuffer_bits); > +/* > + * New optimised SGL iterator for GEM objects > + */ > +struct sgt_iter { > + struct scatterlist *sgp; > + union { > + unsigned long pfn; > + unsigned long dma; > + } ix; An anonymous union here would be slightly more pleasant. And we should be using the correct types. > + unsigned int curr; > + unsigned int max; > +}; > + > +/* Constructor for new iterator */ > +static inline struct sgt_iter > +__sgt_iter(struct scatterlist *sgl, bool dma) > +{ > + struct sgt_iter s = { .sgp = sgl }; > + > + if (s.sgp) { Not acceptable just to GPF out if passed in a NULL? It's a BUG for all our use cases. And would save a conditional every chain step. > + s.max = s.curr = s.sgp->offset; > + s.max += s.sgp->length; > + if (dma) > + s.ix.dma = sg_dma_address(s.sgp); > + else > + s.ix.pfn = page_to_pfn(sg_page(s.sgp)); > + } > + > + return s; > +} > + > +/** > + * for_each_sgt_dma - iterate over the DMA addresses of the given sg_table > + * @__dmap: DMA address (output) > + * @__iter: 'struct sgt_iter' (iterator state, internal) > + * @__sgt: sg_table to iterate over (input) > + */ > +#define for_each_sgt_dma(__dmap, __iter, __sgt) \ > + for ((__iter) = __sgt_iter((__sgt)->sgl, true); \ > + ((__dmap) = (__iter).ix.dma + (__iter).curr); \ This prevents us using dma_addr_t 0, which is the error value iirc, so ok. > + (((__iter).curr += PAGE_SIZE) < (__iter).max) || \ > + ((__iter) = __sgt_iter(sg_next((__iter).sgp), true), 0)) > + > +/** > + * for_each_sgt_page - iterate over the pages of the given sg_table > + * @__pp: page pointer (output) > + * @__iter: 'struct sgt_iter' (iterator state, internal) > + * @__sgt: sg_table to iterate over (input) > + */ > +#define for_each_sgt_page(__pp, __iter, __sgt) \ > + for ((__iter) = __sgt_iter((__sgt)->sgl, false); \ > + ((__pp) = (__iter).ix.pfn == 0 ? NULL : \ > + pfn_to_page((__iter).ix.pfn + ((__iter).curr >> PAGE_SHIFT)));\ If we shifted in the __sgt_iter() we could do simpler instructions inside the loop. The compiler won't know that pfn_to_page() is always true, so maybe a pfn_to_page(),1 here will help? > + (((__iter).curr += PAGE_SIZE) < (__iter).max) || \ > + ((__iter) = __sgt_iter(sg_next((__iter).sgp), false), 0)) -- Chris Wilson, Intel Open Source Technology Centre _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx