On Sat, Mar 24, 2012 at 12:20:24AM +0000, Chris Wilson wrote:
> Originally the code tried to allocate a large enough array to perform
> the copy using vmalloc, performance wasn't great and throughput was
> improved by processing each individual relocation entry separately.
> This too is not as efficient as one would desire. A compromise is then
> to allocate a single page (since that is relatively cheap) and process
> the relocations in page size batches.
>
> x11perf -copywinwin10:   n450/pnv   i3-330m   i5-2520m (cpu)
>               Before:      249000     785000    1280000 (80%)
>               After:       264000     896000    1280000 (65%)
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |   92 +++++++++++++++++++++++-----
>  1 files changed, 77 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 2d5d41b..1da04db 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -407,28 +407,90 @@ i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
>  {
>  	struct drm_i915_gem_relocation_entry __user *user_relocs;
>  	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
> -	int i, ret;
> +	int i, ret = 0;
> +
> +	if (entry->relocation_count == 0)
> +		return 0;
> +
> +#define N_RELOC_PER_PAGE \
> +	(PAGE_SIZE / sizeof(struct drm_i915_gem_relocation_entry))
>
>  	user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
> -	for (i = 0; i < entry->relocation_count; i++) {
> -		struct drm_i915_gem_relocation_entry reloc;
>
> -		if (__copy_from_user_inatomic(&reloc,
> -					      user_relocs+i,
> -					      sizeof(reloc)))
> -			return -EFAULT;
> +	if (entry->relocation_count < N_RELOC_PER_PAGE / 2) {
> +fallback:

Why don't we go slightly more overboard with optimizing this?
drm_i915_gem_relocation_entry is only 32 bytes, so we can fit a few onto
the stack. Hence what about this before the loop?

struct drm_i915_gem_relocation_entry stack_arr[512 / sizeof(struct drm_i915_gem_relocation_entry)];

if (relocation_count > N_RELOC_PER_PAGE) {
	tmp_stor = (void *)__get_free_page(GFP_TEMPORARY);
	if (!tmp_stor)
		goto fallback;
	tmp_stor_size = N_RELOC_PER_PAGE;
} else {
fallback:
	tmp_stor = stack_arr;
	tmp_stor_size = ARRAY_SIZE(stack_arr);
}

Then we also wouldn't have the not-so-beautiful duplication in the code.
Maybe the added complexity of the page_alloc path isn't even worth it
anymore.

Can I bother you to play around with this?
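For illustration, here is a rough, untested sketch of how such a unified
loop could look if the page_alloc path were dropped entirely and the
relocations were simply processed in small on-stack batches. The helper
name relocate_object_stack_batched and the 512-byte batch size are only
placeholders following the stack_arr idea above, not anything taken from
the patch:

/*
 * Untested sketch only: batch the relocations through a small on-stack
 * buffer instead of either a per-entry copy or a whole temporary page.
 * The 512-byte figure and the helper name are made up for illustration.
 */
static int
relocate_object_stack_batched(struct drm_i915_gem_object *obj,
			      struct eb_objects *eb,
			      struct drm_i915_gem_relocation_entry __user *user_relocs,
			      int relocation_count)
{
	struct drm_i915_gem_relocation_entry stack_reloc[512 / sizeof(struct drm_i915_gem_relocation_entry)];
	int remain = relocation_count;
	int ret;

	while (remain) {
		struct drm_i915_gem_relocation_entry *r = stack_reloc;
		int count = min_t(int, remain, ARRAY_SIZE(stack_reloc));

		remain -= count;

		/* copy one batch of relocation entries from userspace */
		if (__copy_from_user_inatomic(r, user_relocs, count * sizeof(r[0])))
			return -EFAULT;

		do {
			u64 offset = r->presumed_offset;

			ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
			if (ret)
				return ret;

			/* write back the presumed offset only if it changed */
			if (r->presumed_offset != offset &&
			    __copy_to_user_inatomic(&user_relocs->presumed_offset,
						    &r->presumed_offset,
						    sizeof(r->presumed_offset)))
				return -EFAULT;

			user_relocs++;
			r++;
		} while (--count);
	}

	return 0;
}

The caller would just pass entry->relocs_ptr and entry->relocation_count,
so both the fallback label and the duplicated per-entry loop disappear.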
Cheers, Daniel

> +		for (i = 0; i < entry->relocation_count; i++) {
> +			struct drm_i915_gem_relocation_entry reloc;
> +			u64 offset;
>
> -		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &reloc);
> -		if (ret)
> -			return ret;
> +			if (__copy_from_user_inatomic(&reloc,
> +						      user_relocs+i,
> +						      sizeof(reloc)))
> +				return -EFAULT;
>
> -		if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset,
> -					    &reloc.presumed_offset,
> -					    sizeof(reloc.presumed_offset)))
> -			return -EFAULT;
> +			offset = reloc.presumed_offset;
> +
> +			ret = i915_gem_execbuffer_relocate_entry(obj, eb, &reloc);
> +			if (ret)
> +				return ret;
> +
> +			if (reloc.presumed_offset != offset &&
> +			    __copy_to_user_inatomic(&user_relocs[i].presumed_offset,
> +						    &reloc.presumed_offset,
> +						    sizeof(reloc.presumed_offset)))
> +				return -EFAULT;
> +		}
> +	} else {
> +		unsigned long page;
> +		int remain;
> +
> +		page = __get_free_page(GFP_TEMPORARY);
> +		if (unlikely(page == 0))
> +			goto fallback;
> +
> +		i = 0;
> +		remain = entry->relocation_count;
> +		do {
> +			struct drm_i915_gem_relocation_entry *r =
> +				(struct drm_i915_gem_relocation_entry *)page;
> +			int count = remain;
> +			if (count > N_RELOC_PER_PAGE)
> +				count = N_RELOC_PER_PAGE;
> +			remain -= count;
> +
> +			if (__copy_from_user_inatomic(r, user_relocs+i,
> +						      count*sizeof(r[0]))) {
> +				ret = -EFAULT;
> +				goto err;
> +			}
> +
> +			do {
> +				u64 offset = r->presumed_offset;
> +
> +				ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
> +				if (ret)
> +					goto err;
> +
> +				if (r->presumed_offset != offset &&
> +				    __copy_to_user_inatomic(&user_relocs[i].presumed_offset,
> +							    &r->presumed_offset,
> +							    sizeof(r->presumed_offset))) {
> +					ret = -EFAULT;
> +					goto err;
> +				}
> +
> +				i++;
> +				r++;
> +			} while (--count);
> +		} while (remain);
> +
> +		ret = 0;
> +err:
> +		free_page(page);
>  	}
>
> -	return 0;
> +	return ret;
> +#undef N_RELOC_PER_PAGE
>  }
>
>  static int
> --
> 1.7.9.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

--
Daniel Vetter
Mail: daniel at ffwll.ch
Mobile: +41 (0)79 365 57 48