On Sat, Aug 11, 2012 at 03:41:21PM +0100, Chris Wilson wrote:
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

What about putting kmap/unmap abstractions into obj->ops (like the dma_buf interface already has)? Since the pwrite/pread code is already rather branch-heavy, I hope we wouldn't see the overhead of the indirect call even in microbenchmarks (I haven't checked). And this way we would also neatly wrap up dma_bufs for pwrite (if anyone ever really wants that ...).

The kmap(_atomic) for stolen-mem-backed objects would boil down to just the pointer arithmetic, and kunmap would be a no-op.
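
Just to illustrate what I mean - a rough, not compile-tested sketch; all the names and signatures here are made up, only mm.stolen_base and obj->stolen->start are taken from your patch:

struct drm_i915_gem_object_ops {
	/* map/unmap the backing page containing @offset into kernel space */
	void *(*kmap_atomic)(struct drm_i915_gem_object *obj, loff_t offset);
	void (*kunmap_atomic)(struct drm_i915_gem_object *obj, void *vaddr);
	void *(*kmap)(struct drm_i915_gem_object *obj, loff_t offset);
	void (*kunmap)(struct drm_i915_gem_object *obj, void *vaddr);
};

/* stolen backing storage: kmap is only pointer arithmetic into the
 * contiguous stolen range, kunmap is a noop */
static void *i915_gem_stolen_kmap(struct drm_i915_gem_object *obj,
				  loff_t offset)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;

	return (void *)(dev_priv->mm.stolen_base + obj->stolen->start +
			(offset & PAGE_MASK));
}

static void i915_gem_stolen_kunmap(struct drm_i915_gem_object *obj,
				   void *vaddr)
{
	/* nothing to do */
}

/* shmem backing storage: plain kmap_atomic of the backing page;
 * lookup_backing_page() is hand-waving for "find the struct page
 * behind this offset in the object's sg table" */
static void *i915_gem_shmem_kmap_atomic(struct drm_i915_gem_object *obj,
					loff_t offset)
{
	return kmap_atomic(lookup_backing_page(obj, offset >> PAGE_SHIFT));
}

static void i915_gem_shmem_kunmap_atomic(struct drm_i915_gem_object *obj,
					 void *vaddr)
{
	kunmap_atomic(vaddr);
}

The pwrite/pread loops would then always go through obj->ops->kmap_atomic/kunmap_atomic (and the kmap/kunmap pair on the slow path) instead of special-casing obj->stolen, and a dma_buf backend could simply forward to the exporter's kmap hooks.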

Cheers, Daniel

> ---
>  drivers/gpu/drm/i915/i915_gem.c |  169 +++++++++++++++++++++++++--------------
>  1 file changed, 111 insertions(+), 58 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 552f95b..a2fb2aa 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -664,19 +664,17 @@ out:
>   * needs_clflush_before is set and flushes out any written cachelines after
>   * writing if needs_clflush is set. */
>  static int
> -shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
> +shmem_pwrite_fast(char *vaddr, int shmem_page_offset, int page_length,
> 		  char __user *user_data,
> 		  bool page_do_bit17_swizzling,
> 		  bool needs_clflush_before,
> 		  bool needs_clflush_after)
>  {
> -	char *vaddr;
>  	int ret;
>
>  	if (unlikely(page_do_bit17_swizzling))
>  		return -EINVAL;
>
> -	vaddr = kmap_atomic(page);
>  	if (needs_clflush_before)
>  		drm_clflush_virt_range(vaddr + shmem_page_offset,
> 				       page_length);
> @@ -686,7 +684,6 @@ shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
>  	if (needs_clflush_after)
>  		drm_clflush_virt_range(vaddr + shmem_page_offset,
> 				       page_length);
> -	kunmap_atomic(vaddr);
>
>  	return ret ? -EFAULT : 0;
>  }
> @@ -694,16 +691,14 @@ shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
>  /* Only difference to the fast-path function is that this can handle bit17
>   * and uses non-atomic copy and kmap functions. */
>  static int
> -shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
> +shmem_pwrite_slow(char *vaddr, int shmem_page_offset, int page_length,
> 		  char __user *user_data,
> 		  bool page_do_bit17_swizzling,
> 		  bool needs_clflush_before,
> 		  bool needs_clflush_after)
>  {
> -	char *vaddr;
>  	int ret;
>
> -	vaddr = kmap(page);
>  	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
>  		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
> 					     page_length,
> @@ -720,7 +715,6 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
>  		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
> 					     page_length,
> 					     page_do_bit17_swizzling);
> -	kunmap(page);
>
>  	return ret ? -EFAULT : 0;
>  }
> @@ -731,6 +725,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
> 		      struct drm_i915_gem_pwrite *args,
> 		      struct drm_file *file)
>  {
> +	struct drm_i915_private *dev_priv = dev->dev_private;
>  	ssize_t remain;
>  	loff_t offset;
>  	char __user *user_data;
> @@ -770,74 +765,132 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  	if (ret)
>  		return ret;
>
> -	i915_gem_object_pin_pages(obj);
> -
>  	offset = args->offset;
>  	obj->dirty = 1;
>
> -	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> -		struct page *page;
> -		int partial_cacheline_write;
> +	if (obj->stolen) {
> +		while (remain > 0) {
> +			char *vaddr;
> +			int partial_cacheline_write;
>
> -		if (i < offset >> PAGE_SHIFT)
> -			continue;
> +			/* Operation in this page
> +			 *
> +			 * shmem_page_offset = offset within page in shmem file
> +			 * page_length = bytes to copy for this page
> +			 */
> +			shmem_page_offset = offset_in_page(offset);
>
> -		if (remain <= 0)
> -			break;
> +			page_length = remain;
> +			if ((shmem_page_offset + page_length) > PAGE_SIZE)
> +				page_length = PAGE_SIZE - shmem_page_offset;
>
> -		/* Operation in this page
> -		 *
> -		 * shmem_page_offset = offset within page in shmem file
> -		 * page_length = bytes to copy for this page
> -		 */
> -		shmem_page_offset = offset_in_page(offset);
> +			/* If we don't overwrite a cacheline completely we need to be
> +			 * careful to have up-to-date data by first clflushing. Don't
> +			 * overcomplicate things and flush the entire patch. */
> +			partial_cacheline_write = needs_clflush_before &&
> +				((shmem_page_offset | page_length)
> +				 & (boot_cpu_data.x86_clflush_size - 1));
>
> -		page_length = remain;
> -		if ((shmem_page_offset + page_length) > PAGE_SIZE)
> -			page_length = PAGE_SIZE - shmem_page_offset;
> +			vaddr = (char *)(dev_priv->mm.stolen_base + obj->stolen->start + offset);
> +			page_do_bit17_swizzling = obj_do_bit17_swizzling &&
> +				((uintptr_t)vaddr & (1 << 17)) != 0;
>
> -		/* If we don't overwrite a cacheline completely we need to be
> -		 * careful to have up-to-date data by first clflushing. Don't
> -		 * overcomplicate things and flush the entire patch. */
> -		partial_cacheline_write = needs_clflush_before &&
> -			((shmem_page_offset | page_length)
> -			 & (boot_cpu_data.x86_clflush_size - 1));
> +			ret = shmem_pwrite_fast(vaddr, shmem_page_offset, page_length,
> +						user_data, page_do_bit17_swizzling,
> +						partial_cacheline_write,
> +						needs_clflush_after);
>
> -		page = sg_page(sg);
> -		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
> -			(page_to_phys(page) & (1 << 17)) != 0;
> +			if (ret == 0)
> +				goto next_stolen;
>
> -		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
> -					user_data, page_do_bit17_swizzling,
> -					partial_cacheline_write,
> -					needs_clflush_after);
> -		if (ret == 0)
> -			goto next_page;
> +			hit_slowpath = 1;
> +			mutex_unlock(&dev->struct_mutex);
>
> -		hit_slowpath = 1;
> -		mutex_unlock(&dev->struct_mutex);
> -		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
> -					user_data, page_do_bit17_swizzling,
> -					partial_cacheline_write,
> -					needs_clflush_after);
> +			ret = shmem_pwrite_slow(vaddr, shmem_page_offset, page_length,
> +						user_data, page_do_bit17_swizzling,
> +						partial_cacheline_write,
> +						needs_clflush_after);
>
> -		mutex_lock(&dev->struct_mutex);
> +			mutex_lock(&dev->struct_mutex);
> +			if (ret)
> +				goto out;
>
> -next_page:
> -		set_page_dirty(page);
> -		mark_page_accessed(page);
> +next_stolen:
> +			remain -= page_length;
> +			user_data += page_length;
> +			offset += page_length;
> +		}
> +	} else {
> +		i915_gem_object_pin_pages(obj);
>
> -		if (ret)
> -			goto out;
> +		for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> +			struct page *page;
> +			char *vaddr;
> +			int partial_cacheline_write;
>
> -		remain -= page_length;
> -		user_data += page_length;
> -		offset += page_length;
> +			if (i < offset >> PAGE_SHIFT)
> +				continue;
> +
> +			if (remain <= 0)
> +				break;
> +
> +			/* Operation in this page
> +			 *
> +			 * shmem_page_offset = offset within page in shmem file
> +			 * page_length = bytes to copy for this page
> +			 */
> +			shmem_page_offset = offset_in_page(offset);
> +
> +			page_length = remain;
> +			if ((shmem_page_offset + page_length) > PAGE_SIZE)
> +				page_length = PAGE_SIZE - shmem_page_offset;
> +
> +			/* If we don't overwrite a cacheline completely we need to be
> +			 * careful to have up-to-date data by first clflushing. Don't
> +			 * overcomplicate things and flush the entire patch. */
> +			partial_cacheline_write = needs_clflush_before &&
> +				((shmem_page_offset | page_length)
> +				 & (boot_cpu_data.x86_clflush_size - 1));
> +
> +			page = sg_page(sg);
> +			page_do_bit17_swizzling = obj_do_bit17_swizzling &&
> +				(page_to_phys(page) & (1 << 17)) != 0;
> +
> +			vaddr = kmap_atomic(page);
> +			ret = shmem_pwrite_fast(vaddr, shmem_page_offset, page_length,
> +						user_data, page_do_bit17_swizzling,
> +						partial_cacheline_write,
> +						needs_clflush_after);
> +
> +			kunmap_atomic(vaddr);
> +
> +			if (ret == 0)
> +				goto next_page;
> +
> +			hit_slowpath = 1;
> +			mutex_unlock(&dev->struct_mutex);
> +
> +			vaddr = kmap(page);
> +			ret = shmem_pwrite_slow(vaddr, shmem_page_offset, page_length,
> +						user_data, page_do_bit17_swizzling,
> +						partial_cacheline_write,
> +						needs_clflush_after);
> +			kunmap(page);
> +
> +			mutex_lock(&dev->struct_mutex);
> +			if (ret)
> +				goto out_unpin;
> +
> +next_page:
> +			remain -= page_length;
> +			user_data += page_length;
> +			offset += page_length;
> +		}
> +out_unpin:
> +		i915_gem_object_unpin_pages(obj);
>  	}
>
>  out:
> -	i915_gem_object_unpin_pages(obj);
> -
>  	if (hit_slowpath) {
>  		/* Fixup: Kill any reinstated backing storage pages */
>  		if (obj->madv == __I915_MADV_PURGED)
> --
> 1.7.10.4
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Mail: daniel at ffwll.ch
Mobile: +41 (0)79 365 57 48