Hi,

On Thu, 2016-02-11 at 11:40 +0000, Tvrtko Ursulin wrote:
> > +
> > +	mutex_unlock(&dev->struct_mutex);
> > +	if (likely(!i915.prefault_disable)) {
> > +		ret = fault_in_multipages_writeable(user_data, remain);
> > +		if (ret) {
> > +			mutex_lock(&dev->struct_mutex);
> > +			goto out_unpin;
> > +		}
> > +	}
> > +
> > +	while (remain > 0) {
> > +		/* Operation in this page
> > +		 *
> > +		 * page_base = page offset within aperture
> > +		 * page_offset = offset within page
> > +		 * page_length = bytes to copy for this page
> > +		 */
> > +		u32 page_base = node.start;
> > +		unsigned page_offset = offset_in_page(offset);
> > +		unsigned page_length = PAGE_SIZE - page_offset;
> > +		page_length = remain < page_length ? remain : page_length;
> > +		if (node.allocated) {
> > +			wmb();
> > +			dev_priv->gtt.base.insert_page(&dev_priv->gtt.base,
> > +					i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
> > +					node.start,
> > +					I915_CACHE_NONE, 0);
> > +			wmb();
> > +		} else {
> > +			page_base += offset & PAGE_MASK;
> > +		}
> > +		/* This is a slow read/write as it tries to read from
> > +		 * and write to user memory which may result into page
> > +		 * faults, and so we cannot perform this under struct_mutex.
> > +		 */
> > +		if (slow_user_access(dev_priv->gtt.mappable, page_base,
> > +				     page_offset, user_data,
> > +				     page_length, false)) {
> > +			ret = -EFAULT;
> > +			break;
> > +		}
>
> Read does not want to try the fast access first, equivalent to pwrite?

Using the fast access first means we would be unable to handle page
faults, which are more frequent in the pread case.

> > +
> > +		remain -= page_length;
> > +		user_data += page_length;
> > +		offset += page_length;
> > +	}
> > +
> >
> > @@ -870,24 +1012,36 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
> >  		unsigned page_length = PAGE_SIZE - page_offset;
> >  		page_length = remain < page_length ? remain : page_length;
> >  		if (node.allocated) {
> > -			wmb();
> > +			wmb(); /* flush the write before we modify the GGTT */
> >  			i915->gtt.base.insert_page(&i915->gtt.base,
> >  					i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
> >  					node.start,
> >  					I915_CACHE_NONE,
> >  					0);
> > -			wmb();
> > +			wmb(); /* flush modifications to the GGTT (insert_page) */
> >  		} else {
> >  			page_base += offset & PAGE_MASK;
> >  		}
> >  		/* If we get a fault while copying data, then (presumably) our
> >  		 * source page isn't available. Return the error and we'll
> >  		 * retry in the slow path.
> > +		 * If the object is non-shmem backed, we retry again with the
> > +		 * path that handles page fault.
> >  		 */
> >  		if (fast_user_write(i915->gtt.mappable, page_base,
> >  				    page_offset, user_data, page_length)) {
> > -			ret = -EFAULT;
> > -			goto out_flush;
> > +			hit_slow_path = true;
> > +			mutex_unlock(&dev->struct_mutex);
> > +			if (slow_user_access(i915->gtt.mappable,
> > +					     page_base,
> > +					     page_offset, user_data,
> > +					     page_length, true)) {
> > +				ret = -EFAULT;
> > +				mutex_lock(&dev->struct_mutex);
> > +				goto out_flush;
> > +			}
>
> I think the function should now be called i915_gem_gtt_pwrite.
>
> Would it also need the same pre-fault as in i915_gem_gtt_pread?

I do not think pre-fault is needed here: in pread we are reading from
the object into the user buffer, which has more chances of faulting,
while in the pwrite case we are optimistic that the user has already
mapped/accessed the buffer before using it as the source for writing
its contents into the object.
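To expand on the pre-fault point: it is cheap insurance for the
destination buffer. As far as I remember, fault_in_multipages_writeable()
in include/linux/pagemap.h is essentially the following (condensed
here, with the check for a range ending in a partial page trimmed, so
treat it as a sketch rather than the exact code):

/* Touch every page of the user buffer with a zero byte so the pages
 * are faulted in and made writeable before the copy loop starts.
 * Writing zeroes is harmless because pread overwrites the buffer.
 */
static inline int fault_in_multipages_writeable(char __user *uaddr, int size)
{
	char __user *end = uaddr + size - 1;
	int ret = 0;

	if (unlikely(size == 0))
		return 0;

	while (uaddr <= end) {
		ret = __put_user(0, uaddr);
		if (ret != 0)
			return ret;
		uaddr += PAGE_SIZE;
	}

	return ret;
}

This way a bad destination buffer is caught before we start inserting
pages into the aperture, and the copy loop itself is unlikely to fault.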
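And on the fast-access question above, to make the fast/slow
distinction concrete: below is roughly what the two helpers look like
(paraphrased from the patch, with error handling and comments trimmed,
so again a sketch rather than the exact code).

/* Fast path: atomic mapping of the GTT page. Page faults are disabled
 * while the atomic mapping is held, so a faulting copy returns
 * non-zero immediately instead of sleeping; safe under struct_mutex.
 */
static int fast_user_write(struct io_mapping *mapping, loff_t page_base,
			   int page_offset, char __user *user_data, int length)
{
	void __iomem *vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	unsigned long unwritten;

	unwritten = __copy_from_user_inatomic_nocache(
			(void __force *)vaddr_atomic + page_offset,
			user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}

/* Slow path: plain (sleepable) mapping. The copy may fault in user
 * pages and sleep, so callers must drop struct_mutex around it, as
 * both the pread and pwrite paths above do.
 */
static unsigned long slow_user_access(struct io_mapping *mapping,
				      uint64_t page_base, int page_offset,
				      char __user *user_data,
				      unsigned long length, bool pwrite)
{
	void __iomem *ioaddr = io_mapping_map_wc(mapping, page_base);
	void *vaddr = (void __force *)ioaddr + page_offset;
	uint64_t unwritten;

	if (pwrite)
		unwritten = __copy_from_user(vaddr, user_data, length);
	else
		unwritten = __copy_to_user(user_data, vaddr, length);

	io_mapping_unmap(ioaddr);
	return unwritten;
}

So for pread, trying the fast access first would just add a failed
pass for every page of the user buffer that is not already faulted in;
going straight to the sleepable copy avoids that.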
> > +
> > +			mutex_lock(&dev->struct_mutex);
> > +		}
> >
> >  		remain -= page_length;
> > @@ -896,6 +1050,9 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
> >  	}
> >
> >  out_flush:
> > +	if (hit_slow_path)
> > +		WARN_ON(i915_gem_object_set_to_gtt_domain(obj, true));
>
> I suppose this is for the same reason as in pread, maybe duplicate the
> comment here?
>
> > +
> >  	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
> >  out_unpin:
> >  	if (node.allocated) {
> > @@ -1152,14 +1309,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> >  		goto out;
> >  	}
> >
> > -	/* prime objects have no backing filp to GEM pread/pwrite
> > -	 * pages from.
> > -	 */
> > -	if (!obj->base.filp) {
> > -		ret = -EINVAL;
> > -		goto out;
> > -	}
> > -
> >  	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
> >
> >  	ret = -EFAULT;
> > @@ -1169,20 +1318,20 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> >  	 * pread/pwrite currently are reading and writing from the CPU
> >  	 * perspective, requiring manual detiling by the client.
> >  	 */
> > -	if (obj->tiling_mode == I915_TILING_NONE &&
> > -	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
> > -	    cpu_write_needs_clflush(obj)) {
> > +	if (!obj->base.filp || cpu_write_needs_clflush(obj)) {
> >  		ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
> >  		/* Note that the gtt paths might fail with non-page-backed user
> >  		 * pointers (e.g. gtt mappings when moving data between
> >  		 * textures). Fallback to the shmem path in that case. */
> >  	}
> >
> > -	if (ret == -EFAULT || ret == -ENOSPC) {
> > +	if (ret == -EFAULT) {
> >  		if (obj->phys_handle)
> >  			ret = i915_gem_phys_pwrite(obj, args, file);
> > -		else
> > +		else if (obj->base.filp)
> >  			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
> > +		else
> > +			ret = -ENODEV;
> >  	}
> >
> >  out:
> > @@ -3979,9 +4128,7 @@ out:
> >  	 * object is now coherent at its new cache level (with respect
> >  	 * to the access domain).
> >  	 */
> > -	if (obj->cache_dirty &&
> > -	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
> > -	    cpu_write_needs_clflush(obj)) {
> > +	if (obj->cache_dirty && cpu_write_needs_clflush(obj)) {
> >  		if (i915_gem_clflush_object(obj, true))
> >  			i915_gem_chipset_flush(obj->base.dev);
> >  	}
> >
> Regards,
> Tvrtko

Thanks,
Ankit