A while ago we switched from a contiguous array of pages to an sglist,
as that is both more convenient for mapping to hardware and avoids the
requirement for a vmalloc'ed array of pages on every object. However,
certain GEM API calls (like pwrite and pread, as well as performing
relocations) require access to individual struct pages. A quick hack
was to introduce a cache of the last access, so that finding the
following page was quick - this works so long as the caller makes
sequential accesses. Walking backwards, or interleaving multiple
callers, still incurs a slow linear search for each page. One solution
is to store each successful lookup in a radix tree.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
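A note on the scheme for reviewers: the radix tree stores the
scatterlist pointer itself at the index of each chunk's first page, and
an "exceptional" entry at every interior page, encoding the index of
the chunk's first page (shifted up, with the exceptional tag bit set).
A random lookup therefore costs at most two tree walks. Below is a
rough user-space sketch of that layout, for illustration only - the
flat slots[] array stands in for the kernel radix tree and the chunk
sizes are made up:

#include <stdio.h>

#define EXCEPTIONAL_ENTRY 2UL	/* mirrors RADIX_TREE_EXCEPTIONAL_ENTRY */
#define EXCEPTIONAL_SHIFT 2	/* mirrors RADIX_TREE_EXCEPTIONAL_SHIFT */

struct chunk {
	unsigned long first_page;
	unsigned int npages;
};

int main(void)
{
	/* three sg chunks covering pages 0-3, 4 and 5-12 */
	struct chunk chunks[] = { { 0, 4 }, { 4, 1 }, { 5, 8 } };
	void *slots[13] = { NULL };	/* stand-in for the radix tree */
	struct chunk *c;
	unsigned long n, base;
	unsigned int i, j;

	/*
	 * Populate: the chunk pointer itself at each chunk's first
	 * page, a tagged value (first page index shifted up, tag bit
	 * set) for the interior pages.  Pointers are at least 4-byte
	 * aligned, so a real pointer can never have the tag bit set.
	 */
	for (i = 0; i < 3; i++) {
		base = chunks[i].first_page;
		slots[base] = &chunks[i];
		for (j = 1; j < chunks[i].npages; j++)
			slots[base + j] = (void *)(EXCEPTIONAL_ENTRY |
						   base << EXCEPTIONAL_SHIFT);
	}

	/* Random lookup: at most two accesses, never a linear walk. */
	n = 7;
	if ((unsigned long)slots[n] & EXCEPTIONAL_ENTRY) {
		base = (unsigned long)slots[n] >> EXCEPTIONAL_SHIFT;
		c = slots[base];	/* the real entry for this chunk */
		printf("page %lu -> chunk starting at page %lu, offset %lu\n",
		       n, c->first_page, n - base);	/* chunk 5, offset 2 */
	}

	return 0;
}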
 drivers/gpu/drm/i915/i915_drv.h         |  57 ++++--------
 drivers/gpu/drm/i915/i915_gem.c         | 149 ++++++++++++++++++++++++++++----
 drivers/gpu/drm/i915/i915_gem_stolen.c  |   4 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c |   4 +-
 4 files changed, 154 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6ef2068ed399..33ce6db85ce4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2261,9 +2261,12 @@ struct drm_i915_gem_object {
 
 	struct sg_table *pages;
 	int pages_pin_count;
-	struct get_page {
-		struct scatterlist *sg;
-		int last;
+	struct i915_gem_object_page_iter {
+		struct scatterlist *sg_pos;
+		unsigned long sg_idx;
+
+		struct radix_tree_root radix;
+		struct mutex lock;
 	} get_page;
 	void *mapping;
 
@@ -3152,45 +3155,21 @@ static inline int __sg_page_count(struct scatterlist *sg)
 	return sg->length >> PAGE_SHIFT;
 }
 
-struct page *
-i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n);
-
-static inline dma_addr_t
-i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, int n)
-{
-	if (n < obj->get_page.last) {
-		obj->get_page.sg = obj->pages->sgl;
-		obj->get_page.last = 0;
-	}
-
-	while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) {
-		obj->get_page.last += __sg_page_count(obj->get_page.sg++);
-		if (unlikely(sg_is_chain(obj->get_page.sg)))
-			obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
-	}
-
-	return sg_dma_address(obj->get_page.sg) + ((n - obj->get_page.last) << PAGE_SHIFT);
-}
-
-static inline struct page *
-i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
-{
-	if (WARN_ON(n >= obj->base.size >> PAGE_SHIFT))
-		return NULL;
+struct scatterlist *
+i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+		       unsigned long n, unsigned int *offset);
 
-	if (n < obj->get_page.last) {
-		obj->get_page.sg = obj->pages->sgl;
-		obj->get_page.last = 0;
-	}
+struct page *
+i915_gem_object_get_page(struct drm_i915_gem_object *obj,
+			 unsigned long n);
 
-	while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) {
-		obj->get_page.last += __sg_page_count(obj->get_page.sg++);
-		if (unlikely(sg_is_chain(obj->get_page.sg)))
-			obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
-	}
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+			       unsigned long n);
 
-	return nth_page(sg_page(obj->get_page.sg), n - obj->get_page.last);
-}
+dma_addr_t
+i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
+				unsigned long n);
 
 static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0d8ae6c54e5a..253f92b95400 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2292,6 +2292,15 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
 	kfree(obj->pages);
 }
 
+static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
+{
+	struct radix_tree_iter iter;
+	void **slot;
+
+	radix_tree_for_each_slot(slot, &obj->get_page.radix, &iter, 0)
+		radix_tree_delete(&obj->get_page.radix, iter.index);
+}
+
 int
 i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 {
@@ -2324,6 +2333,8 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 		obj->mapping = NULL;
 	}
 
+	__i915_gem_object_reset_page_iter(obj);
+
 	ops->put_pages(obj);
 	obj->pages = NULL;
 
@@ -2488,8 +2499,8 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 
 	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
 
-	obj->get_page.sg = obj->pages->sgl;
-	obj->get_page.last = 0;
+	obj->get_page.sg_pos = obj->pages->sgl;
+	obj->get_page.sg_idx = 0;
 
 	return 0;
 }
@@ -4234,6 +4245,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
 	obj->madv = I915_MADV_WILLNEED;
 
+	INIT_RADIX_TREE(&obj->get_page.radix, GFP_KERNEL);
+	mutex_init(&obj->get_page.lock);
 	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
 }
 
@@ -4884,21 +4897,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 	}
 }
 
-/* Like i915_gem_object_get_page(), but mark the returned page dirty */
-struct page *
-i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
-{
-	struct page *page;
-
-	/* Only default objects have per-page dirty tracking */
-	if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
-		return NULL;
-
-	page = i915_gem_object_get_page(obj, n);
-	set_page_dirty(page);
-	return page;
-}
-
 /* Allocate a new GEM object and fill it with the supplied data */
 struct drm_i915_gem_object *
 i915_gem_object_create_from_data(struct drm_device *dev,
@@ -4939,3 +4937,120 @@ fail:
 	i915_gem_object_put(obj);
 	return ERR_PTR(ret);
 }
+
+static struct scatterlist *
+__i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+			 unsigned long n, unsigned int *offset)
+{
+	struct scatterlist *sg = obj->pages->sgl;
+	int idx = 0;
+
+	while (idx + __sg_page_count(sg) <= n) {
+		idx += __sg_page_count(sg++);
+		if (unlikely(sg_is_chain(sg)))
+			sg = sg_chain_ptr(sg);
+	}
+
+	*offset = n - idx;
+	return sg;
+}
+
+struct scatterlist *
+i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+		       unsigned long n,
+		       unsigned int *offset)
+{
+	struct i915_gem_object_page_iter *iter = &obj->get_page;
+	struct scatterlist *sg;
+
+	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
+	GEM_BUG_ON(obj->pages_pin_count == 0);
+
+	if (n < READ_ONCE(iter->sg_idx))
+		goto lookup;
+
+	mutex_lock(&iter->lock);
+	if (n >= iter->sg_idx &&
+	    n < iter->sg_idx + __sg_page_count(iter->sg_pos)) {
+		sg = iter->sg_pos;
+		*offset = n - iter->sg_idx;
+		mutex_unlock(&iter->lock);
+		return sg;
+	}
+
+	while (iter->sg_idx <= n) {
+		unsigned long exception;
+		unsigned int count, i;
+
+		radix_tree_insert(&iter->radix,
+				  iter->sg_idx,
+				  iter->sg_pos);
+
+		exception =
+			RADIX_TREE_EXCEPTIONAL_ENTRY |
+			iter->sg_idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
+		count = __sg_page_count(iter->sg_pos);
+		for (i = 1; i < count; i++)
+			radix_tree_insert(&iter->radix,
+					  iter->sg_idx + i,
+					  (void *)exception);
+
+		iter->sg_idx += count;
+		iter->sg_pos = __sg_next(iter->sg_pos);
+	}
+	mutex_unlock(&iter->lock);
+
+lookup:
+	rcu_read_lock();
+	sg = radix_tree_lookup(&iter->radix, n);
+	rcu_read_unlock();
+
+	if (unlikely(!sg))
+		return __i915_gem_object_get_sg(obj, n, offset);
+
+	*offset = 0;
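+	/*
+	 * An exceptional entry encodes the first page index of its
+	 * chunk; decode it, then look up the real scatterlist pointer
+	 * that was stored at that index.
+	 */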
+	if (unlikely(radix_tree_exception(sg))) {
+		unsigned long base =
+			(unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
+
+		sg = radix_tree_lookup(&iter->radix, base);
+		*offset = n - base;
+	}
+	return sg;
+}
+
+struct page *
+i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned long n)
+{
+	struct scatterlist *sg;
+	unsigned int offset;
+
+	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
+
+	sg = i915_gem_object_get_sg(obj, n, &offset);
+	return nth_page(sg_page(sg), offset);
+}
+
+/* Like i915_gem_object_get_page(), but mark the returned page dirty */
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+			       unsigned long n)
+{
+	struct page *page;
+
+	page = i915_gem_object_get_page(obj, n);
+	if (!obj->dirty)
+		set_page_dirty(page);
+
+	return page;
+}
+
+dma_addr_t
+i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
+				unsigned long n)
+{
+	struct scatterlist *sg;
+	unsigned int offset;
+
+	sg = i915_gem_object_get_sg(obj, n, &offset);
+	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 59989e8ee5dc..24bad4e60ef0 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -594,8 +594,8 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
 	if (obj->pages == NULL)
 		goto cleanup;
 
-	obj->get_page.sg = obj->pages->sgl;
-	obj->get_page.last = 0;
+	obj->get_page.sg_pos = obj->pages->sgl;
+	obj->get_page.sg_idx = 0;
 
 	i915_gem_object_pin_pages(obj);
 	obj->stolen = stolen;
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 1c891b92ac80..cb95789da76e 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -526,8 +526,8 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 		if (ret == 0) {
 			list_add_tail(&obj->global_list,
 				      &to_i915(dev)->mm.unbound_list);
-			obj->get_page.sg = obj->pages->sgl;
-			obj->get_page.last = 0;
+			obj->get_page.sg_pos = obj->pages->sgl;
+			obj->get_page.sg_idx = 0;
 			pinned = 0;
 		}
 	}
-- 
2.9.3