Userspace can still abuse alignment while allocating buffer objects to
slow down the rb_hole_addr rbtree search. This patch improves the search
in a fragmented rb_hole_addr rbtree by storing the maximum subtree hole
alignment and using it to skip an entire subtree when that subtree
cannot fit a (size + alignment) request.

With this patch applied, 50k bo allocs of size 4k and alignment 9k took
~0.24 sec on amdgpu, compared to 27 sec without it.

Signed-off-by: Nirmoy Das <nirmoy.das@xxxxxxx>
---
 drivers/gpu/drm/drm_mm.c | 66 ++++++++++++++++++++++++++++++++++------
 include/drm/drm_mm.h     |  1 +
 2 files changed, 58 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 91e90c635e05..1af0a211b660 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -212,8 +212,11 @@ static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node,
 			   &drm_mm_interval_tree_augment);
 }
 
+#define DRM_MM_ALIGN_SHIFT 6
 #define HOLE_SIZE(NODE) ((NODE)->hole_size)
 #define HOLE_ADDR(NODE) (__drm_mm_hole_node_start(NODE))
+#define HOLE_SIZE_ALIGN(NODE) ((NODE->hole_size << DRM_MM_ALIGN_SHIFT) | \
+			       ffs(HOLE_ADDR(NODE)))
 
 static u64 rb_to_hole_size(struct rb_node *rb)
 {
@@ -241,6 +244,33 @@ static void insert_hole_size(struct rb_root_cached *root,
 	rb_insert_color_cached(&node->rb_hole_size, root, first);
 }
 
+static inline bool
+augment_callbacks_compute_max_hole_align(struct drm_mm_node *node, bool exit)
+{
+	struct drm_mm_node *child;
+	u64 max = HOLE_SIZE_ALIGN(node);
+
+	if (node->rb_hole_addr.rb_left) {
+		child = rb_entry(node->rb_hole_addr.rb_left, struct drm_mm_node,
+				 rb_hole_addr);
+		if (child->subtree_max_hole_align > max)
+			max = child->subtree_max_hole_align;
+	}
+
+	if (node->rb_hole_addr.rb_right) {
+		child = rb_entry(node->rb_hole_addr.rb_right,
+				 struct drm_mm_node, rb_hole_addr);
+		if (child->subtree_max_hole_align > max)
+			max = child->subtree_max_hole_align;
+	}
+
+	if (exit && node->subtree_max_hole_align == max)
+		return true;
+
+	node->subtree_max_hole_align = max;
+	return false;
+}
+
 static inline bool
 augment_callbacks_compute_max_hole(struct drm_mm_node *node, bool exit)
 {
@@ -271,10 +301,14 @@ augment_callbacks_compute_max_hole(struct drm_mm_node *node, bool exit)
 static inline void
 augment_callbacks_propagate(struct rb_node *rb, struct rb_node *stop)
 {
+	bool compute_max_hole, compute_max_hole_align;
+
 	while (rb != stop) {
 		struct drm_mm_node *node = rb_entry(rb, struct drm_mm_node,
 						    rb_hole_addr);
-		if (augment_callbacks_compute_max_hole(node, true))
+		compute_max_hole = augment_callbacks_compute_max_hole(node, true);
+		compute_max_hole_align = augment_callbacks_compute_max_hole_align(node, true);
+		if (compute_max_hole && compute_max_hole_align)
 			break;
 
 		rb = rb_parent(&node->rb_hole_addr);
@@ -290,6 +324,7 @@ augment_callbacks_copy(struct rb_node *rb_old, struct rb_node *rb_new)
 					   rb_hole_addr);
 
 	new->subtree_max_hole = old->subtree_max_hole;
+	new->subtree_max_hole_align = old->subtree_max_hole_align;
 }
 
 static void
@@ -301,7 +336,9 @@ augment_callbacks_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
 					   rb_hole_addr);
 
 	new->subtree_max_hole = old->subtree_max_hole;
+	new->subtree_max_hole_align = old->subtree_max_hole_align;
 	augment_callbacks_compute_max_hole(old, false);
+	augment_callbacks_compute_max_hole_align(old, false);
 }
 
 static const struct rb_augment_callbacks augment_callbacks = {
@@ -313,7 +350,9 @@ static const struct rb_augment_callbacks augment_callbacks = {
 static void insert_hole_addr(struct rb_root *root, struct drm_mm_node *node)
 {
 	struct rb_node **link = &root->rb_node, *rb_parent = NULL;
-	u64 start = HOLE_ADDR(node), subtree_max_hole = node->subtree_max_hole;
+	u64 start = HOLE_ADDR(node);
+	u64 subtree_max_hole = node->subtree_max_hole;
+	u64 subtree_max_hole_align = node->subtree_max_hole_align;
 	struct drm_mm_node *parent;
 
 	while (*link) {
@@ -322,6 +361,9 @@ static void insert_hole_addr(struct rb_root *root, struct drm_mm_node *node)
 		if (parent->subtree_max_hole < subtree_max_hole)
 			parent->subtree_max_hole = subtree_max_hole;
+		if (parent->subtree_max_hole_align < subtree_max_hole_align)
+			parent->subtree_max_hole_align = subtree_max_hole_align;
+
 		if (start < HOLE_ADDR(parent))
 			link = &parent->rb_hole_addr.rb_left;
 		else
 			link = &parent->rb_hole_addr.rb_right;
@@ -339,6 +381,7 @@ static void add_hole(struct drm_mm_node *node)
 	node->hole_size =
 		__drm_mm_hole_node_end(node) - __drm_mm_hole_node_start(node);
 	node->subtree_max_hole = node->hole_size;
+	node->subtree_max_hole_align = HOLE_SIZE_ALIGN(node);
 	DRM_MM_BUG_ON(!drm_mm_hole_follows(node));
 
 	insert_hole_size(&mm->holes_size, node);
@@ -355,8 +398,10 @@ static void rm_hole(struct drm_mm_node *node)
 	rb_erase_cached(&node->rb_hole_size, &node->mm->holes_size);
 	rb_erase_augmented(&node->rb_hole_addr, &node->mm->holes_addr,
 			   &augment_callbacks);
+
 	node->hole_size = 0;
 	node->subtree_max_hole = 0;
+	node->subtree_max_hole_align = 0;
 
 	DRM_MM_BUG_ON(drm_mm_hole_follows(node));
 }
@@ -458,10 +503,11 @@ first_hole(struct drm_mm *mm,
  * else return parent of @entry
  */
 static struct drm_mm_node *
-next_hole_high_addr(struct drm_mm_node *entry, u64 size)
+next_hole_high_addr(struct drm_mm_node *entry, u64 size, u64 alignment)
 {
 	struct rb_node *rb_node, *left_rb_node, *parent_rb_node;
 	struct drm_mm_node *left_node;
+	u64 req_align = (size + alignment) << DRM_MM_ALIGN_SHIFT;
 
 	if (!entry)
 		return NULL;
@@ -473,6 +519,7 @@ next_hole_high_addr(struct drm_mm_node *entry, u64 size)
 		left_node = rb_entry(left_rb_node, struct drm_mm_node,
 				     rb_hole_addr);
 		if ((left_node->subtree_max_hole < size ||
+		     left_node->subtree_max_hole_align < req_align ||
 		     entry->size == entry->subtree_max_hole) &&
 		    parent_rb_node && parent_rb_node->rb_left != rb_node)
 			return rb_hole_addr_to_node(parent_rb_node);
@@ -498,10 +545,11 @@
  * else return parent of @entry
  */
 static struct drm_mm_node *
-next_hole_low_addr(struct drm_mm_node *entry, u64 size)
+next_hole_low_addr(struct drm_mm_node *entry, u64 size, u64 alignment)
 {
 	struct rb_node *rb_node, *right_rb_node, *parent_rb_node;
 	struct drm_mm_node *right_node;
+	u64 req_align = (size + alignment) << DRM_MM_ALIGN_SHIFT;
 
 	if (!entry)
 		return NULL;
@@ -513,6 +561,7 @@ next_hole_low_addr(struct drm_mm_node *entry, u64 size)
 		right_node = rb_entry(right_rb_node, struct drm_mm_node,
 				      rb_hole_addr);
 		if ((right_node->subtree_max_hole < size ||
+		     right_node->subtree_max_hole_align < req_align ||
 		     entry->size == entry->subtree_max_hole) &&
 		    parent_rb_node && parent_rb_node->rb_right != rb_node)
 			return rb_hole_addr_to_node(parent_rb_node);
@@ -524,7 +573,7 @@
 static struct drm_mm_node *
 next_hole(struct drm_mm *mm,
 	  struct drm_mm_node *node,
-	  u64 size,
+	  u64 size, u64 alignment,
 	  enum drm_mm_insert_mode mode)
 {
 	switch (mode) {
@@ -533,10 +582,10 @@ next_hole(struct drm_mm *mm,
 		return rb_hole_size_to_node(rb_prev(&node->rb_hole_size));
 
 	case DRM_MM_INSERT_LOW:
-		return next_hole_low_addr(node, size);
+		return next_hole_low_addr(node, size, alignment);
 
 	case DRM_MM_INSERT_HIGH:
-		return next_hole_high_addr(node, size);
+		return next_hole_high_addr(node, size, alignment);
 
 	case DRM_MM_INSERT_EVICT:
 		node = list_next_entry(node, hole_stack);
@@ -650,7 +699,7 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
 	remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0;
 	for (hole = first_hole(mm, range_start, range_end, size, mode);
 	     hole;
-	     hole = once ? NULL : next_hole(mm, hole, size, mode)) {
+	     hole = once ? NULL : next_hole(mm, hole, size, alignment, mode)) {
 		u64 hole_start = __drm_mm_hole_node_start(hole);
 		u64 hole_end = hole_start + hole->hole_size;
 		u64 adj_start, adj_end;
@@ -713,7 +762,6 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
 			add_hole(hole);
 		if (adj_start + size < hole_end)
 			add_hole(node);
-
 		save_stack(node);
 		return 0;
 	}
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index a01bc6fac83c..c280b535d9a9 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -169,6 +169,7 @@ struct drm_mm_node {
 	u64 __subtree_last;
 	u64 hole_size;
 	u64 subtree_max_hole;
+	u64 subtree_max_hole_align;
 	unsigned long flags;
#define DRM_MM_NODE_ALLOCATED_BIT	0
#define DRM_MM_NODE_SCANNED_BIT		1
-- 
2.26.2
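
A note on the packed key used by the patch: HOLE_SIZE_ALIGN() stores the
hole size in the upper bits of a u64 and ffs() of the hole's start address,
i.e. the hole's natural alignment, in the low DRM_MM_ALIGN_SHIFT bits, so a
single integer comparison against (size + alignment) << DRM_MM_ALIGN_SHIFT
can reject a whole subtree that is either too small or too poorly aligned.
Below is a minimal userspace sketch of that comparison, assuming GCC/Clang's
__builtin_ffsll() in place of the kernel's ffs(); hole_size_align() and
req_align() are illustrative helpers, not part of the patch:

#include <stdint.h>
#include <stdio.h>

#define DRM_MM_ALIGN_SHIFT 6

/* Hole size in the high bits, ffs() of the hole start address
 * (its natural alignment) in the low DRM_MM_ALIGN_SHIFT bits. */
static uint64_t hole_size_align(uint64_t hole_size, uint64_t hole_start)
{
	return (hole_size << DRM_MM_ALIGN_SHIFT) |
	       (uint64_t)__builtin_ffsll((long long)hole_start);
}

/* A (size, alignment) request packed the same way for comparison. */
static uint64_t req_align(uint64_t size, uint64_t alignment)
{
	return (size + alignment) << DRM_MM_ALIGN_SHIFT;
}

int main(void)
{
	/* Best hole in some subtree: 4 KiB at a 16 KiB-aligned start. */
	uint64_t subtree_max_hole_align = hole_size_align(0x1000, 0x4000);
	/* The commit-message benchmark case: size 4k, alignment 9k. */
	uint64_t request = req_align(0x1000, 0x2400);

	if (subtree_max_hole_align < request)
		printf("skip subtree: cannot fit size + alignment\n");
	else
		printf("descend: subtree may fit the request\n");
	return 0;
}

Here the subtree key is (0x1000 << 6) | 15 = 0x4000f while the request packs
to 0x3400 << 6 = 0xd0000, so the search skips the subtree without visiting
any of its nodes.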