[PATCH 3/3] debugobjects: Use hlist_cut_number() to optimize performance and improve readability

Zhen Lei <thunder.leizhen@xxxxxxxxxx> · Wed, 4 Sep 2024 21:41:52 +0800

Currently, there are multiple instances where several nodes are extracted
from one list and added to another list. One by one extraction, and then
one by one splicing, not only low efficiency, readability is also poor.
The work can be done well with hlist_cut_number() and hlist_splice_init(),
which move the entire sublist at once.

When the number of nodes expected to be moved is less than or equal to 0,
or the source list is empty, hlist_cut_number() safely returns 0. The
splicing is performed only when the return value of hlist_cut_number() is
greater than 0.

For two calls to hlist_cut_number() in __free_object(), the result is
obviously positive, the check of the return value is omitted.

Signed-off-by: Zhen Lei <thunder.leizhen@xxxxxxxxxx>
---
 lib/debugobjects.c | 115 +++++++++++++++++++--------------------------
 1 file changed, 48 insertions(+), 67 deletions(-)

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index db8f6b4b8b3151a..1cb9458af3d0b4f 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -128,8 +128,9 @@ static const char *obj_states[ODEBUG_STATE_MAX] = {
 static void fill_pool(void)
 {
 	gfp_t gfp = __GFP_HIGH | __GFP_NOWARN;
-	struct debug_obj *obj;
+	HLIST_HEAD(freelist);
 	unsigned long flags;
+	int cnt;
 
 	/*
 	 * The upper-layer function uses only one node at a time. If there are
@@ -152,17 +153,19 @@ static void fill_pool(void)
 	 * the WRITE_ONCE() in pool_lock critical sections.
 	 */
 	if (READ_ONCE(obj_nr_tofree)) {
+		struct hlist_node *last;
+
 		raw_spin_lock_irqsave(&pool_lock, flags);
 		/*
 		 * Recheck with the lock held as the worker thread might have
 		 * won the race and freed the global free list already.
 		 */
-		while (obj_nr_tofree && (obj_pool_free < debug_objects_pool_min_level)) {
-			obj = hlist_entry(obj_to_free.first, typeof(*obj), node);
-			hlist_del(&obj->node);
-			WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - 1);
-			hlist_add_head(&obj->node, &obj_pool);
-			WRITE_ONCE(obj_pool_free, obj_pool_free + 1);
+		cnt = min(obj_nr_tofree, debug_objects_pool_min_level - obj_pool_free);
+		cnt = hlist_cut_number(&freelist, &obj_to_free, cnt, &last);
+		if (cnt > 0) {
+			hlist_splice_init(&freelist, last, &obj_pool);
+			WRITE_ONCE(obj_pool_free, obj_pool_free + cnt);
+			WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - cnt);
 		}
 		raw_spin_unlock_irqrestore(&pool_lock, flags);
 	}
@@ -172,8 +175,6 @@ static void fill_pool(void)
 
 	while (READ_ONCE(obj_pool_free) < debug_objects_pool_min_level) {
 		struct debug_obj *new, *last = NULL;
-		HLIST_HEAD(freelist);
-		int cnt;
 
 		for (cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) {
 			new = kmem_cache_zalloc(obj_cache, gfp);
@@ -245,30 +246,28 @@ alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *d
 	raw_spin_lock(&pool_lock);
 	obj = __alloc_object(&obj_pool);
 	if (obj) {
-		obj_pool_used++;
-		WRITE_ONCE(obj_pool_free, obj_pool_free - 1);
+		int cnt = 0;
 
 		/*
 		 * Looking ahead, allocate one batch of debug objects and
 		 * put them into the percpu free pool.
 		 */
 		if (likely(obj_cache)) {
-			int i;
-
-			for (i = 0; i < ODEBUG_BATCH_SIZE; i++) {
-				struct debug_obj *obj2;
-
-				obj2 = __alloc_object(&obj_pool);
-				if (!obj2)
-					break;
-				hlist_add_head(&obj2->node,
-					       &percpu_pool->free_objs);
-				percpu_pool->obj_free++;
-				obj_pool_used++;
-				WRITE_ONCE(obj_pool_free, obj_pool_free - 1);
+			struct hlist_node *last;
+			HLIST_HEAD(freelist);
+
+			cnt = hlist_cut_number(&freelist, &obj_pool, ODEBUG_BATCH_SIZE, &last);
+			if (cnt > 0) {
+				hlist_splice_init(&freelist, last, &percpu_pool->free_objs);
+				percpu_pool->obj_free += cnt;
 			}
 		}
 
+		/* add one for obj */
+		cnt++;
+		obj_pool_used += cnt;
+		WRITE_ONCE(obj_pool_free, obj_pool_free - cnt);
+
 		if (obj_pool_used > obj_pool_max_used)
 			obj_pool_max_used = obj_pool_used;
 
@@ -300,6 +299,7 @@ static void free_obj_work(struct work_struct *work)
 	struct debug_obj *obj;
 	unsigned long flags;
 	HLIST_HEAD(tofree);
+	int cnt;
 
 	WRITE_ONCE(obj_freeing, false);
 	if (!raw_spin_trylock_irqsave(&pool_lock, flags))
@@ -315,12 +315,12 @@ static void free_obj_work(struct work_struct *work)
 	 * may be gearing up to use more and more objects, don't free any
 	 * of them until the next round.
 	 */
-	while (obj_nr_tofree && obj_pool_free < debug_objects_pool_size) {
-		obj = hlist_entry(obj_to_free.first, typeof(*obj), node);
-		hlist_del(&obj->node);
-		hlist_add_head(&obj->node, &obj_pool);
-		WRITE_ONCE(obj_pool_free, obj_pool_free + 1);
-		WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - 1);
+	cnt = min(obj_nr_tofree, debug_objects_pool_size - obj_pool_free);
+	cnt = hlist_cut_number(&tofree, &obj_to_free, cnt, &tmp);
+	if (cnt > 0) {
+		hlist_splice_init(&tofree, tmp, &obj_pool);
+		WRITE_ONCE(obj_pool_free, obj_pool_free + cnt);
+		WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - cnt);
 	}
 	raw_spin_unlock_irqrestore(&pool_lock, flags);
 	return;
@@ -346,11 +346,12 @@ static void free_obj_work(struct work_struct *work)
 
 static void __free_object(struct debug_obj *obj)
 {
-	struct debug_obj *objs[ODEBUG_BATCH_SIZE];
 	struct debug_percpu_free *percpu_pool;
-	int lookahead_count = 0;
+	struct hlist_node *last;
+	HLIST_HEAD(freelist);
 	unsigned long flags;
 	bool work;
+	int cnt;
 
 	local_irq_save(flags);
 	if (!obj_cache)
@@ -371,56 +372,36 @@ static void __free_object(struct debug_obj *obj)
 	 * As the percpu pool is full, look ahead and pull out a batch
 	 * of objects from the percpu pool and free them as well.
 	 */
-	for (; lookahead_count < ODEBUG_BATCH_SIZE; lookahead_count++) {
-		objs[lookahead_count] = __alloc_object(&percpu_pool->free_objs);
-		if (!objs[lookahead_count])
-			break;
-		percpu_pool->obj_free--;
-	}
+	cnt = hlist_cut_number(&freelist, &percpu_pool->free_objs, ODEBUG_BATCH_SIZE, &last);
+	percpu_pool->obj_free -= cnt;
+
+	/* add one for obj */
+	cnt++;
+	hlist_add_head(&obj->node, &freelist);
 
 free_to_obj_pool:
 	raw_spin_lock(&pool_lock);
 	work = (obj_pool_free > debug_objects_pool_size) && obj_cache &&
 	       (obj_nr_tofree < ODEBUG_FREE_WORK_MAX);
-	obj_pool_used--;
+	obj_pool_used -= cnt;
 
 	if (work) {
-		WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + 1);
-		hlist_add_head(&obj->node, &obj_to_free);
-		if (lookahead_count) {
-			WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + lookahead_count);
-			obj_pool_used -= lookahead_count;
-			while (lookahead_count) {
-				hlist_add_head(&objs[--lookahead_count]->node,
-					       &obj_to_free);
-			}
-		}
+		WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + cnt);
+		hlist_splice_init(&freelist, last, &obj_to_free);
 
 		if ((obj_pool_free > debug_objects_pool_size) &&
 		    (obj_nr_tofree < ODEBUG_FREE_WORK_MAX)) {
-			int i;
-
 			/*
 			 * Free one more batch of objects from obj_pool.
 			 */
-			for (i = 0; i < ODEBUG_BATCH_SIZE; i++) {
-				obj = __alloc_object(&obj_pool);
-				hlist_add_head(&obj->node, &obj_to_free);
-				WRITE_ONCE(obj_pool_free, obj_pool_free - 1);
-				WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + 1);
-			}
+			cnt = hlist_cut_number(&freelist, &obj_pool, ODEBUG_BATCH_SIZE, &last);
+			hlist_splice_init(&freelist, last, &obj_to_free);
+			WRITE_ONCE(obj_pool_free, obj_pool_free - cnt);
+			WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + cnt);
 		}
 	} else {
-		WRITE_ONCE(obj_pool_free, obj_pool_free + 1);
-		hlist_add_head(&obj->node, &obj_pool);
-		if (lookahead_count) {
-			WRITE_ONCE(obj_pool_free, obj_pool_free + lookahead_count);
-			obj_pool_used -= lookahead_count;
-			while (lookahead_count) {
-				hlist_add_head(&objs[--lookahead_count]->node,
-					       &obj_pool);
-			}
-		}
+		WRITE_ONCE(obj_pool_free, obj_pool_free + cnt);
+		hlist_splice_init(&freelist, last, &obj_pool);
 	}
 	raw_spin_unlock(&pool_lock);
 	local_irq_restore(flags);
-- 
2.34.1