6.6-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>

This reverts commit b522229a56941adac1ea1da6593b2b5c734b5359 which is
commit 91ccc6e7233bb10a9c176aa4cc70d6f432a441a5 upstream.

The workqueue patches backported to 6.6.y caused some reported
regressions, so revert them for now.

Reported-by: Thorsten Leemhuis <regressions@xxxxxxxxxxxxx>
Cc: Tejun Heo <tj@xxxxxxxxxx>
Cc: Marek Szyprowski <m.szyprowski@xxxxxxxxxxx>
Cc: Nathan Chancellor <nathan@xxxxxxxxxx>
Cc: Sasha Levin <sashal@xxxxxxxxxx>
Cc: Audra Mitchell <audra@xxxxxxxxxx>
Link: https://lore.kernel.org/all/ce4c2f67-c298-48a0-87a3-f933d646c73b@xxxxxxxxxxxxx/
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
 kernel/workqueue.c |  142 ++---------------------------------------------------
 1 file changed, 7 insertions(+), 135 deletions(-)

--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -281,16 +281,6 @@ struct wq_flusher {
 struct wq_device;
 
 /*
- * Unlike in a per-cpu workqueue where max_active limits its concurrency level
- * on each CPU, in an unbound workqueue, max_active applies to the whole system.
- * As sharing a single nr_active across multiple sockets can be very expensive,
- * the counting and enforcement is per NUMA node.
- */
-struct wq_node_nr_active {
-	atomic_t		nr;	/* per-node nr_active count */
-};
-
-/*
  * The externally visible workqueue.  It relays the issued work items to
  * the appropriate worker_pool through its pool_workqueues.
  */
@@ -336,7 +326,6 @@ struct workqueue_struct {
 	/* hot fields used during command issue, aligned to cacheline */
 	unsigned int		flags ____cacheline_aligned; /* WQ: WQ_* flags */
 	struct pool_workqueue __percpu __rcu **cpu_pwq; /* I: per-cpu pwqs */
-	struct wq_node_nr_active *node_nr_active[]; /* I: per-node nr_active */
 };
 
 static struct kmem_cache *pwq_cache;
@@ -1427,31 +1416,6 @@ work_func_t wq_worker_last_func(struct t
 }
 
 /**
- * wq_node_nr_active - Determine wq_node_nr_active to use
- * @wq: workqueue of interest
- * @node: NUMA node, can be %NUMA_NO_NODE
- *
- * Determine wq_node_nr_active to use for @wq on @node. Returns:
- *
- * - %NULL for per-cpu workqueues as they don't need to use shared nr_active.
- *
- * - node_nr_active[nr_node_ids] if @node is %NUMA_NO_NODE.
- *
- * - Otherwise, node_nr_active[@node].
- */
-static struct wq_node_nr_active *wq_node_nr_active(struct workqueue_struct *wq,
-						   int node)
-{
-	if (!(wq->flags & WQ_UNBOUND))
-		return NULL;
-
-	if (node == NUMA_NO_NODE)
-		node = nr_node_ids;
-
-	return wq->node_nr_active[node];
-}
-
-/**
  * get_pwq - get an extra reference on the specified pool_workqueue
  * @pwq: pool_workqueue to get
  *
@@ -1532,17 +1496,12 @@ static bool pwq_activate_work(struct poo
 			      struct work_struct *work)
 {
 	struct worker_pool *pool = pwq->pool;
-	struct wq_node_nr_active *nna;
 
 	lockdep_assert_held(&pool->lock);
 
 	if (!(*work_data_bits(work) & WORK_STRUCT_INACTIVE))
 		return false;
 
-	nna = wq_node_nr_active(pwq->wq, pool->node);
-	if (nna)
-		atomic_inc(&nna->nr);
-
 	pwq->nr_active++;
 	__pwq_activate_work(pwq, work);
 	return true;
@@ -1559,18 +1518,14 @@ static bool pwq_tryinc_nr_active(struct
 {
 	struct workqueue_struct *wq = pwq->wq;
 	struct worker_pool *pool = pwq->pool;
-	struct wq_node_nr_active *nna = wq_node_nr_active(wq, pool->node);
 	bool obtained;
 
 	lockdep_assert_held(&pool->lock);
 
 	obtained = pwq->nr_active < READ_ONCE(wq->max_active);
 
-	if (obtained) {
+	if (obtained)
 		pwq->nr_active++;
-		if (nna)
-			atomic_inc(&nna->nr);
-	}
 
 	return obtained;
 }
@@ -1607,26 +1562,10 @@ static bool pwq_activate_first_inactive(
 static void pwq_dec_nr_active(struct pool_workqueue *pwq)
 {
 	struct worker_pool *pool = pwq->pool;
-	struct wq_node_nr_active *nna = wq_node_nr_active(pwq->wq, pool->node);
 
 	lockdep_assert_held(&pool->lock);
 
-	/*
-	 * @pwq->nr_active should be decremented for both percpu and unbound
-	 * workqueues.
-	 */
 	pwq->nr_active--;
-
-	/*
-	 * For a percpu workqueue, it's simple. Just need to kick the first
-	 * inactive work item on @pwq itself.
-	 */
-	if (!nna) {
-		pwq_activate_first_inactive(pwq);
-		return;
-	}
-
-	atomic_dec(&nna->nr);
 	pwq_activate_first_inactive(pwq);
 }
 
@@ -4081,63 +4020,11 @@ static void wq_free_lockdep(struct workq
 }
 #endif
 
-static void free_node_nr_active(struct wq_node_nr_active **nna_ar)
-{
-	int node;
-
-	for_each_node(node) {
-		kfree(nna_ar[node]);
-		nna_ar[node] = NULL;
-	}
-
-	kfree(nna_ar[nr_node_ids]);
-	nna_ar[nr_node_ids] = NULL;
-}
-
-static void init_node_nr_active(struct wq_node_nr_active *nna)
-{
-	atomic_set(&nna->nr, 0);
-}
-
-/*
- * Each node's nr_active counter will be accessed mostly from its own node and
- * should be allocated in the node.
- */
-static int alloc_node_nr_active(struct wq_node_nr_active **nna_ar)
-{
-	struct wq_node_nr_active *nna;
-	int node;
-
-	for_each_node(node) {
-		nna = kzalloc_node(sizeof(*nna), GFP_KERNEL, node);
-		if (!nna)
-			goto err_free;
-		init_node_nr_active(nna);
-		nna_ar[node] = nna;
-	}
-
-	/* [nr_node_ids] is used as the fallback */
-	nna = kzalloc_node(sizeof(*nna), GFP_KERNEL, NUMA_NO_NODE);
-	if (!nna)
-		goto err_free;
-	init_node_nr_active(nna);
-	nna_ar[nr_node_ids] = nna;
-
-	return 0;
-
-err_free:
-	free_node_nr_active(nna_ar);
-	return -ENOMEM;
-}
-
 static void rcu_free_wq(struct rcu_head *rcu)
 {
 	struct workqueue_struct *wq =
 		container_of(rcu, struct workqueue_struct, rcu);
 
-	if (wq->flags & WQ_UNBOUND)
-		free_node_nr_active(wq->node_nr_active);
-
 	wq_free_lockdep(wq);
 	free_percpu(wq->cpu_pwq);
 	free_workqueue_attrs(wq->unbound_attrs);
@@ -4889,8 +4776,7 @@ struct workqueue_struct *alloc_workqueue
 {
 	va_list args;
 	struct workqueue_struct *wq;
-	size_t wq_size;
-	int name_len;
+	int len;
 
 	/*
 	 * Unbound && max_active == 1 used to imply ordered, which is no longer
@@ -4906,12 +4792,7 @@ struct workqueue_struct *alloc_workqueue
 		flags |= WQ_UNBOUND;
 
 	/* allocate wq and format name */
-	if (flags & WQ_UNBOUND)
-		wq_size = struct_size(wq, node_nr_active, nr_node_ids + 1);
-	else
-		wq_size = sizeof(*wq);
-
-	wq = kzalloc(wq_size, GFP_KERNEL);
+	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
 	if (!wq)
 		return NULL;
 
@@ -4922,12 +4803,11 @@ struct workqueue_struct *alloc_workqueue
 	}
 
 	va_start(args, max_active);
-	name_len = vsnprintf(wq->name, sizeof(wq->name), fmt, args);
+	len = vsnprintf(wq->name, sizeof(wq->name), fmt, args);
 	va_end(args);
 
-	if (name_len >= WQ_NAME_LEN)
-		pr_warn_once("workqueue: name exceeds WQ_NAME_LEN. Truncating to: %s\n",
-			     wq->name);
+	if (len >= WQ_NAME_LEN)
+		pr_warn_once("workqueue: name exceeds WQ_NAME_LEN. Truncating to: %s\n", wq->name);
 
 	max_active = max_active ?: WQ_DFL_ACTIVE;
 	max_active = wq_clamp_max_active(max_active, flags, wq->name);
@@ -4946,13 +4826,8 @@ struct workqueue_struct *alloc_workqueue
 	wq_init_lockdep(wq);
 	INIT_LIST_HEAD(&wq->list);
 
-	if (flags & WQ_UNBOUND) {
-		if (alloc_node_nr_active(wq->node_nr_active) < 0)
-			goto err_unreg_lockdep;
-	}
-
 	if (alloc_and_link_pwqs(wq) < 0)
-		goto err_free_node_nr_active;
+		goto err_unreg_lockdep;
 
 	if (wq_online && init_rescuer(wq) < 0)
 		goto err_destroy;
@@ -4977,9 +4852,6 @@ struct workqueue_struct *alloc_workqueue
 
 	return wq;
 
-err_free_node_nr_active:
-	if (wq->flags & WQ_UNBOUND)
-		free_node_nr_active(wq->node_nr_active);
 err_unreg_lockdep:
 	wq_unregister_lockdep(wq);
 	wq_free_lockdep(wq);
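For context on what this revert takes out: the reverted backport tracked
unbound-workqueue nr_active with one shared atomic counter per NUMA node,
selected by wq_node_nr_active() (NULL for per-cpu workqueues, the extra
[nr_node_ids] slot for NUMA_NO_NODE). The sketch below is a minimal userspace
mock of that selection and counting logic, not part of the patch itself; the
flag value, node count, and the embedded counter array are stand-ins for the
kernel's WQ_UNBOUND, nr_node_ids, and the kzalloc_node()-allocated per-node
pointers.

	/* Illustrative sketch only -- mirrors the reverted per-node
	 * nr_active bookkeeping with stand-in values, not kernel code. */
	#include <assert.h>
	#include <stdatomic.h>
	#include <stddef.h>

	#define WQ_UNBOUND	0x2	/* stand-in for the kernel flag value */
	#define NUMA_NO_NODE	(-1)
	#define NR_NODE_IDS	2	/* stand-in for the nr_node_ids global */

	struct wq_node_nr_active {
		atomic_int nr;		/* per-node nr_active count */
	};

	struct workqueue_struct {
		unsigned int flags;
		/* embedded here for simplicity; the kernel allocates one
		 * counter per node plus a NUMA_NO_NODE fallback slot */
		struct wq_node_nr_active node_nr_active[NR_NODE_IDS + 1];
	};

	/* Same selection rule as the reverted wq_node_nr_active() helper. */
	static struct wq_node_nr_active *
	wq_node_nr_active(struct workqueue_struct *wq, int node)
	{
		if (!(wq->flags & WQ_UNBOUND))
			return NULL;	/* per-cpu wqs keep per-pwq counts only */

		if (node == NUMA_NO_NODE)
			node = NR_NODE_IDS;	/* fallback slot */

		return &wq->node_nr_active[node];
	}

	int main(void)
	{
		struct workqueue_struct unbound_wq = { .flags = WQ_UNBOUND };
		struct workqueue_struct percpu_wq = { .flags = 0 };
		struct wq_node_nr_active *nna;

		/* Activating a work item on node 1 bumps that node's shared
		 * counter, as pwq_activate_work()/pwq_tryinc_nr_active() did. */
		nna = wq_node_nr_active(&unbound_wq, 1);
		atomic_fetch_add(&nna->nr, 1);
		assert(atomic_load(&unbound_wq.node_nr_active[1].nr) == 1);

		/* Retiring it drops the counter, as pwq_dec_nr_active() did. */
		atomic_fetch_sub(&nna->nr, 1);

		/* NUMA_NO_NODE maps to the extra [nr_node_ids] slot. */
		assert(wq_node_nr_active(&unbound_wq, NUMA_NO_NODE) ==
		       &unbound_wq.node_nr_active[NR_NODE_IDS]);

		/* Per-cpu workqueues get no shared counter at all. */
		assert(wq_node_nr_active(&percpu_wq, 0) == NULL);
		return 0;
	}

The revert returns 6.6.y to plain per-pwq nr_active accounting, which is why
every atomic_inc()/atomic_dec() on nna->nr and the node_nr_active allocation
and teardown paths disappear in the diff above.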