* Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> wrote: > I just tried to find a valid BUG_ON() that would make me go "yeah, that's > actually worth it", and couldn't really find one. Yeah, there are several > ones in the scheduler that make me go "ok, if that triggers, the machine > is dead anyway", so in that sense there are certainly BUG_ON()s that > don't _hurt_. That's a mostly accidental, historical accumulation of BUG_ON()s - I believe we can change all of them to WARN_ON() via the patch below. As far as the scheduler is concerned, we don't need any BUG_ON()s. [ This assumes that printk() itself is atomic and non-recursive wrt. the scheduler in these code paths ... ] Thanks, Ingo ===============> From: Ingo Molnar <mingo@xxxxxxxxxx> Date: Thu, 11 Aug 2022 08:54:52 +0200 Subject: [PATCH] sched/all: Change BUG_ON() instances to WARN_ON() There's no good reason to crash a user's system with a BUG_ON(), chances are high that they'll never even see the crash message on Xorg, and it won't make it into the syslog either. By using a WARN_ON() we at least give the user a chance to report any bugs triggered here - instead of getting silent hangs. None of these WARN_ON()s are supposed to trigger, ever - so we ignore cases where a NULL check is done via a BUG_ON() and we let a NULL pointer through after a WARN_ON(). There's one exception: WARN_ON() arguments with side-effects, such as locking - in this case we use the return value of the WARN_ON(), such as in: - BUG_ON(!lock_task_sighand(p, &flags)); + if (WARN_ON(!lock_task_sighand(p, &flags))) + return; Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx> --- kernel/sched/autogroup.c | 3 ++- kernel/sched/core.c | 2 +- kernel/sched/cpupri.c | 2 +- kernel/sched/deadline.c | 26 +++++++++++++------------- kernel/sched/fair.c | 10 +++++----- kernel/sched/rt.c | 2 +- kernel/sched/sched.h | 6 +++--- 7 files changed, 26 insertions(+), 25 deletions(-) diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c index 4ebaf97f7bd8..13f6b6da35a0 100644 --- a/kernel/sched/autogroup.c +++ b/kernel/sched/autogroup.c @@ -161,7 +161,8 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag) struct task_struct *t; unsigned long flags; - BUG_ON(!lock_task_sighand(p, &flags)); + if (WARN_ON(!lock_task_sighand(p, &flags))) + return; prev = p->signal->autogroup; if (prev == ag) { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d3d61cbb6b3c..f84206bf42cd 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2328,7 +2328,7 @@ static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf, rq = cpu_rq(new_cpu); rq_lock(rq, rf); - BUG_ON(task_cpu(p) != new_cpu); + WARN_ON(task_cpu(p) != new_cpu); activate_task(rq, p, 0); check_preempt_curr(rq, p, 0); diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c index fa9ce9d83683..9f719e4ea081 100644 --- a/kernel/sched/cpupri.c +++ b/kernel/sched/cpupri.c @@ -147,7 +147,7 @@ int cpupri_find_fitness(struct cpupri *cp, struct task_struct *p, int task_pri = convert_prio(p->prio); int idx, cpu; - BUG_ON(task_pri >= CPUPRI_NR_PRIORITIES); + WARN_ON(task_pri >= CPUPRI_NR_PRIORITIES); for (idx = 0; idx < task_pri; idx++) { diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 1d9c90958baa..fb234077c317 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -310,7 +310,7 @@ static void dl_change_utilization(struct task_struct *p, u64 new_bw) { struct rq *rq; - BUG_ON(p->dl.flags & SCHED_FLAG_SUGOV); + WARN_ON(p->dl.flags & SCHED_FLAG_SUGOV); if (task_on_rq_queued(p)) return; @@ -607,7 +607,7 @@ static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p) { struct rb_node *leftmost; - BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks)); + WARN_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks)); leftmost = rb_add_cached(&p->pushable_dl_tasks, &rq->dl.pushable_dl_tasks_root, @@ -684,7 +684,7 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p * Failed to find any suitable CPU. * The task will never come back! */ - BUG_ON(dl_bandwidth_enabled()); + WARN_ON(dl_bandwidth_enabled()); /* * If admission control is disabled we @@ -830,7 +830,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se) struct dl_rq *dl_rq = dl_rq_of_se(dl_se); struct rq *rq = rq_of_dl_rq(dl_rq); - BUG_ON(pi_of(dl_se)->dl_runtime <= 0); + WARN_ON(pi_of(dl_se)->dl_runtime <= 0); /* * This could be the case for a !-dl task that is boosted. @@ -1616,7 +1616,7 @@ static void __enqueue_dl_entity(struct sched_dl_entity *dl_se) { struct dl_rq *dl_rq = dl_rq_of_se(dl_se); - BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node)); + WARN_ON(!RB_EMPTY_NODE(&dl_se->rb_node)); rb_add_cached(&dl_se->rb_node, &dl_rq->root, __dl_less); @@ -1640,7 +1640,7 @@ static void __dequeue_dl_entity(struct sched_dl_entity *dl_se) static void enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags) { - BUG_ON(on_dl_rq(dl_se)); + WARN_ON(on_dl_rq(dl_se)); update_stats_enqueue_dl(dl_rq_of_se(dl_se), dl_se, flags); @@ -2017,7 +2017,7 @@ static struct task_struct *pick_task_dl(struct rq *rq) return NULL; dl_se = pick_next_dl_entity(dl_rq); - BUG_ON(!dl_se); + WARN_ON(!dl_se); p = dl_task_of(dl_se); return p; @@ -2277,12 +2277,12 @@ static struct task_struct *pick_next_pushable_dl_task(struct rq *rq) p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root)); - BUG_ON(rq->cpu != task_cpu(p)); - BUG_ON(task_current(rq, p)); - BUG_ON(p->nr_cpus_allowed <= 1); + WARN_ON(rq->cpu != task_cpu(p)); + WARN_ON(task_current(rq, p)); + WARN_ON(p->nr_cpus_allowed <= 1); - BUG_ON(!task_on_rq_queued(p)); - BUG_ON(!dl_task(p)); + WARN_ON(!task_on_rq_queued(p)); + WARN_ON(!dl_task(p)); return p; } @@ -2492,7 +2492,7 @@ static void set_cpus_allowed_dl(struct task_struct *p, struct root_domain *src_rd; struct rq *rq; - BUG_ON(!dl_task(p)); + WARN_ON(!dl_task(p)); rq = task_rq(p); src_rd = rq->rd; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index da388657d5ac..00c01b3232b9 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2600,7 +2600,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, if (!join) return; - BUG_ON(irqs_disabled()); + WARN_ON(irqs_disabled()); double_lock_irq(&my_grp->lock, &grp->lock); for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) { @@ -7279,7 +7279,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ return; find_matching_se(&se, &pse); - BUG_ON(!pse); + WARN_ON(!pse); cse_is_idle = se_is_idle(se); pse_is_idle = se_is_idle(pse); @@ -8159,7 +8159,7 @@ static void attach_task(struct rq *rq, struct task_struct *p) { lockdep_assert_rq_held(rq); - BUG_ON(task_rq(p) != rq); + WARN_ON(task_rq(p) != rq); activate_task(rq, p, ENQUEUE_NOCLOCK); check_preempt_curr(rq, p, 0); } @@ -10134,7 +10134,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, goto out_balanced; } - BUG_ON(busiest == env.dst_rq); + WARN_ON(busiest == env.dst_rq); schedstat_add(sd->lb_imbalance[idle], env.imbalance); @@ -10430,7 +10430,7 @@ static int active_load_balance_cpu_stop(void *data) * we need to fix it. Originally reported by * Bjorn Helgaas on a 128-CPU setup. */ - BUG_ON(busiest_rq == target_rq); + WARN_ON(busiest_rq == target_rq); /* Search for an sd spanning us and the target CPU. */ rcu_read_lock(); diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 054b6711e961..acf9f5ce0c4a 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -843,7 +843,7 @@ static void __disable_runtime(struct rq *rq) * We cannot be left wanting - that would mean some runtime * leaked out of the system. */ - BUG_ON(want); + WARN_ON(want); balanced: /* * Disable all the borrow logic by pretending we have inf diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b0bf2287dd9d..8e5df3bc3483 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2699,8 +2699,8 @@ static inline void double_rq_lock(struct rq *rq1, struct rq *rq2) __acquires(rq1->lock) __acquires(rq2->lock) { - BUG_ON(!irqs_disabled()); - BUG_ON(rq1 != rq2); + WARN_ON(!irqs_disabled()); + WARN_ON(rq1 != rq2); raw_spin_rq_lock(rq1); __acquire(rq2->lock); /* Fake it out ;) */ double_rq_clock_clear_update(rq1, rq2); @@ -2716,7 +2716,7 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2) __releases(rq1->lock) __releases(rq2->lock) { - BUG_ON(rq1 != rq2); + WARN_ON(rq1 != rq2); raw_spin_rq_unlock(rq1); __release(rq2->lock); }