On Thu, May 27, 2021 at 04:31:51PM +0200, Peter Zijlstra wrote:
> How's something *completely* untested like this?

I now have the below, which builds defconfig -cgroup-freezer. I've yet
to boot and test.

---
 include/linux/freezer.h | 53 +++++++++++---------------------------
 include/linux/sched.h   |  6 ++---
 init/do_mounts_initrd.c |  7 ++----
 kernel/freezer.c        | 67 ++++++++++++++++++++++++++++++-------------------
 kernel/hung_task.c      |  4 +--
 kernel/power/main.c     |  5 ++--
 kernel/power/process.c  | 10 +++-----
 kernel/sched/core.c     |  2 +-
 8 files changed, 70 insertions(+), 84 deletions(-)

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 0621c5f86c39..cdc65bc1e081 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -8,9 +8,11 @@
 #include <linux/sched.h>
 #include <linux/wait.h>
 #include <linux/atomic.h>
+#include <linux/jump_label.h>
 
 #ifdef CONFIG_FREEZER
-extern atomic_t system_freezing_cnt;	/* nr of freezing conds in effect */
+DECLARE_STATIC_KEY_FALSE(freezer_active);
+
 extern bool pm_freezing;		/* PM freezing in effect */
 extern bool pm_nosig_freezing;		/* PM nosig freezing in effect */
 
@@ -24,7 +26,7 @@ extern unsigned int freeze_timeout_msecs;
  */
 static inline bool frozen(struct task_struct *p)
 {
-	return p->flags & PF_FROZEN;
+	return p->state == TASK_FROZEN;
 }
 
 extern bool freezing_slow_path(struct task_struct *p);
@@ -34,9 +36,10 @@ extern bool freezing_slow_path(struct task_struct *p);
  */
 static inline bool freezing(struct task_struct *p)
 {
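+	/*
+	 * Fast path: no freezing condition is in effect anywhere, the
+	 * static branch is patched out and this test is free. Only once
+	 * freeze_processes() enables freezer_active do we take the slow
+	 * path and evaluate @p's freezing conditions.
+	 */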
-	if (likely(!atomic_read(&system_freezing_cnt)))
-		return false;
-	return freezing_slow_path(p);
+	if (static_branch_unlikely(&freezer_active))
+		return freezing_slow_path(p);
+
+	return false;
 }
 
 /* Takes and releases task alloc lock using task_lock() */
@@ -92,6 +95,8 @@ static inline bool cgroup_freezing(struct task_struct *task)
  * waking up the parent.
  */
 
+extern void __freezer_do_not_count(void);
+extern void __freezer_count(void);
 /**
  * freezer_do_not_count - tell freezer to ignore %current
  *
@@ -106,7 +111,8 @@ static inline bool cgroup_freezing(struct task_struct *task)
  */
 static inline void freezer_do_not_count(void)
 {
-	current->flags |= PF_FREEZER_SKIP;
+	if (static_branch_unlikely(&freezer_active))
+		__freezer_do_not_count();
 }
 
 /**
@@ -118,47 +124,17 @@ static inline void freezer_do_not_count(void)
  */
 static inline void freezer_count(void)
 {
-	current->flags &= ~PF_FREEZER_SKIP;
-	/*
-	 * If freezing is in progress, the following paired with smp_mb()
-	 * in freezer_should_skip() ensures that either we see %true
-	 * freezing() or freezer_should_skip() sees !PF_FREEZER_SKIP.
-	 */
-	smp_mb();
-	try_to_freeze();
+	if (static_branch_unlikely(&freezer_active))
+		__freezer_count();
 }
 
 /* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
 static inline void freezer_count_unsafe(void)
 {
-	current->flags &= ~PF_FREEZER_SKIP;
 	smp_mb();
 	try_to_freeze_unsafe();
 }
 
-/**
- * freezer_should_skip - whether to skip a task when determining frozen
- *			 state is reached
- * @p: task in question
- *
- * This function is used by freezers after establishing %true freezing() to
- * test whether a task should be skipped when determining the target frozen
- * state is reached.  IOW, if this function returns %true, @p is considered
- * frozen enough.
- */
-static inline bool freezer_should_skip(struct task_struct *p)
-{
-	/*
-	 * The following smp_mb() paired with the one in freezer_count()
-	 * ensures that either freezer_count() sees %true freezing() or we
-	 * see cleared %PF_FREEZER_SKIP and return %false.  This makes it
-	 * impossible for a task to slip frozen state testing after
-	 * clearing %PF_FREEZER_SKIP.
-	 */
-	smp_mb();
-	return p->flags & PF_FREEZER_SKIP;
-}
-
 /*
  * These functions are intended to be used whenever you want allow a sleeping
  * task to be frozen. Note that neither return any clear indication of
@@ -283,7 +259,6 @@ static inline bool try_to_freeze(void) { return false; }
 
 static inline void freezer_do_not_count(void) {}
 static inline void freezer_count(void) {}
-static inline int freezer_should_skip(struct task_struct *p) { return 0; }
 static inline void set_freezable(void) {}
 
 #define freezable_schedule()  schedule()
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d2c881384517..744dc617dbea 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -95,7 +95,9 @@ struct task_group;
 #define TASK_WAKING			0x0200
 #define TASK_NOLOAD			0x0400
 #define TASK_NEW			0x0800
-#define TASK_STATE_MAX			0x1000
+#define TASK_MAY_FREEZE			0x1000
+#define TASK_FROZEN			0x2000
+#define TASK_STATE_MAX			0x4000
 
 /* Convenience macros for the sake of set_current_state: */
 #define TASK_KILLABLE			(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
@@ -1572,7 +1574,6 @@ extern struct pid *cad_pid;
 #define PF_USED_MATH		0x00002000	/* If unset the fpu must be initialized before use */
 #define PF_USED_ASYNC		0x00004000	/* Used async_schedule*(), used by module init */
 #define PF_NOFREEZE		0x00008000	/* This thread should not be frozen */
-#define PF_FROZEN		0x00010000	/* Frozen for system suspend */
 #define PF_KSWAPD		0x00020000	/* I am kswapd */
 #define PF_MEMALLOC_NOFS	0x00040000	/* All allocation requests will inherit GFP_NOFS */
 #define PF_MEMALLOC_NOIO	0x00080000	/* All allocation requests will inherit GFP_NOIO */
@@ -1584,7 +1585,6 @@ extern struct pid *cad_pid;
 #define PF_NO_SETAFFINITY	0x04000000	/* Userland is not allowed to meddle with cpus_mask */
 #define PF_MCE_EARLY		0x08000000	/* Early kill for mce process policy */
 #define PF_MEMALLOC_PIN		0x10000000	/* Allocation context constrained to zones which allow long term pinning. */
-#define PF_FREEZER_SKIP		0x40000000	/* Freezer should not count it as freezable */
 #define PF_SUSPEND_TASK		0x80000000	/* This thread called freeze_processes() and should not be frozen */
 
 /*
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index 533d81ed74d4..2f1227053694 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -80,10 +80,9 @@ static void __init handle_initrd(void)
 	init_chdir("/old");
 
 	/*
-	 * In case that a resume from disk is carried out by linuxrc or one of
-	 * its children, we need to tell the freezer not to wait for us.
+	 * PF_FREEZER_SKIP is pointless here: the usermodehelper runs from
+	 * a workqueue, so a flag set on current is never seen by linuxrc.
 	 */
-	current->flags |= PF_FREEZER_SKIP;
 
 	info = call_usermodehelper_setup("/linuxrc", argv, envp_init,
 					 GFP_KERNEL, init_linuxrc, NULL, NULL);
@@ -91,8 +90,6 @@ static void __init handle_initrd(void)
 		return;
 	call_usermodehelper_exec(info, UMH_WAIT_PROC);
 
-	current->flags &= ~PF_FREEZER_SKIP;
-
 	/* move initrd to rootfs' /old */
 	init_mount("..", ".", NULL, MS_MOVE, NULL);
 	/* switch root and cwd back to / of rootfs */
diff --git a/kernel/freezer.c b/kernel/freezer.c
index dc520f01f99d..e316008042df 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -13,8 +13,8 @@
 #include <linux/kthread.h>
 
-/* total number of freezing conditions in effect */
-atomic_t system_freezing_cnt = ATOMIC_INIT(0);
-EXPORT_SYMBOL(system_freezing_cnt);
+/* enabled while any freezing condition is in effect */
+DEFINE_STATIC_KEY_FALSE(freezer_active);
+EXPORT_SYMBOL(freezer_active);
 
 /* indicate whether PM freezing is in effect, protected by
  * system_transition_mutex
@@ -29,7 +29,7 @@ static DEFINE_SPINLOCK(freezer_lock);
  * freezing_slow_path - slow path for testing whether a task needs to be frozen
  * @p: task to be tested
  *
- * This function is called by freezing() if system_freezing_cnt isn't zero
+ * This function is called by freezing() if freezer_active is enabled
  * and tests whether @p needs to enter and stay in frozen state.  Can be
 * called under any context.  The freezers are responsible for ensuring the
 * target tasks see the updated state.
@@ -63,18 +63,16 @@ bool __refrigerator(bool check_kthr_stop)
 	pr_debug("%s entered refrigerator\n", current->comm);
 
 	for (;;) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-
 		spin_lock_irq(&freezer_lock);
-		current->flags |= PF_FROZEN;
-		if (!freezing(current) ||
-		    (check_kthr_stop && kthread_should_stop()))
-			current->flags &= ~PF_FROZEN;
+		if (freezing(current) &&
+		    !(check_kthr_stop && kthread_should_stop())) {
+			set_current_state(TASK_FROZEN);
+			was_frozen = true;
+		}
 		spin_unlock_irq(&freezer_lock);
 
-		if (!(current->flags & PF_FROZEN))
+		if (!frozen(current))
 			break;
 
-		was_frozen = true;
 		schedule();
 	}
@@ -101,6 +96,38 @@ static void fake_signal_wake_up(struct task_struct *p)
 	}
 }
 
+void __freezer_do_not_count(void)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&current->pi_lock, flags);
+	if (current->state)
+		current->state |= TASK_MAY_FREEZE;
+	raw_spin_unlock_irqrestore(&current->pi_lock, flags);
+}
+EXPORT_SYMBOL(__freezer_do_not_count);
+
+void __freezer_count(void)
+{
+	try_to_freeze();
+}
+EXPORT_SYMBOL(__freezer_count);
+
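+/*
+ * Flip a TASK_MAY_FREEZE sleeper to TASK_FROZEN without waking it.
+ * ->pi_lock serializes us against try_to_wake_up(), so the state change
+ * cannot race with a concurrent wakeup. Returns true if @p is now frozen.
+ */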
- */ - if (freezer_should_skip(p)) - return false; - spin_lock_irqsave(&freezer_lock, flags); - if (!freezing(p) || frozen(p)) { + if (!freezing(p) || frozen(p) || __freeze_task(p)) { spin_unlock_irqrestore(&freezer_lock, flags); return false; } @@ -149,7 +164,7 @@ void __thaw_task(struct task_struct *p) spin_lock_irqsave(&freezer_lock, flags); if (frozen(p)) - wake_up_process(p); + wake_up_state(p, TASK_FROZEN); spin_unlock_irqrestore(&freezer_lock, flags); } diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 396ebaebea3f..154cac8b805a 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -92,8 +92,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) * Ensure the task is not frozen. * Also, skip vfork and any other user process that freezer should skip. */ - if (unlikely(t->flags & (PF_FROZEN | PF_FREEZER_SKIP))) - return; + if (unlikely(t->state & (TASK_MAY_FREEZE | TASK_FROZEN))) + return; /* * When a freshly created task is scheduled once, changes its state to diff --git a/kernel/power/main.c b/kernel/power/main.c index 12c7e1bb442f..0817e3913294 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -23,7 +23,8 @@ void lock_system_sleep(void) { - current->flags |= PF_FREEZER_SKIP; + WARN_ON_ONCE(current->flags & PF_NOFREEZE); + current->flags |= PF_NOFREEZE; mutex_lock(&system_transition_mutex); } EXPORT_SYMBOL_GPL(lock_system_sleep); @@ -46,7 +47,7 @@ void unlock_system_sleep(void) * Which means, if we use try_to_freeze() here, it would make them * enter the refrigerator, thus causing hibernation to lockup. */ - current->flags &= ~PF_FREEZER_SKIP; + current->flags &= ~PF_NOFREEZE; mutex_unlock(&system_transition_mutex); } EXPORT_SYMBOL_GPL(unlock_system_sleep); diff --git a/kernel/power/process.c b/kernel/power/process.c index 50cc63534486..36dee2dcfeab 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -53,8 +53,7 @@ static int try_to_freeze_tasks(bool user_only) if (p == current || !freeze_task(p)) continue; - if (!freezer_should_skip(p)) - todo++; + todo++; } read_unlock(&tasklist_lock); @@ -99,8 +98,7 @@ static int try_to_freeze_tasks(bool user_only) if (!wakeup || pm_debug_messages_on) { read_lock(&tasklist_lock); for_each_process_thread(g, p) { - if (p != current && !freezer_should_skip(p) - && freezing(p) && !frozen(p)) + if (p != current && freezing(p) && !frozen(p)) sched_show_task(p); } read_unlock(&tasklist_lock); @@ -132,7 +130,7 @@ int freeze_processes(void) current->flags |= PF_SUSPEND_TASK; if (!pm_freezing) - atomic_inc(&system_freezing_cnt); + static_branch_inc(&freezer_active); pm_wakeup_clear(true); pr_info("Freezing user space processes ... "); @@ -193,7 +191,7 @@ void thaw_processes(void) trace_suspend_resume(TPS("thaw_processes"), 0, true); if (pm_freezing) - atomic_dec(&system_freezing_cnt); + static_branch_dec(&freezer_active); pm_freezing = false; pm_nosig_freezing = false; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5226cc26a095..6bc5df6f8d73 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5082,7 +5082,7 @@ static void __sched notrace __schedule(bool preempt) prev->sched_contributes_to_load = (prev_state & TASK_UNINTERRUPTIBLE) && !(prev_state & TASK_NOLOAD) && - !(prev->flags & PF_FROZEN); + !(prev_state & TASK_FROZEN); if (prev->sched_contributes_to_load) rq->nr_uninterruptible++;