The following commit has been merged into the sched/core branch of tip: Commit-ID: df89544263cd98ffcef1318b3bf18509b9420c8a Gitweb: https://git.kernel.org/tip/df89544263cd98ffcef1318b3bf18509b9420c8a Author: Thomas Gleixner <tglx@xxxxxxxxxxxxx> AuthorDate: Tue, 28 Sep 2021 14:24:32 +02:00 Committer: Peter Zijlstra <peterz@xxxxxxxxxxxxx> CommitterDate: Fri, 01 Oct 2021 13:58:06 +02:00 sched: Move mmdrop to RCU on RT mmdrop() is invoked from finish_task_switch() by the incoming task to drop the mm which was handed over by the previous task. mmdrop() can be quite expensive which prevents an incoming real-time task from getting useful work done. Provide mmdrop_sched() which maps to mmdrop() on !RT kernels. On RT kernels it delegates the eventually required invocation of __mmdrop() to RCU. Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> Link: https://lkml.kernel.org/r/20210928122411.648582026@xxxxxxxxxxxxx --- include/linux/mm_types.h | 4 ++++ include/linux/sched/mm.h | 29 +++++++++++++++++++++++++++++ kernel/sched/core.c | 2 +- 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 8f0fb62..09a2885 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -12,6 +12,7 @@ #include <linux/completion.h> #include <linux/cpumask.h> #include <linux/uprobes.h> +#include <linux/rcupdate.h> #include <linux/page-flags-layout.h> #include <linux/workqueue.h> #include <linux/seqlock.h> @@ -567,6 +568,9 @@ struct mm_struct { bool tlb_flush_batched; #endif struct uprobes_state uprobes_state; +#ifdef CONFIG_PREEMPT_RT + struct rcu_head delayed_drop; +#endif #ifdef CONFIG_HUGETLB_PAGE atomic_long_t hugetlb_usage; #endif diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index e24b1fe..0d81060 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -49,6 +49,35 @@ static inline void mmdrop(struct mm_struct *mm) 
__mmdrop(mm); } +#ifdef CONFIG_PREEMPT_RT +/* + * RCU callback for delayed mm drop. Not strictly RCU, but call_rcu() is + * by far the least expensive way to do that. + */ +static inline void __mmdrop_delayed(struct rcu_head *rhp) +{ + struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop); + + __mmdrop(mm); +} + +/* + * Invoked from finish_task_switch(). Delegates the heavy lifting on RT + * kernels via RCU. + */ +static inline void mmdrop_sched(struct mm_struct *mm) +{ + /* Provides a full memory barrier. See mmdrop() */ + if (atomic_dec_and_test(&mm->mm_count)) + call_rcu(&mm->delayed_drop, __mmdrop_delayed); +} +#else +static inline void mmdrop_sched(struct mm_struct *mm) +{ + mmdrop(mm); +} +#endif + /** * mmget() - Pin the address space associated with a &struct mm_struct. * @mm: The address space to pin. diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b36b5d7..bb70a07 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4773,7 +4773,7 @@ static struct rq *finish_task_switch(struct task_struct *prev) */ if (mm) { membarrier_mm_sync_core_before_usermode(mm); - mmdrop(mm); + mmdrop_sched(mm); } if (unlikely(prev_state == TASK_DEAD)) { if (prev->sched_class->task_dead)