It was found that a dying mm_struct whose owning task has exited can
stay on as the active_mm of kernel threads as long as no other user
task runs on the CPUs that use it as the active_mm. This prolongs the
lifetime of the dying mm, holding up memory and other resources that
cannot be freed.

Fix that by forcing kernel threads to use init_mm as the active_mm
when the previous active_mm is dying.

Signed-off-by: Waiman Long <longman@xxxxxxxxxx>
---
 kernel/sched/core.c | 13 +++++++++++--
 mm/init-mm.c        |  2 ++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2b037f195473..ca348e1f5a1e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3233,13 +3233,22 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	 * Both of these contain the full memory barrier required by
 	 * membarrier after storing to rq->curr, before returning to
 	 * user-space.
+	 *
+	 * If mm is NULL and oldmm is dying (!owner), we switch to
+	 * init_mm instead to make sure that oldmm can be freed ASAP.
 	 */
-	if (!mm) {
+	if (!mm && oldmm->owner) {
 		next->active_mm = oldmm;
 		mmgrab(oldmm);
 		enter_lazy_tlb(oldmm, next);
-	} else
+	} else {
+		if (!mm) {
+			mm = &init_mm;
+			next->active_mm = mm;
+			mmgrab(mm);
+		}
 		switch_mm_irqs_off(oldmm, mm, next);
+	}
 
 	if (!prev->mm) {
 		prev->active_mm = NULL;
diff --git a/mm/init-mm.c b/mm/init-mm.c
index a787a319211e..5bfc6bc333ca 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -5,6 +5,7 @@
 #include <linux/spinlock.h>
 #include <linux/list.h>
 #include <linux/cpumask.h>
+#include <linux/sched/task.h>
 #include <linux/atomic.h>
 #include <linux/user_namespace.h>
 
@@ -36,5 +37,6 @@ struct mm_struct init_mm = {
	.mmlist		= LIST_HEAD_INIT(init_mm.mmlist),
	.user_ns	= &init_user_ns,
	.cpu_bitmap	= { [BITS_TO_LONGS(NR_CPUS)] = 0},
+	.owner		= &init_task,
	INIT_MM_CONTEXT(init_mm)
 };
-- 
2.18.1
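
For reference, here is a minimal userspace model of the lazy active_mm
reference counting described above. It is an illustrative sketch only,
not kernel code: fake_mm, fake_mmgrab(), fake_mmdrop() and
switch_to_kthread() are hypothetical stand-ins for mm_struct,
mmgrab(), mmdrop() and the kthread (mm == NULL) path of
context_switch(), and has_owner stands in for the mm->owner test this
patch adds.

/*
 * lazy_mm_model.c - illustrative sketch only, not kernel code.
 * All names below are hypothetical stand-ins invented for this
 * example; the real logic lives in kernel/sched/core.c.
 */
#include <stdio.h>

struct fake_mm {
	int mm_count;		/* models mm_struct.mm_count	*/
	int has_owner;		/* models mm->owner != NULL	*/
	const char *name;
};

static struct fake_mm fake_init_mm = {
	.mm_count  = 1,		/* init_mm is never freed	*/
	.has_owner = 1,		/* this patch sets init_mm.owner */
	.name	   = "init_mm",
};

static void fake_mmgrab(struct fake_mm *mm)
{
	mm->mm_count++;
}

static void fake_mmdrop(struct fake_mm *mm)
{
	if (--mm->mm_count == 0)
		printf("    %s freed\n", mm->name);
}

/*
 * A kernel thread is switched in on a CPU whose outgoing task used
 * @oldmm.  Without the fix, the kthread always borrows oldmm as its
 * active_mm; with the fix, a dying oldmm (no owner) is replaced by
 * init_mm, so the drop below can release oldmm's last reference.
 */
static struct fake_mm *switch_to_kthread(struct fake_mm *oldmm, int fixed)
{
	struct fake_mm *active_mm = oldmm;

	if (fixed && !oldmm->has_owner)
		active_mm = &fake_init_mm;

	fake_mmgrab(active_mm);		/* kthread's lazy reference	*/
	fake_mmdrop(oldmm);		/* outgoing task's reference	*/
	return active_mm;
}

int main(void)
{
	struct fake_mm dying1 = { .mm_count = 1, .name = "dying mm" };
	struct fake_mm dying2 = { .mm_count = 1, .name = "dying mm" };
	struct fake_mm *active_mm;

	printf("old behaviour:\n");
	active_mm = switch_to_kthread(&dying1, 0);
	printf("    kthread active_mm = %s, mm_count = %d (mm stays pinned)\n",
	       active_mm->name, active_mm->mm_count);

	printf("patched behaviour:\n");
	active_mm = switch_to_kthread(&dying2, 1);
	printf("    kthread active_mm = %s\n", active_mm->name);
	return 0;
}

Compiled with any C compiler, the first run leaves the dying mm with
mm_count == 1, pinned by the kthread's lazy reference until some user
task next runs on that CPU, while the patched run drops the last
reference right away and reports the mm as freed.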