TLB shootdowns are tracked globally, but on a busy system it can be difficult to disambiguate the source of TLB shootdowns. Add two per-task counter fields: - nrtlbflush: number of TLB flush events received by the task - ngtlbflush: number of TLB flush events generated by the task. Expose those fields in /proc/[pid]/stat so that they can be analyzed alongside similar metrics (e.g. min_flt and maj_flt). Signed-off-by: Joe Damato <jdamato@xxxxxxxxxx> --- arch/x86/mm/tlb.c | 2 ++ fs/proc/array.c | 9 +++++++++ include/linux/sched.h | 6 ++++++ include/linux/sched/signal.h | 1 + kernel/exit.c | 6 ++++++ kernel/fork.c | 1 + 6 files changed, 25 insertions(+) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index c1e31e9..58f7c59 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -745,6 +745,7 @@ static void flush_tlb_func(void *info) if (!local) { inc_irq_stat(irq_tlb_count); count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); + current->nrtlbflush++; /* Can only happen on remote CPUs */ if (f->mm && f->mm != loaded_mm) @@ -895,6 +896,7 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask, * would not happen. 
*/ count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); + current->ngtlbflush++; if (info->end == TLB_FLUSH_ALL) trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL); else diff --git a/fs/proc/array.c b/fs/proc/array.c index 49283b81..435afdc 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -469,6 +469,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, unsigned long long start_time; unsigned long cmin_flt = 0, cmaj_flt = 0; unsigned long min_flt = 0, maj_flt = 0; + unsigned long ngtlbflush = 0, nrtlbflush = 0; u64 cutime, cstime, utime, stime; u64 cgtime, gtime; unsigned long rsslim = 0; @@ -530,11 +531,15 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, do { min_flt += t->min_flt; maj_flt += t->maj_flt; + ngtlbflush += t->ngtlbflush; + nrtlbflush += t->nrtlbflush; gtime += task_gtime(t); } while_each_thread(task, t); min_flt += sig->min_flt; maj_flt += sig->maj_flt; + ngtlbflush += sig->ngtlbflush; + nrtlbflush += sig->nrtlbflush; thread_group_cputime_adjusted(task, &utime, &stime); gtime += sig->gtime; @@ -554,6 +559,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, if (!whole) { min_flt = task->min_flt; maj_flt = task->maj_flt; + nrtlbflush = task->nrtlbflush; + ngtlbflush = task->ngtlbflush; task_cputime_adjusted(task, &utime, &stime); gtime = task_gtime(task); } @@ -643,6 +650,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, else seq_puts(m, " 0"); + seq_put_decimal_ull(m, " ", ngtlbflush); + seq_put_decimal_ull(m, " ", nrtlbflush); seq_putc(m, '\n'); if (mm) mmput(mm); diff --git a/include/linux/sched.h b/include/linux/sched.h index 5cdf746..2a0d879 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1047,6 +1047,12 @@ struct task_struct { unsigned long min_flt; unsigned long maj_flt; + /* Number of TLB flushes generated by this task */ + unsigned long ngtlbflush; + + /* Number of TLB flushes received by this task */ + unsigned long nrtlbflush; + /* Empty 
if CONFIG_POSIX_CPUTIMERS=n */ struct posix_cputimers posix_cputimers; diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 2009926..4e0b09c 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -189,6 +189,7 @@ struct signal_struct { struct prev_cputime prev_cputime; unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; + unsigned long ngtlbflush, nrtlbflush; unsigned long inblock, oublock, cinblock, coublock; unsigned long maxrss, cmaxrss; struct task_io_accounting ioac; diff --git a/kernel/exit.c b/kernel/exit.c index 35e0a31..5a72755 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -141,6 +141,8 @@ static void __exit_signal(struct task_struct *tsk) sig->gtime += task_gtime(tsk); sig->min_flt += tsk->min_flt; sig->maj_flt += tsk->maj_flt; + sig->ngtlbflush += tsk->ngtlbflush; + sig->nrtlbflush += tsk->nrtlbflush; sig->nvcsw += tsk->nvcsw; sig->nivcsw += tsk->nivcsw; sig->inblock += task_io_get_inblock(tsk); @@ -1095,6 +1097,10 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) p->min_flt + sig->min_flt + sig->cmin_flt; psig->cmaj_flt += p->maj_flt + sig->maj_flt + sig->cmaj_flt; + psig->ngtlbflush += + p->ngtlbflush + sig->ngtlbflush; + psig->nrtlbflush += + p->nrtlbflush + sig->nrtlbflush; psig->cnvcsw += p->nvcsw + sig->nvcsw + sig->cnvcsw; psig->cnivcsw += diff --git a/kernel/fork.c b/kernel/fork.c index b339918..5fa9f64 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1555,6 +1555,7 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) struct mm_struct *mm, *oldmm; tsk->min_flt = tsk->maj_flt = 0; + tsk->ngtlbflush = tsk->nrtlbflush = 0; tsk->nvcsw = tsk->nivcsw = 0; #ifdef CONFIG_DETECT_HUNG_TASK tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw; -- 2.7.4