The patch titled per-task-delay-accounting: setup has been added to the -mm tree. Its filename is per-task-delay-accounting-setup.patch See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find out what to do about this From: Balbir Singh <balbir@xxxxxxxxxx> Initialization code related to collection of per-task "delay" statistics which measure how long it had to wait for cpu, sync block io, swapping etc. The collection of statistics and the interface are in other patches. This patch sets up the data structures and allows the statistics collection to be disabled through a kernel boot parameter. Signed-off-by: Shailabh Nagar <nagar@xxxxxxxxxxxxxx> Signed-off-by: Balbir Singh <balbir@xxxxxxxxxx> Cc: Jes Sorensen <jes@xxxxxxx> Cc: Peter Chubb <peterc@xxxxxxxxxxxxxxxxxx> Cc: Erich Focht <efocht@xxxxxxxxxx> Cc: Levent Serinol <lserinol@xxxxxxxxx> Cc: Jay Lan <jlan@xxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxx> --- Documentation/kernel-parameters.txt | 2 include/linux/delayacct.h | 69 ++++++++++++++++++++ include/linux/sched.h | 20 +++++ include/linux/time.h | 10 ++ init/Kconfig | 10 ++ init/main.c | 2 kernel/Makefile | 1 kernel/delayacct.c | 87 ++++++++++++++++++++++++++ kernel/exit.c | 2 kernel/fork.c | 2 10 files changed, 205 insertions(+) diff -puN Documentation/kernel-parameters.txt~per-task-delay-accounting-setup Documentation/kernel-parameters.txt --- 25/Documentation/kernel-parameters.txt~per-task-delay-accounting-setup Mon May 8 14:33:31 2006 +++ 25-akpm/Documentation/kernel-parameters.txt Mon May 8 14:33:31 2006 @@ -432,6 +432,8 @@ running once the system is up. Format: <area>[,<node>] See also Documentation/networking/decnet.txt. + delayacct [KNL] Enable per-task delay accounting + dhash_entries= [KNL] Set number of hash buckets for dentry cache. 
diff -puN /dev/null include/linux/delayacct.h --- /dev/null Thu Apr 11 07:25:15 2002 +++ 25-akpm/include/linux/delayacct.h Mon May 8 14:33:31 2006 @@ -0,0 +1,69 @@ +/* delayacct.h - per-task delay accounting + * + * Copyright (C) Shailabh Nagar, IBM Corp. 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + */ + +#ifndef _LINUX_DELAYACCT_H +#define _LINUX_DELAYACCT_H + +#include <linux/sched.h> + +#ifdef CONFIG_TASK_DELAY_ACCT + +extern int delayacct_on; /* Delay accounting turned on/off */ +extern kmem_cache_t *delayacct_cache; +extern void delayacct_init(void); +extern void __delayacct_tsk_init(struct task_struct *); +extern void __delayacct_tsk_exit(struct task_struct *); + +static inline void delayacct_set_flag(int flag) +{ + if (current->delays) + current->delays->flags |= flag; +} + +static inline void delayacct_clear_flag(int flag) +{ + if (current->delays) + current->delays->flags &= ~flag; +} + +static inline void delayacct_tsk_init(struct task_struct *tsk) +{ + /* reinitialize in case parent's non-null pointer was dup'ed*/ + tsk->delays = NULL; + if (unlikely(delayacct_on)) + __delayacct_tsk_init(tsk); +} + +static inline void delayacct_tsk_exit(struct task_struct *tsk) +{ + if (tsk->delays) + __delayacct_tsk_exit(tsk); +} + +#else +static inline void delayacct_set_flag(int flag) +{} +static inline void delayacct_clear_flag(int flag) +{} +static inline void delayacct_init(void) +{} +static inline void delayacct_tsk_init(struct task_struct *tsk) +{} +static inline void delayacct_tsk_exit(struct 
task_struct *tsk) +{} +#endif /* CONFIG_TASK_DELAY_ACCT */ + +#endif diff -puN include/linux/sched.h~per-task-delay-accounting-setup include/linux/sched.h --- 25/include/linux/sched.h~per-task-delay-accounting-setup Mon May 8 14:33:31 2006 +++ 25-akpm/include/linux/sched.h Mon May 8 14:33:31 2006 @@ -536,6 +536,23 @@ struct sched_info { extern struct file_operations proc_schedstat_operations; #endif +#ifdef CONFIG_TASK_DELAY_ACCT +struct task_delay_info { + spinlock_t lock; + unsigned int flags; /* Private per-task flags */ + + /* For each stat XXX, add following, aligned appropriately + * + * struct timespec XXX_start, XXX_end; + * u64 XXX_delay; + * u32 XXX_count; + * + * Atomicity of updates to XXX_delay, XXX_count protected by + * single lock above (split into XXX_lock if contention is an issue). + */ +}; +#endif + enum idle_type { SCHED_IDLE, @@ -889,6 +906,9 @@ struct task_struct { * cache last used pipe for splice */ struct pipe_inode_info *splice_pipe; +#ifdef CONFIG_TASK_DELAY_ACCT + struct task_delay_info *delays; +#endif }; static inline pid_t process_group(struct task_struct *tsk) diff -puN include/linux/time.h~per-task-delay-accounting-setup include/linux/time.h --- 25/include/linux/time.h~per-task-delay-accounting-setup Mon May 8 14:33:31 2006 +++ 25-akpm/include/linux/time.h Mon May 8 14:33:31 2006 @@ -68,6 +68,16 @@ extern unsigned long mktime(const unsign extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec); /* + * sub = end - start, in normalized form + */ +static inline void timespec_sub(struct timespec *start, struct timespec *end, + struct timespec *sub) +{ + set_normalized_timespec(sub, end->tv_sec - start->tv_sec, + end->tv_nsec - start->tv_nsec); +} + +/* * Returns true if the timespec is norm, false if denorm: */ #define timespec_valid(ts) \ diff -puN init/Kconfig~per-task-delay-accounting-setup init/Kconfig --- 25/init/Kconfig~per-task-delay-accounting-setup Mon May 8 14:33:31 2006 +++ 25-akpm/init/Kconfig Mon 
May 8 14:33:31 2006 @@ -158,6 +158,16 @@ config BSD_PROCESS_ACCT_V3 for processing it. A preliminary version of these tools is available at <http://www.physik3.uni-rostock.de/tim/kernel/utils/acct/>. +config TASK_DELAY_ACCT + bool "Enable per-task delay accounting (EXPERIMENTAL)" + help + Collect information on time spent by a task waiting for system + resources like cpu, synchronous block I/O completion and swapping + in pages. Such statistics can help in setting a task's priorities + relative to other tasks for cpu, io, rss limits etc. + + Say N if unsure. + config SYSCTL bool "Sysctl support" ---help--- diff -puN init/main.c~per-task-delay-accounting-setup init/main.c --- 25/init/main.c~per-task-delay-accounting-setup Mon May 8 14:33:31 2006 +++ 25-akpm/init/main.c Mon May 8 14:33:31 2006 @@ -35,6 +35,7 @@ #include <linux/security.h> #include <linux/workqueue.h> #include <linux/profile.h> +#include <linux/delayacct.h> #include <linux/rcupdate.h> #include <linux/moduleparam.h> #include <linux/kallsyms.h> @@ -540,6 +541,7 @@ asmlinkage void __init start_kernel(void proc_root_init(); #endif cpuset_init(); + delayacct_init(); check_bugs(); diff -puN /dev/null kernel/delayacct.c --- /dev/null Thu Apr 11 07:25:15 2002 +++ 25-akpm/kernel/delayacct.c Mon May 8 14:33:31 2006 @@ -0,0 +1,87 @@ +/* delayacct.c - per-task delay accounting + * + * Copyright (C) Shailabh Nagar, IBM Corp. 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ */ + +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/time.h> +#include <linux/sysctl.h> +#include <linux/delayacct.h> + +int delayacct_on __read_mostly; /* Delay accounting turned on/off */ +kmem_cache_t *delayacct_cache; + +static int __init delayacct_setup_enable(char *str) +{ + delayacct_on = 1; + return 1; +} +__setup("delayacct", delayacct_setup_enable); + +void delayacct_init(void) +{ + delayacct_cache = kmem_cache_create("delayacct_cache", + sizeof(struct task_delay_info), + 0, + SLAB_PANIC, + NULL, NULL); + delayacct_tsk_init(&init_task); +} + +void __delayacct_tsk_init(struct task_struct *tsk) +{ + tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL); + if (tsk->delays) + spin_lock_init(&tsk->delays->lock); +} + +void __delayacct_tsk_exit(struct task_struct *tsk) +{ + kmem_cache_free(delayacct_cache, tsk->delays); + tsk->delays = NULL; +} + +/* + * Start accounting for a delay statistic using + * its starting timestamp (@start) + */ + +static inline void delayacct_start(struct timespec *start) +{ + do_posix_clock_monotonic_gettime(start); +} + +/* + * Finish delay accounting for a statistic using + * its timestamps (@start, @end), accumulator (@total) and @count + */ + +static inline void delayacct_end(struct timespec *start, struct timespec *end, + u64 *total, u32 *count) +{ + struct timespec ts = {0, 0}; + s64 ns; + + do_posix_clock_monotonic_gettime(end); + timespec_sub(start, end, &ts); /* ts = *end - *start */ + ns = timespec_to_ns(&ts); + if (ns < 0) + return; + + spin_lock(&current->delays->lock); + *total += ns; + (*count)++; + spin_unlock(&current->delays->lock); +} + diff -puN kernel/exit.c~per-task-delay-accounting-setup kernel/exit.c --- 25/kernel/exit.c~per-task-delay-accounting-setup Mon May 8 14:33:31 2006 +++ 25-akpm/kernel/exit.c Mon May 8 14:33:31 2006 @@ -26,6 +26,7 @@ #include <linux/mount.h> #include <linux/proc_fs.h> #include <linux/mempolicy.h> +#include <linux/delayacct.h> #include <linux/cpuset.h> #include <linux/syscalls.h> #include 
<linux/signal.h> @@ -913,6 +914,7 @@ fastcall NORET_TYPE void do_exit(long co #endif if (unlikely(tsk->audit_context)) audit_free(tsk); + delayacct_tsk_exit(tsk); exit_mm(tsk); exit_sem(tsk); diff -puN kernel/fork.c~per-task-delay-accounting-setup kernel/fork.c --- 25/kernel/fork.c~per-task-delay-accounting-setup Mon May 8 14:33:31 2006 +++ 25-akpm/kernel/fork.c Mon May 8 14:33:31 2006 @@ -44,6 +44,7 @@ #include <linux/rmap.h> #include <linux/acct.h> #include <linux/cn_proc.h> +#include <linux/delayacct.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -985,6 +986,7 @@ static task_t *copy_process(unsigned lon goto bad_fork_cleanup_put_domain; p->did_exec = 0; + delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ copy_flags(clone_flags, p); p->pid = pid; retval = -EFAULT; diff -puN kernel/Makefile~per-task-delay-accounting-setup kernel/Makefile --- 25/kernel/Makefile~per-task-delay-accounting-setup Mon May 8 14:33:31 2006 +++ 25-akpm/kernel/Makefile Mon May 8 14:33:31 2006 @@ -38,6 +38,7 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_RELAY) += relay.o +obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) # According to Alan Modra <alan@xxxxxxxxxxxxxxxx>, the -fno-omit-frame-pointer is _ Patches currently in -mm which might be from balbir@xxxxxxxxxx are fix-dcache-race-during-umount.patch fix-dcache-race-during-umount-fix.patch prune_one_dentry-tweaks.patch per-task-delay-accounting-setup.patch per-task-delay-accounting-sync-block-i-o-and-swapin-delay-collection.patch per-task-delay-accounting-cpu-delay-collection-via-schedstats.patch per-task-delay-accounting-utilities-for-genetlink-usage.patch per-task-delay-accounting-taskstats-interface.patch per-task-delay-accounting-delay-accounting-usage-of-taskstats-interface.patch per-task-delay-accounting-documentation.patch 
per-task-delay-accounting-proc-export-of-aggregated-block-i-o-delays.patch - To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html