The patch titled per-task-delay-accounting: delay accounting usage of taskstats interface has been added to the -mm tree. Its filename is per-task-delay-accounting-delay-accounting-usage-of-taskstats-interface.patch See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find out what to do about this ------------------------------------------------------ Subject: per-task-delay-accounting: delay accounting usage of taskstats interface From: Balbir Singh <balbir@xxxxxxxxxx> Usage of taskstats interface by delay accounting. Signed-off-by: Shailabh Nagar <nagar@xxxxxxxxxx> Signed-off-by: Balbir Singh <balbir@xxxxxxxxxx> Cc: Jes Sorensen <jes@xxxxxxx> Cc: Peter Chubb <peterc@xxxxxxxxxxxxxxxxxx> Cc: Erich Focht <efocht@xxxxxxxxxx> Cc: Levent Serinol <lserinol@xxxxxxxxx> Cc: Jay Lan <jlan@xxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxx> --- include/linux/delayacct.h | 13 ++++++++ include/linux/taskstats.h | 49 +++++++++++++++++++++++++++++++ include/linux/taskstats_kern.h | 1 init/Kconfig | 1 kernel/delayacct.c | 47 +++++++++++++++++++++++++++++ kernel/taskstats.c | 17 +++++++++- 6 files changed, 125 insertions(+), 3 deletions(-) diff -puN include/linux/taskstats_kern.h~per-task-delay-accounting-delay-accounting-usage-of-taskstats-interface include/linux/taskstats_kern.h --- a/include/linux/taskstats_kern.h~per-task-delay-accounting-delay-accounting-usage-of-taskstats-interface +++ a/include/linux/taskstats_kern.h @@ -17,6 +17,7 @@ enum { #ifdef CONFIG_TASKSTATS extern kmem_cache_t *taskstats_cache; +extern struct mutex taskstats_exit_mutex; static inline void taskstats_exit_alloc(struct taskstats **ptidstats, struct taskstats **ptgidstats) diff -puN kernel/delayacct.c~per-task-delay-accounting-delay-accounting-usage-of-taskstats-interface kernel/delayacct.c --- a/kernel/delayacct.c~per-task-delay-accounting-delay-accounting-usage-of-taskstats-interface +++ a/kernel/delayacct.c @@ -48,8 +48,11 @@ void __delayacct_tsk_init(struct task_st void __delayacct_tsk_exit(struct task_struct *tsk) { - kmem_cache_free(delayacct_cache, tsk->delays); + struct task_delay_info *delays = tsk->delays; + mutex_lock(&taskstats_exit_mutex); tsk->delays = NULL; + mutex_unlock(&taskstats_exit_mutex); + kmem_cache_free(delayacct_cache, delays); } /* @@ -104,3 +107,45 @@ void __delayacct_blkio_end(void) ¤t->delays->blkio_delay, ¤t->delays->blkio_count); } + +int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) +{ + s64 tmp; + struct timespec ts; + unsigned long t1,t2,t3; + + tmp = (s64)d->cpu_run_real_total; + tmp += (u64)(tsk->utime + tsk->stime) * TICK_NSEC; + d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp; + + /* + * No locking available for sched_info (and too expensive to add one) + * Mitigate by taking snapshot of values + */ + t1 = tsk->sched_info.pcnt; + t2 = tsk->sched_info.run_delay; + t3 = tsk->sched_info.cpu_time; + + d->cpu_count += t1; + + jiffies_to_timespec(t2, &ts); + tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts); + d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp; + + tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000; + d->cpu_run_virtual_total = + (tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp; + + /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */ + + spin_lock(&tsk->delays->lock); + tmp = d->blkio_delay_total + tsk->delays->blkio_delay; + d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp; + tmp = d->swapin_delay_total + tsk->delays->swapin_delay; + d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp; + d->blkio_count += tsk->delays->blkio_count; + d->swapin_count += tsk->delays->swapin_count; + spin_unlock(&tsk->delays->lock); + + return 0; +} diff -puN kernel/taskstats.c~per-task-delay-accounting-delay-accounting-usage-of-taskstats-interface kernel/taskstats.c --- a/kernel/taskstats.c~per-task-delay-accounting-delay-accounting-usage-of-taskstats-interface +++ a/kernel/taskstats.c @@ -18,13 +18,14 @@ #include <linux/kernel.h> #include <linux/taskstats_kern.h> +#include <linux/delayacct.h> #include <net/genetlink.h> #include <asm/atomic.h> static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 }; static int family_registered = 0; kmem_cache_t *taskstats_cache; -static DEFINE_MUTEX(taskstats_exit_mutex); +DEFINE_MUTEX(taskstats_exit_mutex); static struct genl_family family = { .id = GENL_ID_GENERATE, @@ -120,7 +121,10 @@ static int fill_pid(pid_t pid, struct ta * goto err; */ -err: + rc = delayacct_add_tsk(stats, tsk); + stats->version = TASKSTATS_VERSION; + + /* Define err: label here if needed */ put_task_struct(tsk); return rc; @@ -152,8 +156,14 @@ static int fill_tgid(pid_t tgid, struct * break; */ + rc = delayacct_add_tsk(stats, tsk); + if (rc) + break; + } while_each_thread(first, tsk); read_unlock(&tasklist_lock); + stats->version = TASKSTATS_VERSION; + /* * Accounting subsytems can also add calls here if they don't @@ -183,6 +193,7 @@ static int taskstats_send_stats(struct s if (rc < 0) return rc; + mutex_lock(&taskstats_exit_mutex); if (info->attrs[TASKSTATS_CMD_ATTR_PID]) { u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); rc = fill_pid(pid, NULL, &stats); @@ -208,6 +219,7 @@ static int taskstats_send_stats(struct s goto err; } + mutex_unlock(&taskstats_exit_mutex); nla_nest_end(rep_skb, na); return send_reply(rep_skb, info->snd_pid, TASKSTATS_MSG_UNICAST); @@ -216,6 +228,7 @@ nla_put_failure: return genlmsg_cancel(rep_skb, reply); err: nlmsg_free(rep_skb); + mutex_unlock(&taskstats_exit_mutex); return rc; } diff -puN include/linux/delayacct.h~per-task-delay-accounting-delay-accounting-usage-of-taskstats-interface include/linux/delayacct.h --- a/include/linux/delayacct.h~per-task-delay-accounting-delay-accounting-usage-of-taskstats-interface +++ a/include/linux/delayacct.h @@ -18,6 +18,7 @@ #define _LINUX_DELAYACCT_H #include <linux/sched.h> +#include <linux/taskstats_kern.h> /* * Per-task flags relevant to delay accounting @@ -35,6 +36,7 @@ extern void __delayacct_tsk_init(struct extern void __delayacct_tsk_exit(struct task_struct *); extern void __delayacct_blkio_start(void); extern void __delayacct_blkio_end(void); +extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *); static inline void delayacct_set_flag(int flag) { @@ -74,6 +76,14 @@ static inline void delayacct_blkio_end(v __delayacct_blkio_end(); } +static inline int delayacct_add_tsk(struct taskstats *d, + struct task_struct *tsk) +{ + if (!tsk->delays) + return -EINVAL; + return __delayacct_add_tsk(d, tsk); +} + #else static inline void delayacct_set_flag(int flag) {} @@ -89,6 +99,9 @@ static inline void delayacct_blkio_start {} static inline void delayacct_blkio_end(void) {} +static inline int delayacct_add_tsk(struct taskstats *d, + struct task_struct *tsk) +{ return 0; } #endif /* CONFIG_TASK_DELAY_ACCT */ #endif diff -puN include/linux/taskstats.h~per-task-delay-accounting-delay-accounting-usage-of-taskstats-interface include/linux/taskstats.h --- a/include/linux/taskstats.h~per-task-delay-accounting-delay-accounting-usage-of-taskstats-interface +++ a/include/linux/taskstats.h @@ -35,6 +35,55 @@ struct taskstats { /* Version 1 */ __u64 version; + + /* Delay accounting fields start + * + * All values, until comment "Delay accounting fields end" are + * available only if delay accounting is enabled, even though the last + * few fields are not delays + * + * xxx_count is the number of delay values recorded + * xxx_delay_total is the corresponding cumulative delay in nanoseconds + * + * xxx_delay_total wraps around to zero on overflow + * xxx_count incremented regardless of overflow + */ + + /* Delay waiting for cpu, while runnable + * count, delay_total NOT updated atomically + */ + __u64 cpu_count; + __u64 cpu_delay_total; + + /* Following four fields atomically updated using task->delays->lock */ + + /* Delay waiting for synchronous block I/O to complete + * does not account for delays in I/O submission + */ + __u64 blkio_count; + __u64 blkio_delay_total; + + /* Delay waiting for page fault I/O (swap in only) */ + __u64 swapin_count; + __u64 swapin_delay_total; + + /* cpu "wall-clock" running time + * On some architectures, value will adjust for cpu time stolen + * from the kernel in involuntary waits due to virtualization. + * Value is cumulative, in nanoseconds, without a corresponding count + * and wraps around to zero silently on overflow + */ + __u64 cpu_run_real_total; + + /* cpu "virtual" running time + * Uses time intervals seen by the kernel i.e. no adjustment + * for kernel's involuntary waits due to virtualization. + * Value is cumulative, in nanoseconds, without a corresponding count + * and wraps around to zero silently on overflow + */ + __u64 cpu_run_virtual_total; + /* Delay accounting fields end */ + /* version 1 ends here */ }; diff -puN init/Kconfig~per-task-delay-accounting-delay-accounting-usage-of-taskstats-interface init/Kconfig --- a/init/Kconfig~per-task-delay-accounting-delay-accounting-usage-of-taskstats-interface +++ a/init/Kconfig @@ -172,6 +172,7 @@ config TASKSTATS config TASK_DELAY_ACCT bool "Enable per-task delay accounting (EXPERIMENTAL)" + depends on TASKSTATS help Collect information on time spent by a task waiting for system resources like cpu, synchronous block I/O completion and swapping _ Patches currently in -mm which might be from balbir@xxxxxxxxxx are origin.patch per-task-delay-accounting-setup.patch per-task-delay-accounting-setup-fix-1.patch per-task-delay-accounting-setup-fix-2.patch per-task-delay-accounting-sync-block-i-o-and-swapin-delay-collection.patch per-task-delay-accounting-sync-block-i-o-and-swapin-delay-collection-fix-1.patch per-task-delay-accounting-cpu-delay-collection-via-schedstats.patch per-task-delay-accounting-cpu-delay-collection-via-schedstats-fix-1.patch per-task-delay-accounting-utilities-for-genetlink-usage.patch per-task-delay-accounting-taskstats-interface.patch per-task-delay-accounting-taskstats-interface-fix-1.patch per-task-delay-accounting-taskstats-interface-fix-2.patch per-task-delay-accounting-delay-accounting-usage-of-taskstats-interface.patch per-task-delay-accounting-delay-accounting-usage-of-taskstats-interface-use-portable-cputime-api-in-__delayacct_add_tsk.patch per-task-delay-accounting-delay-accounting-usage-of-taskstats-interface-fix-return-value-of-delayacct_add_tsk.patch revised-locking-for-taskstats-interface.patch per-task-delay-accounting-documentation.patch per-task-delay-accounting-proc-export-of-aggregated-block-i-o-delays.patch per-task-delay-accounting-proc-export-of-aggregated-block-i-o-delays-warning-fix.patch task-watchers-register-per-task-delay-accounting.patch - To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html