- per-task-delay-accounting-taskstats-interface-fix-exit-race-in-per-task-delay-accounting.patch removed from -mm tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The patch titled

     Fix exit race in per-task-delay-accounting

has been removed from the -mm tree.  Its filename is

     per-task-delay-accounting-taskstats-interface-fix-exit-race-in-per-task-delay-accounting.patch

This patch was dropped because it is obsolete

------------------------------------------------------
Subject: Fix exit race in per-task-delay-accounting
From: Balbir Singh <balbir@xxxxxxxxxx>


Fix a race in per-task-delay-accounting.  This race was reported by Jay
Lan.  I tested the patch using cerebrus test control system for eight hours
with getdelays running on the side (for both push and pull of delay
statistics).

It fixed the problem that Jay Lan saw.

Here's an explanation of the race condition

Consider tasks of the same thread group exiting, lets call them T1 and T2

T1                          T2

CPU0                        CPU1
=====                       =====

do_exit()
...                        do_exit()
taskstats_exit_send()                ...
                           taskstats_exit_send()
                           	fill_tgid()
                                delayacct_add_tsk()
delayacct_tsk_exit()            ....
                                __delayacct_add_tsk()

While T1 is yet to be removed from the thread group.  T1->delays is set to
NULL between delayacct_add_tsk() and __delayacct_add_tsk() call.

When T2 looks for threads in the thread group, it finds T1 and tries to
collect stats for it.  When we get to the spin_lock() in
__delayacct_add_tsk(), we find T1->delays is a NULL pointer.

Signed-off-by: Balbir Singh <balbir@xxxxxxxxxx>
DESC
per-task-delay-accounting: delay accounting usage of taskstats interface
EDESC

Usage of taskstats interface by delay accounting.

Signed-off-by: Shailabh Nagar <nagar@xxxxxxxxxx>
Signed-off-by: Balbir Singh <balbir@xxxxxxxxxx>
Cc: Jes Sorensen <jes@xxxxxxx>
Cc: Peter Chubb <peterc@xxxxxxxxxxxxxxxxxx>
Cc: Erich Focht <efocht@xxxxxxxxxx>
Cc: Levent Serinol <lserinol@xxxxxxxxx>
Cc: Jay Lan <jlan@xxxxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxx>
---

 include/linux/delayacct.h      |   13 ++++++++
 include/linux/taskstats.h      |   49 +++++++++++++++++++++++++++++++
 include/linux/taskstats_kern.h |    1 
 init/Kconfig                   |    1 
 kernel/delayacct.c             |   47 +++++++++++++++++++++++++++++
 kernel/taskstats.c             |   17 +++++++++-
 6 files changed, 125 insertions(+), 3 deletions(-)

diff -puN include/linux/taskstats_kern.h~per-task-delay-accounting-taskstats-interface-fix-exit-race-in-per-task-delay-accounting include/linux/taskstats_kern.h
--- a/include/linux/taskstats_kern.h~per-task-delay-accounting-taskstats-interface-fix-exit-race-in-per-task-delay-accounting
+++ a/include/linux/taskstats_kern.h
@@ -17,6 +17,7 @@ enum {
 
 #ifdef CONFIG_TASKSTATS
 extern kmem_cache_t *taskstats_cache;
+extern struct mutex taskstats_exit_mutex;
 
 static inline void taskstats_exit_alloc(struct taskstats **ptidstats,
 					struct taskstats **ptgidstats)
diff -puN kernel/delayacct.c~per-task-delay-accounting-taskstats-interface-fix-exit-race-in-per-task-delay-accounting kernel/delayacct.c
--- a/kernel/delayacct.c~per-task-delay-accounting-taskstats-interface-fix-exit-race-in-per-task-delay-accounting
+++ a/kernel/delayacct.c
@@ -48,8 +48,11 @@ void __delayacct_tsk_init(struct task_st
 
 void __delayacct_tsk_exit(struct task_struct *tsk)
 {
-	kmem_cache_free(delayacct_cache, tsk->delays);
+	struct task_delay_info *delays = tsk->delays;
+	mutex_lock(&taskstats_exit_mutex);
 	tsk->delays = NULL;
+	mutex_unlock(&taskstats_exit_mutex);
+	kmem_cache_free(delayacct_cache, delays);
 }
 
 /*
@@ -104,3 +107,45 @@ void __delayacct_blkio_end(void)
 			&current->delays->blkio_delay,
 			&current->delays->blkio_count);
 }
+
+int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
+{
+	s64 tmp;
+	struct timespec ts;
+	unsigned long t1,t2,t3;
+
+	tmp = (s64)d->cpu_run_real_total;
+	tmp += (u64)(tsk->utime + tsk->stime) * TICK_NSEC;
+	d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
+
+	/*
+	 * No locking available for sched_info (and too expensive to add one)
+	 * Mitigate by taking snapshot of values
+	 */
+	t1 = tsk->sched_info.pcnt;
+	t2 = tsk->sched_info.run_delay;
+	t3 = tsk->sched_info.cpu_time;
+
+	d->cpu_count += t1;
+
+	jiffies_to_timespec(t2, &ts);
+	tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts);
+	d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
+
+	tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000;
+	d->cpu_run_virtual_total =
+		(tmp < (s64)d->cpu_run_virtual_total) ?	0 : tmp;
+
+	/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
+
+	spin_lock(&tsk->delays->lock);
+	tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
+	d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
+	tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
+	d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
+	d->blkio_count += tsk->delays->blkio_count;
+	d->swapin_count += tsk->delays->swapin_count;
+	spin_unlock(&tsk->delays->lock);
+
+	return 0;
+}
diff -puN kernel/taskstats.c~per-task-delay-accounting-taskstats-interface-fix-exit-race-in-per-task-delay-accounting kernel/taskstats.c
--- a/kernel/taskstats.c~per-task-delay-accounting-taskstats-interface-fix-exit-race-in-per-task-delay-accounting
+++ a/kernel/taskstats.c
@@ -18,13 +18,14 @@
 
 #include <linux/kernel.h>
 #include <linux/taskstats_kern.h>
+#include <linux/delayacct.h>
 #include <net/genetlink.h>
 #include <asm/atomic.h>
 
 static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 };
 static int family_registered = 0;
 kmem_cache_t *taskstats_cache;
-static DEFINE_MUTEX(taskstats_exit_mutex);
+DEFINE_MUTEX(taskstats_exit_mutex);
 
 static struct genl_family family = {
 	.id		= GENL_ID_GENERATE,
@@ -120,7 +121,10 @@ static int fill_pid(pid_t pid, struct ta
 	 *		goto err;
 	 */
 
-err:
+	rc = delayacct_add_tsk(stats, tsk);
+	stats->version = TASKSTATS_VERSION;
+
+	/* Define err: label here if needed */
 	put_task_struct(tsk);
 	return rc;
 
@@ -152,8 +156,14 @@ static int fill_tgid(pid_t tgid, struct 
 		 *		break;
 		 */
 
+		rc = delayacct_add_tsk(stats, tsk);
+		if (rc)
+			break;
+
 	} while_each_thread(first, tsk);
 	read_unlock(&tasklist_lock);
+	stats->version = TASKSTATS_VERSION;
+
 
 	/*
 	 * Accounting subsytems can also add calls here if they don't
@@ -183,6 +193,7 @@ static int taskstats_send_stats(struct s
 	if (rc < 0)
 		return rc;
 
+	mutex_lock(&taskstats_exit_mutex);
 	if (info->attrs[TASKSTATS_CMD_ATTR_PID]) {
 		u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]);
 		rc = fill_pid(pid, NULL, &stats);
@@ -208,6 +219,7 @@ static int taskstats_send_stats(struct s
 		goto err;
 	}
 
+	mutex_unlock(&taskstats_exit_mutex);
 	nla_nest_end(rep_skb, na);
 
 	return send_reply(rep_skb, info->snd_pid, TASKSTATS_MSG_UNICAST);
@@ -216,6 +228,7 @@ nla_put_failure:
 	return genlmsg_cancel(rep_skb, reply);
 err:
 	nlmsg_free(rep_skb);
+	mutex_unlock(&taskstats_exit_mutex);
 	return rc;
 }
 
diff -puN include/linux/delayacct.h~per-task-delay-accounting-taskstats-interface-fix-exit-race-in-per-task-delay-accounting include/linux/delayacct.h
--- a/include/linux/delayacct.h~per-task-delay-accounting-taskstats-interface-fix-exit-race-in-per-task-delay-accounting
+++ a/include/linux/delayacct.h
@@ -18,6 +18,7 @@
 #define _LINUX_DELAYACCT_H
 
 #include <linux/sched.h>
+#include <linux/taskstats_kern.h>
 
 /*
  * Per-task flags relevant to delay accounting
@@ -35,6 +36,7 @@ extern void __delayacct_tsk_init(struct 
 extern void __delayacct_tsk_exit(struct task_struct *);
 extern void __delayacct_blkio_start(void);
 extern void __delayacct_blkio_end(void);
+extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
 
 static inline void delayacct_set_flag(int flag)
 {
@@ -74,6 +76,14 @@ static inline void delayacct_blkio_end(v
 		__delayacct_blkio_end();
 }
 
+static inline int delayacct_add_tsk(struct taskstats *d,
+					struct task_struct *tsk)
+{
+	if (!tsk->delays)
+		return -EINVAL;
+	return __delayacct_add_tsk(d, tsk);
+}
+
 #else
 static inline void delayacct_set_flag(int flag)
 {}
@@ -89,6 +99,9 @@ static inline void delayacct_blkio_start
 {}
 static inline void delayacct_blkio_end(void)
 {}
+static inline int delayacct_add_tsk(struct taskstats *d,
+					struct task_struct *tsk)
+{ return 0; }
 #endif /* CONFIG_TASK_DELAY_ACCT */
 
 #endif
diff -puN include/linux/taskstats.h~per-task-delay-accounting-taskstats-interface-fix-exit-race-in-per-task-delay-accounting include/linux/taskstats.h
--- a/include/linux/taskstats.h~per-task-delay-accounting-taskstats-interface-fix-exit-race-in-per-task-delay-accounting
+++ a/include/linux/taskstats.h
@@ -35,6 +35,55 @@ struct taskstats {
 
 	/* Version 1 */
 	__u64	version;
+
+	/* Delay accounting fields start
+	 *
+	 * All values, until comment "Delay accounting fields end" are
+	 * available only if delay accounting is enabled, even though the last
+	 * few fields are not delays
+	 *
+	 * xxx_count is the number of delay values recorded
+	 * xxx_delay_total is the corresponding cumulative delay in nanoseconds
+	 *
+	 * xxx_delay_total wraps around to zero on overflow
+	 * xxx_count incremented regardless of overflow
+	 */
+
+	/* Delay waiting for cpu, while runnable
+	 * count, delay_total NOT updated atomically
+	 */
+	__u64	cpu_count;
+	__u64	cpu_delay_total;
+
+	/* Following four fields atomically updated using task->delays->lock */
+
+	/* Delay waiting for synchronous block I/O to complete
+	 * does not account for delays in I/O submission
+	 */
+	__u64	blkio_count;
+	__u64	blkio_delay_total;
+
+	/* Delay waiting for page fault I/O (swap in only) */
+	__u64	swapin_count;
+	__u64	swapin_delay_total;
+
+	/* cpu "wall-clock" running time
+	 * On some architectures, value will adjust for cpu time stolen
+	 * from the kernel in involuntary waits due to virtualization.
+	 * Value is cumulative, in nanoseconds, without a corresponding count
+	 * and wraps around to zero silently on overflow
+	 */
+	__u64	cpu_run_real_total;
+
+	/* cpu "virtual" running time
+	 * Uses time intervals seen by the kernel i.e. no adjustment
+	 * for kernel's involuntary waits due to virtualization.
+	 * Value is cumulative, in nanoseconds, without a corresponding count
+	 * and wraps around to zero silently on overflow
+	 */
+	__u64	cpu_run_virtual_total;
+	/* Delay accounting fields end */
+	/* version 1 ends here */
 };
 
 
diff -puN init/Kconfig~per-task-delay-accounting-taskstats-interface-fix-exit-race-in-per-task-delay-accounting init/Kconfig
--- a/init/Kconfig~per-task-delay-accounting-taskstats-interface-fix-exit-race-in-per-task-delay-accounting
+++ a/init/Kconfig
@@ -172,6 +172,7 @@ config TASKSTATS
 
 config TASK_DELAY_ACCT
 	bool "Enable per-task delay accounting (EXPERIMENTAL)"
+	depends on TASKSTATS
 	help
 	  Collect information on time spent by a task waiting for system
 	  resources like cpu, synchronous block I/O completion and swapping
_

Patches currently in -mm which might be from balbir@xxxxxxxxxx are

origin.patch
per-task-delay-accounting-setup.patch
per-task-delay-accounting-setup-fix-1.patch
per-task-delay-accounting-setup-fix-2.patch
per-task-delay-accounting-sync-block-i-o-and-swapin-delay-collection.patch
per-task-delay-accounting-sync-block-i-o-and-swapin-delay-collection-fix-1.patch
per-task-delay-accounting-cpu-delay-collection-via-schedstats.patch
per-task-delay-accounting-cpu-delay-collection-via-schedstats-fix-1.patch
per-task-delay-accounting-utilities-for-genetlink-usage.patch
per-task-delay-accounting-taskstats-interface.patch
per-task-delay-accounting-taskstats-interface-fix-1.patch
per-task-delay-accounting-taskstats-interface-fix-2.patch
per-task-delay-accounting-taskstats-interface-fix-exit-race-in-per-task-delay-accounting.patch
per-task-delay-accounting-delay-accounting-usage-of-taskstats-interface.patch
per-task-delay-accounting-delay-accounting-usage-of-taskstats-interface-use-portable-cputime-api-in-__delayacct_add_tsk.patch
per-task-delay-accounting-delay-accounting-usage-of-taskstats-interface-fix-return-value-of-delayacct_add_tsk.patch
revised-locking-for-taskstats-interface.patch
per-task-delay-accounting-documentation.patch
per-task-delay-accounting-proc-export-of-aggregated-block-i-o-delays.patch
per-task-delay-accounting-proc-export-of-aggregated-block-i-o-delays-warning-fix.patch
task-watchers-register-per-task-delay-accounting.patch

-
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Kernel Newbies FAQ]     [Kernel Archive]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Photo]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux