Define a 'struct task_schedstat' which contains members corresponding to
the scheduler stats that are currently available through
/proc/<pid>/task/<tid>/schedstat. Update the scheduler stats in this
structure in the kernel at the same time the stats in 'struct task_struct'
are updated. Add a TASK_SCHEDSTAT option to the task_getshared() system
call to request these per-thread scheduler stats through the shared
structure.

Signed-off-by: Prakash Sangappa <prakash.sangappa@xxxxxxxxxx>
---
 include/linux/task_shared.h      | 35 ++++++++++++++++++++++++++++++++++-
 include/uapi/linux/task_shared.h | 22 ++++++++++++++++++++++
 kernel/sched/deadline.c          |  1 +
 kernel/sched/fair.c              |  1 +
 kernel/sched/rt.c                |  1 +
 kernel/sched/sched.h             |  1 +
 kernel/sched/stats.h             |  3 +++
 kernel/sched/stop_task.c         |  1 +
 mm/task_shared.c                 | 13 +++++++++++++
 9 files changed, 77 insertions(+), 1 deletion(-)
 create mode 100644 include/uapi/linux/task_shared.h

diff --git a/include/linux/task_shared.h b/include/linux/task_shared.h
index de17849..62793e4 100644
--- a/include/linux/task_shared.h
+++ b/include/linux/task_shared.h
@@ -3,6 +3,7 @@
 #define __TASK_SHARED_H__
 
 #include <linux/mm_types.h>
+#include <uapi/linux/task_shared.h>
 
 /*
  * Track user-kernel shared pages referred by mm_struct
@@ -18,7 +19,7 @@ struct ushared_pages {
  * fast communication.
  */
 struct task_ushared {
-	long version;
+	struct task_schedstat ts;
 };
 
 /*
@@ -52,6 +53,38 @@ struct task_ushrd_struct {
 	struct ushared_pg *upg;
 };
 
+
+#ifdef CONFIG_SCHED_INFO
+
+#define task_update_exec_runtime(t)					\
+	do {								\
+		struct task_ushrd_struct *shrdp = t->task_ushrd;	\
+		if (shrdp != NULL && shrdp->kaddr != NULL)		\
+			shrdp->kaddr->ts.sum_exec_runtime =		\
+				t->se.sum_exec_runtime;			\
+	} while (0)
+
+#define task_update_runq_stat(t, p)					\
+	do {								\
+		struct task_ushrd_struct *shrdp = t->task_ushrd;	\
+		if (shrdp != NULL && shrdp->kaddr != NULL) {		\
+			shrdp->kaddr->ts.run_delay =			\
+				t->sched_info.run_delay;		\
+			if (p) {					\
+				shrdp->kaddr->ts.pcount =		\
+					t->sched_info.pcount;		\
+			}						\
+		}							\
+	} while (0)
+#else
+
+#define task_update_exec_runtime(t) do { } while (0)
+#define task_update_runq_stat(t, p) do { } while (0)
+
+#endif
+
+
+
 extern void task_ushared_free(struct task_struct *t);
 extern void mm_ushared_clear(struct mm_struct *mm);
 #endif /* __TASK_SHARED_H__ */
diff --git a/include/uapi/linux/task_shared.h b/include/uapi/linux/task_shared.h
new file mode 100644
index 0000000..06a8522
--- /dev/null
+++ b/include/uapi/linux/task_shared.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef LINUX_TASK_SHARED_H
+#define LINUX_TASK_SHARED_H
+
+/*
+ * Per task user-kernel mapped structure for faster communication.
+ */
+
+/*
+ * Following is the option to request the struct task_schedstat shared
+ * structure, in which the kernel shares the task's exec time, time spent
+ * on the run queue and the number of times it was scheduled to run on a
+ * cpu. Requires a kernel with CONFIG_SCHED_INFO enabled.
+ */
+#define TASK_SCHEDSTAT 1
+
+struct task_schedstat {
+	volatile u64 sum_exec_runtime;
+	volatile u64 run_delay;
+	volatile u64 pcount;
+};
+#endif
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index aaacd6c..189c74c 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1270,6 +1270,7 @@ static void update_curr_dl(struct rq *rq)
 
 	curr->se.sum_exec_runtime += delta_exec;
 	account_group_exec_runtime(curr, delta_exec);
+	task_update_exec_runtime(curr);
 
 	curr->se.exec_start = now;
 	cgroup_account_cputime(curr, delta_exec);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 44c4520..cbd182b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -817,6 +817,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	if (entity_is_task(curr)) {
 		struct task_struct *curtask = task_of(curr);
 
+		task_update_exec_runtime(curtask);
 		trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
 		cgroup_account_cputime(curtask, delta_exec);
 		account_group_exec_runtime(curtask, delta_exec);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 3daf42a..61082fc 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1014,6 +1014,7 @@ static void update_curr_rt(struct rq *rq)
 
 	curr->se.sum_exec_runtime += delta_exec;
 	account_group_exec_runtime(curr, delta_exec);
+	task_update_exec_runtime(curr);
 
 	curr->se.exec_start = now;
 	cgroup_account_cputime(curr, delta_exec);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 14a41a2..4ebbd8f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -67,6 +67,7 @@
 #include <linux/syscalls.h>
 #include <linux/task_work.h>
 #include <linux/tsacct_kern.h>
+#include <linux/task_shared.h>
 
 #include <asm/tlb.h>
 
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index d8f8eb0..6b2d69c 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 
 #ifdef CONFIG_SCHEDSTATS
+#include <linux/task_shared.h>
 
 /*
  * Expects runqueue lock to be held for atomicity of update
@@ -166,6 +167,7 @@ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
 	delta = rq_clock(rq) - t->sched_info.last_queued;
 	t->sched_info.last_queued = 0;
 	t->sched_info.run_delay += delta;
+	task_update_runq_stat(t, 0);
 
 	rq_sched_info_dequeue(rq, delta);
 }
@@ -188,6 +190,7 @@ static void sched_info_arrive(struct rq *rq, struct task_struct *t)
 	t->sched_info.run_delay += delta;
 	t->sched_info.last_arrival = now;
 	t->sched_info.pcount++;
+	task_update_runq_stat(t, 1);
 
 	rq_sched_info_arrive(rq, delta);
 }
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index f988ebe..7b9b60f 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -82,6 +82,7 @@ static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
 		      max(curr->se.statistics.exec_max, delta_exec));
 
 	curr->se.sum_exec_runtime += delta_exec;
+	task_update_exec_runtime(curr);
 	account_group_exec_runtime(curr, delta_exec);
 
 	curr->se.exec_start = rq_clock_task(rq);
diff --git a/mm/task_shared.c b/mm/task_shared.c
index 3ec5eb6..7169ccd 100644
--- a/mm/task_shared.c
+++ b/mm/task_shared.c
@@ -275,6 +275,14 @@ static long task_getshared(u64 opt, u64 flags, void __user *uaddr)
 {
 	struct task_ushrd_struct *ushrd = current->task_ushrd;
 
+	/* Currently only TASK_SCHEDSTAT supported */
+#ifdef CONFIG_SCHED_INFO
+	if (opt != TASK_SCHEDSTAT)
+		return (-EINVAL);
+#else
+	return (-EOPNOTSUPP);
+#endif
+
 	/* We have address, return. */
 	if (ushrd != NULL && ushrd->upg != NULL) {
 		if (copy_to_user(uaddr, &ushrd->uaddr,
@@ -286,6 +294,11 @@ static long task_getshared(u64 opt, u64 flags, void __user *uaddr)
 	task_ushared_alloc();
 	ushrd = current->task_ushrd;
 	if (ushrd != NULL && ushrd->upg != NULL) {
+		if (opt == TASK_SCHEDSTAT) {
+			/* init current values */
+			task_update_exec_runtime(current);
+			task_update_runq_stat(current, 1);
+		}
 		if (copy_to_user(uaddr, &ushrd->uaddr,
 				sizeof(struct task_ushared *)))
 			return (-EFAULT);
-- 
2.7.4
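
For illustration, a minimal userspace sketch of how the new option is
expected to be consumed. It is not part of the patch: the syscall number is
a placeholder for whatever this series wires up on a given architecture,
struct task_schedstat is mirrored locally with __u64 so the sketch builds
without the new uapi header, and the returned address is assumed to point
at the schedstat fields (the first and only member of struct task_ushared).

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/types.h>

/* Placeholder; replace with the syscall number this series assigns. */
#ifndef __NR_task_getshared
#define __NR_task_getshared	-1
#endif

#define TASK_SCHEDSTAT	1

/* Local mirror of the new uapi struct task_schedstat. */
struct task_schedstat {
	volatile __u64 sum_exec_runtime;
	volatile __u64 run_delay;
	volatile __u64 pcount;
};

int main(void)
{
	struct task_schedstat *ts = NULL;

	/*
	 * opt = TASK_SCHEDSTAT, flags = 0; on success the kernel copies the
	 * user address of the per-thread shared structure into 'ts'.
	 */
	if (syscall(__NR_task_getshared, TASK_SCHEDSTAT, 0UL, &ts)) {
		perror("task_getshared");
		return 1;
	}

	/* The scheduler keeps these fields up to date; just read them. */
	printf("sum_exec_runtime: %llu\n",
	       (unsigned long long)ts->sum_exec_runtime);
	printf("run_delay:        %llu\n",
	       (unsigned long long)ts->run_delay);
	printf("pcount:           %llu\n",
	       (unsigned long long)ts->pcount);
	return 0;
}

After the first call the structure stays mapped, so subsequent reads of the
per-thread stats are plain loads from the shared page rather than system
calls or /proc reads.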