Now that IO schedule accounting is done inside __schedule(), io_schedule()
can be split into three steps - prep, schedule, and finish - where the
schedule part doesn't need any special annotation.  This allows marking a
sleep as iowait by simply wrapping an existing blocking function with
io_schedule_prepare() and io_schedule_finish().

Because task_struct->in_iowait is a single bit, the caller of
io_schedule_prepare() needs to record and then pass its state to
io_schedule_finish() to be safe regarding nesting.  While this isn't the
prettiest, these functions are mostly going to be used by core functions
and we don't want to use more space for ->in_iowait.

While at it, as it's simple to do now, reimplement io_schedule() without
unnecessarily going through io_schedule_timeout().

Signed-off-by: Tejun Heo <tj@xxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Jens Axboe <axboe@xxxxxxxxx>
---
 include/linux/sched.h |  8 +++-----
 kernel/sched/core.c   | 33 ++++++++++++++++++++++++++++-----
 2 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 348f51b..c025f77 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -441,12 +441,10 @@ extern signed long schedule_timeout_idle(signed long timeout);
 asmlinkage void schedule(void);
 extern void schedule_preempt_disabled(void);
 
+extern int __must_check io_schedule_prepare(void);
+extern void io_schedule_finish(int token);
 extern long io_schedule_timeout(long timeout);
-
-static inline void io_schedule(void)
-{
-        io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
-}
+extern void io_schedule(void);
 
 void __noreturn do_task_dead(void);
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f6baa38..30d3185 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5067,25 +5067,48 @@ int __sched yield_to(struct task_struct *p, bool preempt)
 }
 EXPORT_SYMBOL_GPL(yield_to);
 
+int io_schedule_prepare(void)
+{
+        int old_iowait = current->in_iowait;
+
+        current->in_iowait = 1;
+        blk_schedule_flush_plug(current);
+
+        return old_iowait;
+}
+
+void io_schedule_finish(int token)
+{
+        current->in_iowait = token;
+}
+
 /*
  * This task is about to go to sleep on IO. Increment rq->nr_iowait so
  * that process accounting knows that this is a task in IO wait state.
  */
 long __sched io_schedule_timeout(long timeout)
 {
-        int old_iowait = current->in_iowait;
+        int token;
         long ret;
 
-        current->in_iowait = 1;
-        blk_schedule_flush_plug(current);
-
+        token = io_schedule_prepare();
         ret = schedule_timeout(timeout);
-        current->in_iowait = old_iowait;
+        io_schedule_finish(token);
 
         return ret;
 }
 EXPORT_SYMBOL(io_schedule_timeout);
 
+void io_schedule(void)
+{
+        int token;
+
+        token = io_schedule_prepare();
+        schedule();
+        io_schedule_finish(token);
+}
+EXPORT_SYMBOL(io_schedule);
+
 /**
  * sys_sched_get_priority_max - return maximum RT priority.
  * @policy: scheduling class.
-- 
2.7.4
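
For reference, a minimal sketch of how a core-kernel caller could use the new
helpers to mark an existing blocking primitive as iowait.  The wrapper name
wait_for_completion_io_sketch() is hypothetical and not part of this patch; it
only illustrates the record-the-token-and-pass-it-back pattern described in
the changelog:

#include <linux/completion.h>
#include <linux/sched.h>

/*
 * Hypothetical wrapper (illustration only, not part of this patch):
 * account the sleep inside wait_for_completion() as iowait.
 */
static void wait_for_completion_io_sketch(struct completion *x)
{
        int token;

        /* Record the previous ->in_iowait state and flag this task as iowait. */
        token = io_schedule_prepare();

        /* Any existing blocking call can go here unchanged. */
        wait_for_completion(x);

        /* Restore the recorded state so nested users stay correct. */
        io_schedule_finish(token);
}

Passing the token back rather than unconditionally clearing ->in_iowait is
what keeps nesting safe while ->in_iowait stays a single bit.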