[PATCH 2/4] sched: separate out io_schedule_prepare() and io_schedule_finish()

From: Tejun Heo
Date: Fri Oct 28 2016 - 12:59:29 EST


Now that IO schedule accounting is done inside __schedule(),
io_schedule() can be split into three steps - prep, schedule, and
finish - where the schedule part doesn't need any special annotation.
This allows marking a sleep as iowait by simply wrapping an existing
blocking function with io_schedule_prepare() and io_schedule_finish().

Because task_struct->in_iowait is single bit, the caller of
io_schedule_prepare() needs to record and the pass its state to
io_schedule_finish() to be safe regarding nesting. While this isn't
the prettiest, these functions are mostly gonna be used by core
functions and we don't want to use more space for ->in_iowait.

While at it, as it's simple to do now, reimplement io_schedule()
without unnecessarily going through io_schedule_timeout().

Signed-off-by: Tejun Heo <tj@xxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Jens Axboe <axboe@xxxxxxxxx>
---
include/linux/sched.h | 8 +++-----
kernel/sched/core.c | 33 ++++++++++++++++++++++++++++-----
2 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 348f51b..c025f77 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -441,12 +441,10 @@ extern signed long schedule_timeout_idle(signed long timeout);
asmlinkage void schedule(void);
extern void schedule_preempt_disabled(void);

+extern int __must_check io_schedule_prepare(void);
+extern void io_schedule_finish(int token);
extern long io_schedule_timeout(long timeout);
-
-static inline void io_schedule(void)
-{
- io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
-}
+extern void io_schedule(void);

void __noreturn do_task_dead(void);

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f6baa38..30d3185 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5067,25 +5067,48 @@ int __sched yield_to(struct task_struct *p, bool preempt)
}
EXPORT_SYMBOL_GPL(yield_to);

+int io_schedule_prepare(void)
+{
+ int old_iowait = current->in_iowait;
+
+ current->in_iowait = 1;
+ blk_schedule_flush_plug(current);
+
+ return old_iowait;
+}
+
+void io_schedule_finish(int token)
+{
+ current->in_iowait = token;
+}
+
/*
* This task is about to go to sleep on IO. Increment rq->nr_iowait so
* that process accounting knows that this is a task in IO wait state.
*/
long __sched io_schedule_timeout(long timeout)
{
- int old_iowait = current->in_iowait;
+ int token;
long ret;

- current->in_iowait = 1;
- blk_schedule_flush_plug(current);
-
+ token = io_schedule_prepare();
ret = schedule_timeout(timeout);
- current->in_iowait = old_iowait;
+ io_schedule_finish(token);

return ret;
}
EXPORT_SYMBOL(io_schedule_timeout);

+void io_schedule(void)
+{
+ int token;
+
+ token = io_schedule_prepare();
+ schedule();
+ io_schedule_finish(token);
+}
+EXPORT_SYMBOL(io_schedule);
+
/**
* sys_sched_get_priority_max - return maximum RT priority.
* @policy: scheduling class.
--
2.7.4