[PATCH 1/2] sched/nohz: add sysctl control over sched_tick_max_deferment

From: Kevin Hilman
Date: Tue Jun 18 2013 - 19:58:41 EST


Allow sysctl override of sched_tick_max_deferment in order to ease
finding/fixing the remaining issues with full nohz.

The value is expressed in jiffies and defaults to HZ. Writing -1 disables
the max deferment, in which case scheduler_tick_max_deferment() returns
KTIME_MAX.

Cc: Frederic Weisbecker <fweisbec@xxxxxxxxx>
Signed-off-by: Kevin Hilman <khilman@xxxxxxxxxx>
---
include/linux/sched/sysctl.h | 3 +++
kernel/sched/core.c | 6 +++++-
kernel/sched/debug.c | 1 +
kernel/sysctl.c | 9 +++++++++
4 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index bf8086b..2ad07bb 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -57,6 +57,9 @@ extern unsigned int sysctl_sched_nr_migrate;
extern unsigned int sysctl_sched_time_avg;
extern unsigned int sysctl_timer_migration;
extern unsigned int sysctl_sched_shares_window;
+#ifdef CONFIG_NO_HZ_FULL
+extern unsigned int sysctl_sched_tick_max_deferment;
+#endif

int sched_proc_update_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e1a27f9..b5d3f99 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2751,12 +2751,16 @@ void scheduler_tick(void)
* balancing, etc... continue to move forward, even
* with a very low granularity.
*/
+unsigned int sysctl_sched_tick_max_deferment = HZ;
u64 scheduler_tick_max_deferment(void)
{
struct rq *rq = this_rq();
unsigned long next, now = ACCESS_ONCE(jiffies);

- next = rq->last_sched_tick + HZ;
+ if (sysctl_sched_tick_max_deferment == -1)
+ return KTIME_MAX;
+
+ next = rq->last_sched_tick + sysctl_sched_tick_max_deferment;

if (time_before_eq(next, now))
return 0;
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 75024a6..f445ab9 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -374,6 +374,7 @@ static void sched_debug_header(struct seq_file *m)
PN(sysctl_sched_wakeup_granularity);
P(sysctl_sched_child_runs_first);
P(sysctl_sched_features);
+ P(sysctl_sched_tick_max_deferment);
#undef PN
#undef P

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9edcf45..fb0b7d8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -393,6 +393,15 @@ static struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec,
},
#endif /* CONFIG_NUMA_BALANCING */
+#ifdef CONFIG_NO_HZ_FULL
+ {
+ .procname = "sched_tick_max_deferment",
+ .data = &sysctl_sched_tick_max_deferment,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif /* CONFIG_NO_HZ_FULL */
#endif /* CONFIG_SCHED_DEBUG */
{
.procname = "sched_rt_period_us",
--
1.8.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/