Re: aim7 -30% regression in 2.6.24-rc1

From: Peter Zijlstra
Date: Thu Nov 01 2007 - 11:30:01 EST


(restoring CCs which I inadvertly dropped)

On Thu, 2007-11-01 at 16:00 +0100, Ingo Molnar wrote:
> * Peter Zijlstra <a.p.zijlstra@xxxxxxxxx> wrote:
>
> > > could we instead justmake sched_nr_latency non-tunable, and
> > > recalculate it from the sysctl handler whenever sched_latency or
> > > sched_min_granularity changes? That would avoid not only the
> > > division by zero bug but also other out-of-spec tunings.
> >
> > We don't have min_granularity anymore.
>
> i think we should reintroduce it in the SCHED_DEBUG case and make it the
> main tunable item - sched_nr is a nice performance optimization but
> quite unintuitive as a tuning knob.

ok, I don't particularly care either way, could be because I wrote the
stuff :-)

Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
---
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -1466,12 +1466,16 @@ extern void sched_idle_next(void);

#ifdef CONFIG_SCHED_DEBUG
extern unsigned int sysctl_sched_latency;
-extern unsigned int sysctl_sched_nr_latency;
+extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_batch_wakeup_granularity;
extern unsigned int sysctl_sched_child_runs_first;
extern unsigned int sysctl_sched_features;
extern unsigned int sysctl_sched_migration_cost;
+
+int sched_nr_latency_handler(struct ctl_table *table, int write,
+ struct file *file, void __user *buffer, size_t *length,
+ loff_t *ppos);
#endif

extern unsigned int sysctl_sched_compat_yield;
Index: linux-2.6/kernel/sched_debug.c
===================================================================
--- linux-2.6.orig/kernel/sched_debug.c
+++ linux-2.6/kernel/sched_debug.c
@@ -210,7 +210,7 @@ static int sched_debug_show(struct seq_f
#define PN(x) \
SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
PN(sysctl_sched_latency);
- PN(sysctl_sched_nr_latency);
+ PN(sysctl_sched_min_granularity);
PN(sysctl_sched_wakeup_granularity);
PN(sysctl_sched_batch_wakeup_granularity);
PN(sysctl_sched_child_runs_first);
Index: linux-2.6/kernel/sched_fair.c
===================================================================
--- linux-2.6.orig/kernel/sched_fair.c
+++ linux-2.6/kernel/sched_fair.c
@@ -35,16 +35,21 @@
const_debug unsigned int sysctl_sched_latency = 20000000ULL;

/*
- * After fork, child runs first. (default) If set to 0 then
- * parent will (try to) run first.
+ * Minimal preemption granularity for CPU-bound tasks:
+ * (default: 1 msec, units: nanoseconds)
*/
-const_debug unsigned int sysctl_sched_child_runs_first = 1;
+const_debug unsigned int sysctl_sched_min_granularity = 1000000ULL;

/*
- * Minimal preemption granularity for CPU-bound tasks:
- * (default: 2 msec, units: nanoseconds)
+ * is kept at sysctl_sched_latency / sysctl_sched_min_granularity
+ */
+const_debug unsigned int sched_nr_latency = 20;
+
+/*
+ * After fork, child runs first. (default) If set to 0 then
+ * parent will (try to) run first.
*/
-const_debug unsigned int sysctl_sched_nr_latency = 20;
+const_debug unsigned int sysctl_sched_child_runs_first = 1;

/*
* sys_sched_yield() compat mode
@@ -301,6 +306,21 @@ static inline struct sched_entity *__pic
* Scheduling class statistics methods:
*/

+#ifdef CONFIG_SCHED_DEBUG
+int sched_nr_latency_handler(struct ctl_table *table, int write,
+ struct file *filp, void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+
+ if (!ret && write) {
+ sched_nr_latency =
+ sysctl_sched_latency / sysctl_sched_min_granularity;
+ }
+
+ return ret;
+}
+#endif

/*
* The idea is to set a period in which each task runs once.
@@ -313,7 +333,7 @@ static inline struct sched_entity *__pic
static u64 __sched_period(unsigned long nr_running)
{
u64 period = sysctl_sched_latency;
- unsigned long nr_latency = sysctl_sched_nr_latency;
+ unsigned long nr_latency = sched_nr_latency;

if (unlikely(nr_running > nr_latency)) {
period *= nr_running;
Index: linux-2.6/kernel/sysctl.c
===================================================================
--- linux-2.6.orig/kernel/sysctl.c
+++ linux-2.6/kernel/sysctl.c
@@ -235,11 +235,14 @@ static struct ctl_table kern_table[] = {
#ifdef CONFIG_SCHED_DEBUG
{
.ctl_name = CTL_UNNUMBERED,
- .procname = "sched_nr_latency",
- .data = &sysctl_sched_nr_latency,
+ .procname = "sched_min_granularity_ns",
+ .data = &sysctl_sched_min_granularity,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = &sched_nr_latency_handler,
+ .strategy = &sysctl_intvec,
+ .extra1 = &min_sched_granularity_ns,
+ .extra2 = &max_sched_granularity_ns,
},
{
.ctl_name = CTL_UNNUMBERED,
@@ -247,7 +250,7 @@ static struct ctl_table kern_table[] = {
.data = &sysctl_sched_latency,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
+ .proc_handler = &sched_nr_latency_handler,
.strategy = &sysctl_intvec,
.extra1 = &min_sched_granularity_ns,
.extra2 = &max_sched_granularity_ns,

Attachment: signature.asc
Description: This is a digitally signed message part