Re: [patch] CFS scheduler, -v7

From: Ingo Molnar
Date: Sat Apr 28 2007 - 15:26:39 EST



* S.Çağlar Onur <caglar@xxxxxxxxxxxxx> wrote:

> If you want some more output/info etc. please just say, i have both v6
> and v7 available.

could you try the auto-renice patch on top of -v7:

http://people.redhat.com/mingo/cfs-scheduler/sched-cfs-auto-renice.patch

does this make it behave like -v6?

Ingo
---
arch/i386/kernel/ioport.c | 17 ++++++++++++++---
arch/x86_64/kernel/ioport.c | 12 ++++++++++--
drivers/block/loop.c | 5 ++++-
include/linux/sched.h | 7 +++++++
kernel/Kconfig.preempt | 17 +++++++++++++++++
kernel/sched.c | 40 ++++++++++++++++++++++++++++++++++++++++
kernel/workqueue.c | 2 +-
mm/oom_kill.c | 4 +++-
8 files changed, 96 insertions(+), 8 deletions(-)

Index: linux/arch/i386/kernel/ioport.c
===================================================================
--- linux.orig/arch/i386/kernel/ioport.c
+++ linux/arch/i386/kernel/ioport.c
@@ -64,9 +64,17 @@ asmlinkage long sys_ioperm(unsigned long

if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
return -EINVAL;
- if (turn_on && !capable(CAP_SYS_RAWIO))
- return -EPERM;
-
+ if (turn_on) {
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+ /*
+ * Task will be accessing hardware IO ports,
+ * mark it as special with the scheduler too:
+ */
+#ifdef CONFIG_BOOST_X
+ sched_privileged_task(current);
+#endif
+ }
/*
* If it's the first ioperm() call in this thread's lifetime, set the
* IO bitmap up. ioperm() is much less timing critical than clone(),
@@ -145,6 +153,9 @@ asmlinkage long sys_iopl(unsigned long u
if (level > old) {
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
+#ifdef CONFIG_BOOST_X
+ sched_privileged_task(current);
+#endif
}
t->iopl = level << 12;
regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl;
Index: linux/arch/x86_64/kernel/ioport.c
===================================================================
--- linux.orig/arch/x86_64/kernel/ioport.c
+++ linux/arch/x86_64/kernel/ioport.c
@@ -41,8 +41,13 @@ asmlinkage long sys_ioperm(unsigned long

if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
return -EINVAL;
- if (turn_on && !capable(CAP_SYS_RAWIO))
- return -EPERM;
+ if (turn_on) {
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+#ifdef CONFIG_BOOST_X
+ sched_privileged_task(current);
+#endif
+ }

/*
* If it's the first ioperm() call in this thread's lifetime, set the
@@ -113,6 +118,9 @@ asmlinkage long sys_iopl(unsigned int le
if (level > old) {
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
+#ifdef CONFIG_BOOST_X
+ sched_privileged_task(current);
+#endif
}
regs->eflags = (regs->eflags &~ X86_EFLAGS_IOPL) | (level << 12);
return 0;
Index: linux/drivers/block/loop.c
===================================================================
--- linux.orig/drivers/block/loop.c
+++ linux/drivers/block/loop.c
@@ -588,7 +588,10 @@ static int loop_thread(void *data)
*/
current->flags |= PF_NOFREEZE;

- set_user_nice(current, -20);
+ /*
+ * The loop thread is important enough to be given a boost:
+ */
+ sched_privileged_task(current);

while (!kthread_should_stop() || lo->lo_bio) {

Index: linux/include/linux/sched.h
===================================================================
--- linux.orig/include/linux/sched.h
+++ linux/include/linux/sched.h
@@ -1268,6 +1268,13 @@ static inline int rt_mutex_getprio(struc
#endif

extern void set_user_nice(struct task_struct *p, long nice);
+/*
+ * Task has special privileges, give it more CPU power:
+ */
+extern void sched_privileged_task(struct task_struct *p);
+
+extern int sysctl_sched_privileged_nice_level;
+
extern int task_prio(const struct task_struct *p);
extern int task_nice(const struct task_struct *p);
extern int can_nice(const struct task_struct *p, const int nice);
Index: linux/kernel/Kconfig.preempt
===================================================================
--- linux.orig/kernel/Kconfig.preempt
+++ linux/kernel/Kconfig.preempt
@@ -63,3 +63,20 @@ config PREEMPT_BKL
Say Y here if you are building a kernel for a desktop system.
Say N if you are unsure.

+config BOOST_X
+ bool "Boost X"
+ default y
+ help
+ This option instructs the kernel to guarantee more CPU time to
+ X than to other tasks, which is useful if you want to have a
+ faster desktop even under high system load.
+
+ This option works by automatically boosting X's priority via
+ renicing it to -10. NOTE: CFS does not suffer from
+ "overscheduling" problems when X is reniced to -10, so if this
+ is a predominantly desktop box it makes sense to select this
+ option.
+
+ Say Y here if you are building a kernel for a desktop system.
+ Say N if you want X to be treated as a normal task.
+
Index: linux/kernel/sched.c
===================================================================
--- linux.orig/kernel/sched.c
+++ linux/kernel/sched.c
@@ -3323,6 +3323,46 @@ out_unlock:
EXPORT_SYMBOL(set_user_nice);

/*
+ * Nice level for privileged tasks. (can be set to 0 for this
+ * to be turned off)
+ */
+int sysctl_sched_privileged_nice_level __read_mostly = -10;
+
+static int __init privileged_nice_level_setup(char *str)
+{
+ sysctl_sched_privileged_nice_level = simple_strtol(str, NULL, 0);
+ return 1;
+}
+__setup("privileged_nice_level=", privileged_nice_level_setup);
+
+/*
+ * Tasks with special privileges call this and gain extra nice
+ * levels:
+ */
+void sched_privileged_task(struct task_struct *p)
+{
+ long new_nice = sysctl_sched_privileged_nice_level;
+ long old_nice = TASK_NICE(p);
+
+ if (new_nice >= old_nice)
+ return;
+ /*
+ * Setting the sysctl to 0 turns off the boosting:
+ */
+ if (unlikely(!new_nice))
+ return;
+
+ if (new_nice < -20)
+ new_nice = -20;
+ else if (new_nice > 19)
+ new_nice = 19;
+
+ set_user_nice(p, new_nice);
+}
+
+EXPORT_SYMBOL(sched_privileged_task);
+
+/*
* can_nice - check if a task can reduce its nice value
* @p: task
* @nice: nice value
Index: linux/kernel/workqueue.c
===================================================================
--- linux.orig/kernel/workqueue.c
+++ linux/kernel/workqueue.c
@@ -355,7 +355,7 @@ static int worker_thread(void *__cwq)
if (!cwq->freezeable)
current->flags |= PF_NOFREEZE;

- set_user_nice(current, -5);
+ sched_privileged_task(current);

/* Block and flush all signals */
sigfillset(&blocked);
Index: linux/mm/oom_kill.c
===================================================================
--- linux.orig/mm/oom_kill.c
+++ linux/mm/oom_kill.c
@@ -293,7 +293,9 @@ static void __oom_kill_task(struct task_
* all the memory it needs. That way it should be able to
* exit() and clear out its resources quickly...
*/
- p->time_slice = HZ;
+ if (p->policy == SCHED_NORMAL || p->policy == SCHED_BATCH)
+ sched_privileged_task(p);
+
set_tsk_thread_flag(p, TIF_MEMDIE);

force_sig(SIGKILL, p);