[PATCH 02/32] cpuset: Set up interface for nohz flag

From: Steven Rostedt
Date: Mon Oct 29 2012 - 16:42:36 EST


From: Frederic Weisbecker <fweisbec@xxxxxxxxx>

Prepare the interface to implement the nohz cpuset flag.
This flag, once set, will tell the system to try to
shut down the periodic timer tick when possible.

Here we use a per-CPU refcounter. As long as a CPU
is contained in at least one cpuset that has the
nohz flag set, it is part of the set of CPUs that
run in adaptive nohz mode.

[ include build fix from Zen Lin ]

Signed-off-by: Frederic Weisbecker <fweisbec@xxxxxxxxx>
Cc: Alessio Igor Bogani <abogani@xxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Avi Kivity <avi@xxxxxxxxxx>
Cc: Chris Metcalf <cmetcalf@xxxxxxxxxx>
Cc: Christoph Lameter <cl@xxxxxxxxx>
Cc: Daniel Lezcano <daniel.lezcano@xxxxxxxxxx>
Cc: Geoff Levand <geoff@xxxxxxxxxxxxx>
Cc: Gilad Ben Yossef <gilad@xxxxxxxxxxxxx>
Cc: Hakan Akkan <hakanakkan@xxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Kevin Hilman <khilman@xxxxxx>
Cc: Max Krasnyansky <maxk@xxxxxxxxxxxx>
Cc: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Stephen Hemminger <shemminger@xxxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Sven-Thorsten Dietrich <thebigcorporation@xxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
---
arch/Kconfig | 3 +++
include/linux/cpuset.h | 31 ++++++++++++++++++++++++++++
init/Kconfig | 8 ++++++++
kernel/cpuset.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++-
4 files changed, 94 insertions(+), 1 deletion(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 366ec06..8e2162f6 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -239,6 +239,9 @@ config HAVE_ARCH_JUMP_LABEL
bool

config HAVE_ARCH_MUTEX_CPU_RELAX
+ bool
+
+config HAVE_CPUSETS_NO_HZ
bool

config HAVE_RCU_TABLE_FREE
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 838320f..7e7eb41 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -13,6 +13,7 @@
#include <linux/nodemask.h>
#include <linux/cgroup.h>
#include <linux/mm.h>
+#include <linux/atomic.h>

#ifdef CONFIG_CPUSETS

@@ -235,4 +236,34 @@ static inline bool put_mems_allowed(unsigned int seq)

#endif /* !CONFIG_CPUSETS */

+#ifdef CONFIG_CPUSETS_NO_HZ
+
+DECLARE_PER_CPU(atomic_t, cpu_adaptive_nohz_ref);
+
+static inline bool cpuset_cpu_adaptive_nohz(int cpu)
+{
+ atomic_t *ref = &per_cpu(cpu_adaptive_nohz_ref, cpu);
+
+ if (atomic_add_return(0, ref) > 0)
+ return true;
+
+ return false;
+}
+
+static inline bool cpuset_adaptive_nohz(void)
+{
+ /*
+ * We probably want to do atomic_read() when we read
+ * locally to avoid the overhead of an ordered add.
+ * For that we have to do the dec of the ref locally as
+ * well.
+ */
+ return cpuset_cpu_adaptive_nohz(smp_processor_id());
+}
+#else
+static inline bool cpuset_cpu_adaptive_nohz(int cpu) { return false; }
+static inline bool cpuset_adaptive_nohz(void) { return false; }
+
+#endif /* CONFIG_CPUSETS_NO_HZ */
+
#endif /* _LINUX_CPUSET_H */
diff --git a/init/Kconfig b/init/Kconfig
index 6fdd6e3..ffdeeab 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -749,6 +749,14 @@ config PROC_PID_CPUSET
depends on CPUSETS
default y

+config CPUSETS_NO_HZ
+ bool "Tickless cpusets"
+ depends on CPUSETS && HAVE_CPUSETS_NO_HZ
+ help
+ This option lets you apply a nohz property to a cpuset so
+ that the periodic timer tick is avoided when possible on
+ the CPUs concerned.
+
config CGROUP_CPUACCT
bool "Simple CPU accounting cgroup subsystem"
help
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index f33c715..6319d8e 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -145,6 +145,7 @@ typedef enum {
CS_SCHED_LOAD_BALANCE,
CS_SPREAD_PAGE,
CS_SPREAD_SLAB,
+ CS_ADAPTIVE_NOHZ,
} cpuset_flagbits_t;

/* the type of hotplug event */
@@ -189,6 +190,11 @@ static inline int is_spread_slab(const struct cpuset *cs)
return test_bit(CS_SPREAD_SLAB, &cs->flags);
}

+static inline int is_adaptive_nohz(const struct cpuset *cs)
+{
+ return test_bit(CS_ADAPTIVE_NOHZ, &cs->flags);
+}
+
static struct cpuset top_cpuset = {
.flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
};
@@ -1190,6 +1196,32 @@ static void cpuset_change_flag(struct task_struct *tsk,
cpuset_update_task_spread_flag(cgroup_cs(scan->cg), tsk);
}

+#ifdef CONFIG_CPUSETS_NO_HZ
+
+DEFINE_PER_CPU(atomic_t, cpu_adaptive_nohz_ref);
+
+static void update_nohz_cpus(struct cpuset *old_cs, struct cpuset *cs)
+{
+ int cpu;
+ int val;
+
+ if (is_adaptive_nohz(old_cs) == is_adaptive_nohz(cs))
+ return;
+
+ for_each_cpu(cpu, cs->cpus_allowed) {
+ atomic_t *ref = &per_cpu(cpu_adaptive_nohz_ref, cpu);
+ if (is_adaptive_nohz(cs))
+ atomic_inc(ref);
+ else
+ atomic_dec(ref);
+ }
+}
+#else
+static inline void update_nohz_cpus(struct cpuset *old_cs, struct cpuset *cs)
+{
+}
+#endif
+
/*
* update_tasks_flags - update the spread flags of tasks in the cpuset.
* @cs: the cpuset in which each task's spread flags needs to be changed
@@ -1255,6 +1287,8 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
|| (is_spread_page(cs) != is_spread_page(trialcs)));

+ update_nohz_cpus(cs, trialcs);
+
mutex_lock(&callback_mutex);
cs->flags = trialcs->flags;
mutex_unlock(&callback_mutex);
@@ -1465,6 +1499,7 @@ typedef enum {
FILE_MEMORY_PRESSURE,
FILE_SPREAD_PAGE,
FILE_SPREAD_SLAB,
+ FILE_ADAPTIVE_NOHZ,
} cpuset_filetype_t;

static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
@@ -1504,6 +1539,11 @@ static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
case FILE_SPREAD_SLAB:
retval = update_flag(CS_SPREAD_SLAB, cs, val);
break;
+#ifdef CONFIG_CPUSETS_NO_HZ
+ case FILE_ADAPTIVE_NOHZ:
+ retval = update_flag(CS_ADAPTIVE_NOHZ, cs, val);
+ break;
+#endif
default:
retval = -EINVAL;
break;
@@ -1663,6 +1703,10 @@ static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft)
return is_spread_page(cs);
case FILE_SPREAD_SLAB:
return is_spread_slab(cs);
+#ifdef CONFIG_CPUSETS_NO_HZ
+ case FILE_ADAPTIVE_NOHZ:
+ return is_adaptive_nohz(cs);
+#endif
default:
BUG();
}
@@ -1771,7 +1815,14 @@ static struct cftype files[] = {
.write_u64 = cpuset_write_u64,
.private = FILE_SPREAD_SLAB,
},
-
+#ifdef CONFIG_CPUSETS_NO_HZ
+ {
+ .name = "adaptive_nohz",
+ .read_u64 = cpuset_read_u64,
+ .write_u64 = cpuset_write_u64,
+ .private = FILE_ADAPTIVE_NOHZ,
+ },
+#endif
{
.name = "memory_pressure_enabled",
.flags = CFTYPE_ONLY_ON_ROOT,
--
1.7.10.4


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/