[PATCH 06/89] sched/topology: Split out scheduler topology interfaces from <linux/sched.h> into <linux/sched/topology.h>

From: Ingo Molnar
Date: Mon Feb 06 2017 - 08:54:41 EST


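The vast majority of sched.h users do not require the topology types and
interfaces, so split them out into a separate header, and include that
header in the .c files and headers that require them.

The SCHED_CAPACITY_SHIFT/SCHED_CAPACITY_SCALE definitions are moved to the
new header as well (inside its CONFIG_SMP section).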

This reduces the size of linux/sched.h by ~6%.

Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Mike Galbraith <efault@xxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: linux-kernel@xxxxxxxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
arch/arm/kernel/topology.c | 1 +
arch/arm64/kernel/topology.c | 1 +
arch/powerpc/kernel/smp.c | 1 +
arch/s390/kernel/topology.c | 1 +
arch/x86/kernel/smpboot.c | 1 +
block/blk-mq.c | 1 +
block/blk-softirq.c | 1 +
include/linux/cpuset.h | 1 +
include/linux/sched.h | 212 -----------------------------------------------------------
include/linux/sched/topology.h | 217 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
kernel/sched/fair.c | 2 +
kernel/sched/sched.h | 1 +
12 files changed, 228 insertions(+), 212 deletions(-)

diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index ebf47d91b804..f8a3ab82e77f 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -21,6 +21,7 @@
#include <linux/nodemask.h>
#include <linux/of.h>
#include <linux/sched.h>
+#include <linux/sched/topology.h>
#include <linux/slab.h>
#include <linux/string.h>

diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 655e65f38f31..3aeb7bbb1bfc 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -20,6 +20,7 @@
#include <linux/nodemask.h>
#include <linux/of.h>
#include <linux/sched.h>
+#include <linux/sched/topology.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/cpufreq.h>
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 893bd7f79be6..54903862cf57 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -20,6 +20,7 @@
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/sched.h>
+#include <linux/sched/topology.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 93dcbae1e98d..044dec44ce8d 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -13,6 +13,7 @@
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/sched/topology.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/slab.h>
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 46732dc3b73c..0fe89249d9ac 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -45,6 +45,7 @@
#include <linux/smp.h>
#include <linux/export.h>
#include <linux/sched.h>
+#include <linux/sched/topology.h>
#include <linux/percpu.h>
#include <linux/bootmem.h>
#include <linux/err.h>
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c3400b5444a7..92bf48dac06f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -20,6 +20,7 @@
#include <linux/cpu.h>
#include <linux/cache.h>
#include <linux/sched/sysctl.h>
+#include <linux/sched/topology.h>
#include <linux/delay.h>
#include <linux/crash_dump.h>
#include <linux/prefetch.h>
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 06cf9807f49a..87b7df4851bf 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -9,6 +9,7 @@
#include <linux/interrupt.h>
#include <linux/cpu.h>
#include <linux/sched.h>
+#include <linux/sched/topology.h>

#include "blk.h"

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index bfc204e70338..c608c39cb161 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -9,6 +9,7 @@
*/

#include <linux/sched.h>
+#include <linux/sched/topology.h>
#include <linux/cpumask.h>
#include <linux/nodemask.h>
#include <linux/mm.h>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c8e519d0b4a3..b2e18537088b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -969,12 +969,6 @@ enum cpu_idle_type {
# define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT)

/*
- * Increase resolution of cpu_capacity calculations
- */
-#define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT
-#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT)
-
-/*
* Wake-queues are lists of tasks with a pending wakeup, whose
* callers have already marked the task as woken internally,
* and can thus carry on. A common use case is being able to
@@ -1024,214 +1018,8 @@ extern void wake_q_add(struct wake_q_head *head,
struct task_struct *task);
extern void wake_up_q(struct wake_q_head *head);

-/*
- * sched-domains (multiprocessor balancing) declarations:
- */
-#ifdef CONFIG_SMP
-#define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */
-#define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */
-#define SD_BALANCE_EXEC 0x0004 /* Balance on exec */
-#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */
-#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */
-#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */
-#define SD_ASYM_CPUCAPACITY 0x0040 /* Groups have different max cpu capacities */
-#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu capacity */
-#define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */
-#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */
-#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */
-#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */
-#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */
-#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */
-#define SD_NUMA 0x4000 /* cross-node balancing */
-
-#ifdef CONFIG_SCHED_SMT
-static inline int cpu_smt_flags(void)
-{
- return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
-}
-#endif
-
-#ifdef CONFIG_SCHED_MC
-static inline int cpu_core_flags(void)
-{
- return SD_SHARE_PKG_RESOURCES;
-}
-#endif
-
-#ifdef CONFIG_NUMA
-static inline int cpu_numa_flags(void)
-{
- return SD_NUMA;
-}
-#endif
-
-extern int arch_asym_cpu_priority(int cpu);
-
-struct sched_domain_attr {
- int relax_domain_level;
-};
-
-#define SD_ATTR_INIT (struct sched_domain_attr) { \
- .relax_domain_level = -1, \
-}
-
-extern int sched_domain_level_max;
-
-struct sched_group;
-
-struct sched_domain_shared {
- atomic_t ref;
- atomic_t nr_busy_cpus;
- int has_idle_cores;
-};
-
-struct sched_domain {
- /* These fields must be setup */
- struct sched_domain *parent; /* top domain must be null terminated */
- struct sched_domain *child; /* bottom domain must be null terminated */
- struct sched_group *groups; /* the balancing groups of the domain */
- unsigned long min_interval; /* Minimum balance interval ms */
- unsigned long max_interval; /* Maximum balance interval ms */
- unsigned int busy_factor; /* less balancing by factor if busy */
- unsigned int imbalance_pct; /* No balance until over watermark */
- unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */
- unsigned int busy_idx;
- unsigned int idle_idx;
- unsigned int newidle_idx;
- unsigned int wake_idx;
- unsigned int forkexec_idx;
- unsigned int smt_gain;
-
- int nohz_idle; /* NOHZ IDLE status */
- int flags; /* See SD_* */
- int level;
-
- /* Runtime fields. */
- unsigned long last_balance; /* init to jiffies. units in jiffies */
- unsigned int balance_interval; /* initialise to 1. units in ms. */
- unsigned int nr_balance_failed; /* initialise to 0 */
-
- /* idle_balance() stats */
- u64 max_newidle_lb_cost;
- unsigned long next_decay_max_lb_cost;
-
- u64 avg_scan_cost; /* select_idle_sibling */
-
-#ifdef CONFIG_SCHEDSTATS
- /* load_balance() stats */
- unsigned int lb_count[CPU_MAX_IDLE_TYPES];
- unsigned int lb_failed[CPU_MAX_IDLE_TYPES];
- unsigned int lb_balanced[CPU_MAX_IDLE_TYPES];
- unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES];
- unsigned int lb_gained[CPU_MAX_IDLE_TYPES];
- unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES];
- unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES];
- unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES];
-
- /* Active load balancing */
- unsigned int alb_count;
- unsigned int alb_failed;
- unsigned int alb_pushed;
-
- /* SD_BALANCE_EXEC stats */
- unsigned int sbe_count;
- unsigned int sbe_balanced;
- unsigned int sbe_pushed;
-
- /* SD_BALANCE_FORK stats */
- unsigned int sbf_count;
- unsigned int sbf_balanced;
- unsigned int sbf_pushed;
-
- /* try_to_wake_up() stats */
- unsigned int ttwu_wake_remote;
- unsigned int ttwu_move_affine;
- unsigned int ttwu_move_balance;
-#endif
-#ifdef CONFIG_SCHED_DEBUG
- char *name;
-#endif
- union {
- void *private; /* used during construction */
- struct rcu_head rcu; /* used during destruction */
- };
- struct sched_domain_shared *shared;
-
- unsigned int span_weight;
- /*
- * Span of all CPUs in this domain.
- *
- * NOTE: this field is variable length. (Allocated dynamically
- * by attaching extra space to the end of the structure,
- * depending on how many CPUs the kernel has booted up with)
- */
- unsigned long span[0];
-};
-
-static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
-{
- return to_cpumask(sd->span);
-}
-
-extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
- struct sched_domain_attr *dattr_new);
-
-/* Allocate an array of sched domains, for partition_sched_domains(). */
-cpumask_var_t *alloc_sched_domains(unsigned int ndoms);
-void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);
-
-bool cpus_share_cache(int this_cpu, int that_cpu);
-
-typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
-typedef int (*sched_domain_flags_f)(void);
-
-#define SDTL_OVERLAP 0x01
-
-struct sd_data {
- struct sched_domain **__percpu sd;
- struct sched_domain_shared **__percpu sds;
- struct sched_group **__percpu sg;
- struct sched_group_capacity **__percpu sgc;
-};
-
-struct sched_domain_topology_level {
- sched_domain_mask_f mask;
- sched_domain_flags_f sd_flags;
- int flags;
- int numa_level;
- struct sd_data data;
-#ifdef CONFIG_SCHED_DEBUG
- char *name;
-#endif
-};
-
-extern void set_sched_topology(struct sched_domain_topology_level *tl);
extern void wake_up_if_idle(int cpu);

-#ifdef CONFIG_SCHED_DEBUG
-# define SD_INIT_NAME(type) .name = #type
-#else
-# define SD_INIT_NAME(type)
-#endif
-
-#else /* CONFIG_SMP */
-
-struct sched_domain_attr;
-
-static inline void
-partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
- struct sched_domain_attr *dattr_new)
-{
-}
-
-static inline bool cpus_share_cache(int this_cpu, int that_cpu)
-{
- return true;
-}
-
-#endif /* !CONFIG_SMP */
-
-
struct io_context; /* See blkdev.h */


diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
new file mode 100644
index 000000000000..ae2cb8f9c431
--- /dev/null
+++ b/include/linux/sched/topology.h
@@ -0,0 +1,217 @@
+#ifndef _LINUX_SCHED_TOPOLOGY_H
+#define _LINUX_SCHED_TOPOLOGY_H
+
+/*
+ * sched-domains (multiprocessor balancing) declarations:
+ */
+#ifdef CONFIG_SMP
+
+#define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */
+#define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */
+#define SD_BALANCE_EXEC 0x0004 /* Balance on exec */
+#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */
+#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */
+#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */
+#define SD_ASYM_CPUCAPACITY 0x0040 /* Groups have different max cpu capacities */
+#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu capacity */
+#define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */
+#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */
+#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */
+#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */
+#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */
+#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */
+#define SD_NUMA 0x4000 /* cross-node balancing */
+
+/*
+ * Increase resolution of cpu_capacity calculations
+ */
+#define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT
+#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT)
+
+#ifdef CONFIG_SCHED_SMT
+static inline int cpu_smt_flags(void)
+{
+ return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+}
+#endif
+
+#ifdef CONFIG_SCHED_MC
+static inline int cpu_core_flags(void)
+{
+ return SD_SHARE_PKG_RESOURCES;
+}
+#endif
+
+#ifdef CONFIG_NUMA
+static inline int cpu_numa_flags(void)
+{
+ return SD_NUMA;
+}
+#endif
+
+extern int arch_asym_cpu_priority(int cpu);
+
+struct sched_domain_attr {
+ int relax_domain_level;
+};
+
+#define SD_ATTR_INIT (struct sched_domain_attr) { \
+ .relax_domain_level = -1, \
+}
+
+extern int sched_domain_level_max;
+
+struct sched_group;
+
+struct sched_domain_shared {
+ atomic_t ref;
+ atomic_t nr_busy_cpus;
+ int has_idle_cores;
+};
+
+struct sched_domain {
+ /* These fields must be setup */
+ struct sched_domain *parent; /* top domain must be null terminated */
+ struct sched_domain *child; /* bottom domain must be null terminated */
+ struct sched_group *groups; /* the balancing groups of the domain */
+ unsigned long min_interval; /* Minimum balance interval ms */
+ unsigned long max_interval; /* Maximum balance interval ms */
+ unsigned int busy_factor; /* less balancing by factor if busy */
+ unsigned int imbalance_pct; /* No balance until over watermark */
+ unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */
+ unsigned int busy_idx;
+ unsigned int idle_idx;
+ unsigned int newidle_idx;
+ unsigned int wake_idx;
+ unsigned int forkexec_idx;
+ unsigned int smt_gain;
+
+ int nohz_idle; /* NOHZ IDLE status */
+ int flags; /* See SD_* */
+ int level;
+
+ /* Runtime fields. */
+ unsigned long last_balance; /* init to jiffies. units in jiffies */
+ unsigned int balance_interval; /* initialise to 1. units in ms. */
+ unsigned int nr_balance_failed; /* initialise to 0 */
+
+ /* idle_balance() stats */
+ u64 max_newidle_lb_cost;
+ unsigned long next_decay_max_lb_cost;
+
+ u64 avg_scan_cost; /* select_idle_sibling */
+
+#ifdef CONFIG_SCHEDSTATS
+ /* load_balance() stats */
+ unsigned int lb_count[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_failed[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_balanced[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_gained[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES];
+ unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES];
+
+ /* Active load balancing */
+ unsigned int alb_count;
+ unsigned int alb_failed;
+ unsigned int alb_pushed;
+
+ /* SD_BALANCE_EXEC stats */
+ unsigned int sbe_count;
+ unsigned int sbe_balanced;
+ unsigned int sbe_pushed;
+
+ /* SD_BALANCE_FORK stats */
+ unsigned int sbf_count;
+ unsigned int sbf_balanced;
+ unsigned int sbf_pushed;
+
+ /* try_to_wake_up() stats */
+ unsigned int ttwu_wake_remote;
+ unsigned int ttwu_move_affine;
+ unsigned int ttwu_move_balance;
+#endif
+#ifdef CONFIG_SCHED_DEBUG
+ char *name;
+#endif
+ union {
+ void *private; /* used during construction */
+ struct rcu_head rcu; /* used during destruction */
+ };
+ struct sched_domain_shared *shared;
+
+ unsigned int span_weight;
+ /*
+ * Span of all CPUs in this domain.
+ *
+ * NOTE: this field is variable length. (Allocated dynamically
+ * by attaching extra space to the end of the structure,
+ * depending on how many CPUs the kernel has booted up with)
+ */
+ unsigned long span[0];
+};
+
+static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
+{
+ return to_cpumask(sd->span);
+}
+
+extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
+ struct sched_domain_attr *dattr_new);
+
+/* Allocate an array of sched domains, for partition_sched_domains(). */
+cpumask_var_t *alloc_sched_domains(unsigned int ndoms);
+void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);
+
+bool cpus_share_cache(int this_cpu, int that_cpu);
+
+typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
+typedef int (*sched_domain_flags_f)(void);
+
+#define SDTL_OVERLAP 0x01
+
+struct sd_data {
+ struct sched_domain **__percpu sd;
+ struct sched_domain_shared **__percpu sds;
+ struct sched_group **__percpu sg;
+ struct sched_group_capacity **__percpu sgc;
+};
+
+struct sched_domain_topology_level {
+ sched_domain_mask_f mask;
+ sched_domain_flags_f sd_flags;
+ int flags;
+ int numa_level;
+ struct sd_data data;
+#ifdef CONFIG_SCHED_DEBUG
+ char *name;
+#endif
+};
+
+extern void set_sched_topology(struct sched_domain_topology_level *tl);
+
+#ifdef CONFIG_SCHED_DEBUG
+# define SD_INIT_NAME(type) .name = #type
+#else
+# define SD_INIT_NAME(type)
+#endif
+
+#else /* CONFIG_SMP */
+
+struct sched_domain_attr;
+
+static inline void
+partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
+ struct sched_domain_attr *dattr_new)
+{
+}
+
+static inline bool cpus_share_cache(int this_cpu, int that_cpu)
+{
+ return true;
+}
+
+#endif /* !CONFIG_SMP */
+
+#endif /* _LINUX_SCHED_TOPOLOGY_H */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 274c747a01ce..fcd520db8e32 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -21,6 +21,8 @@
*/

#include <linux/sched.h>
+#include <linux/sched/topology.h>
+
#include <linux/latencytop.h>
#include <linux/cpumask.h>
#include <linux/cpuidle.h>
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 17ed94b9b413..319fcf80930c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1,6 +1,7 @@

#include <linux/sched.h>
#include <linux/sched/sysctl.h>
+#include <linux/sched/topology.h>
#include <linux/sched/rt.h>
#include <linux/u64_stats_sync.h>
#include <linux/sched/deadline.h>
--
2.7.4