[PATCH v3 4/7] sched: make the output of schedstats independent of fair sched class

From: Yafang Shao
Date: Tue Aug 24 2021 - 07:30:43 EST


The per-CPU stats can be shown via /proc/sched_debug, which includes the
per-CPU schedstats of each task group. Currently these per-CPU
schedstats are only shown for the fair sched class. To support other
sched classes, this output has to be made independent of the fair
sched class.

Signed-off-by: Yafang Shao <laoar.shao@xxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxx>
Cc: Alison Chaiken <achaiken@xxxxxxxxxxx>
---
kernel/sched/debug.c | 70 +++++++++++++++++++++++++++++++-------------
1 file changed, 50 insertions(+), 20 deletions(-)

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 4cfee2aa1a2d..705987aed658 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -442,11 +442,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
struct sched_entity *se = tg->se[cpu];

#define P(F) SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)F)
-#define P_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld\n", \
- "se->statistics."#F, (long long)schedstat_val(tg->stats[cpu]->F))
#define PN(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
-#define PN_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", \
- "se->statistics."#F, SPLIT_NS((long long)schedstat_val(tg->stats[cpu]->F)))

if (!se)
return;
@@ -454,20 +450,6 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
PN(se->exec_start);
PN(se->vruntime);
PN(se->sum_exec_runtime);
-
- if (schedstat_enabled()) {
- PN_SCHEDSTAT(wait_start);
- PN_SCHEDSTAT(sleep_start);
- PN_SCHEDSTAT(block_start);
- PN_SCHEDSTAT(sleep_max);
- PN_SCHEDSTAT(block_max);
- PN_SCHEDSTAT(exec_max);
- PN_SCHEDSTAT(slice_max);
- PN_SCHEDSTAT(wait_max);
- PN_SCHEDSTAT(wait_sum);
- P_SCHEDSTAT(wait_count);
- }
-
P(se->load.weight);
#ifdef CONFIG_SMP
P(se->avg.load_avg);
@@ -475,13 +457,60 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
P(se->avg.runnable_avg);
#endif

-#undef PN_SCHEDSTAT
#undef PN
-#undef P_SCHEDSTAT
#undef P
}
#endif

+#if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)
+struct tg_schedstats {
+	struct seq_file *m;
+	int cpu;
+};
+
+/* walk_tg_tree() visitor: print @tg's schedstats for the seq_file/CPU in @data. */
+static int tg_show_schedstats(struct task_group *tg, void *data)
+{
+	struct tg_schedstats *p = data;
+	struct seq_file *m = p->m;
+	int cpu = p->cpu;
+
+#define P_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld\n", \
+	"se->statistics."#F, (long long)schedstat_val(tg->stats[cpu]->F))
+#define PN_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", \
+	"se->statistics."#F, SPLIT_NS((long long)schedstat_val(tg->stats[cpu]->F)))
+
+	PN_SCHEDSTAT(wait_start);
+	PN_SCHEDSTAT(sleep_start);
+	PN_SCHEDSTAT(block_start);
+	PN_SCHEDSTAT(sleep_max);
+	PN_SCHEDSTAT(block_max);
+	PN_SCHEDSTAT(exec_max);
+	PN_SCHEDSTAT(slice_max);
+	PN_SCHEDSTAT(wait_max);
+	PN_SCHEDSTAT(wait_sum);
+	P_SCHEDSTAT(wait_count);
+
+#undef P_SCHEDSTAT
+#undef PN_SCHEDSTAT
+	return 0;
+}
+
+static void print_task_group_stats(struct seq_file *m, int cpu)
+{
+	struct tg_schedstats data = { .m = m, .cpu = cpu };
+
+	if (!schedstat_enabled())
+		return;
+
+	rcu_read_lock();	/* the task-group tree walk must run under RCU */
+	walk_tg_tree(tg_show_schedstats, tg_nop, &data);
+	rcu_read_unlock();
+}
+#else /* !CONFIG_FAIR_GROUP_SCHED && !CONFIG_RT_GROUP_SCHED: stub for the unconditional caller */
+static inline void print_task_group_stats(struct seq_file *m, int cpu) { }
+#endif
+
#ifdef CONFIG_CGROUP_SCHED
static DEFINE_SPINLOCK(sched_debug_lock);
static char group_path[PATH_MAX];
@@ -756,6 +785,7 @@ do { \
print_cfs_stats(m, cpu);
print_rt_stats(m, cpu);
print_dl_stats(m, cpu);
+ print_task_group_stats(m, cpu);

print_rq(m, rq, cpu);
SEQ_printf(m, "\n");
--
2.18.2