[RFC PATCH 2/5] sched/events: Introduce cfs_rq load tracking trace event

From: Dietmar Eggemann
Date: Tue Mar 28 2017 - 02:36:56 EST


The trace event keys load and util (utilization) are mapped to:

(1) load : cfs_rq->runnable_load_avg

(2) util : cfs_rq->avg.util_avg

To let this trace event work for configurations w/ and w/o group
scheduling support for cfs (CONFIG_FAIR_GROUP_SCHED) the following
special handling is necessary for non-existent key=value pairs:

path = "(null)" : In case of !CONFIG_FAIR_GROUP_SCHED.

id = -1 : In case of !CONFIG_FAIR_GROUP_SCHED.

The following list shows examples of the key=value pairs in different
configurations for:

(1) a root task_group:

cpu=4 path=/ id=1 load=6 util=331

(2) a task_group:

cpu=1 path=/tg1/tg11/tg111 id=4 load=538 util=522

(3) an autogroup:

cpu=3 path=/autogroup-18 id=0 load=997 util=517

(4) w/o CONFIG_FAIR_GROUP_SCHED:

cpu=0 path=(null) id=-1 load=314 util=289

The trace event is only defined for CONFIG_SMP.

The helper function __trace_sched_path() can be used to get the length
parameter of the dynamic array (path == NULL) and to copy the path into
it (path != NULL).

Signed-off-by: Dietmar Eggemann <dietmar.eggemann@xxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
---
include/trace/events/sched.h | 73 ++++++++++++++++++++++++++++++++++++++++++++
kernel/sched/fair.c | 9 ++++++
2 files changed, 82 insertions(+)

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 9e3ef6c99e4b..51db8a90e45f 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -562,6 +562,79 @@ TRACE_EVENT(sched_wake_idle_without_ipi,

TP_printk("cpu=%d", __entry->cpu)
);
+
+#ifdef CONFIG_SMP
+#ifdef CREATE_TRACE_POINTS
+/* Return cpu_of() the rq that @cfs_rq belongs to. */
+static inline int __trace_sched_cpu(struct cfs_rq *cfs_rq)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	return cpu_of(cfs_rq->rq);
+#else
+	/* Here the cfs_rq is the 'cfs' member embedded in its struct rq. */
+	return cpu_of(container_of(cfs_rq, struct rq, cfs));
+#endif
+}
+
+/* Return the 'path' array size needed; copy the path if @path != NULL. */
+static inline
+int __trace_sched_path(struct cfs_rq *cfs_rq, char *path, int len)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	int l = path ? len : 0;
+
+	if (task_group_is_autogroup(cfs_rq->tg))
+		return autogroup_path(cfs_rq->tg, path, l) + 1;
+	return cgroup_path(cfs_rq->tg->css.cgroup, path, l) + 1;
+#else
+	/* The size must cover the NUL strcpy() writes: sizeof, not strlen. */
+	if (path)
+		strcpy(path, "(null)");
+	return sizeof("(null)");
+#endif
+}
+
+static inline int __trace_sched_id(struct cfs_rq *cfs_rq)
+{
+#ifndef CONFIG_FAIR_GROUP_SCHED
+	return -1;			/* no task group, hence no css id */
+#else
+	return cfs_rq->tg->css.id;	/* css id of @cfs_rq's task group */
+#endif
+}
+#endif /* CREATE_TRACE_POINTS */
+
+/*
+ * Tracepoint for cfs_rq load tracking (runnable_load_avg and avg.util_avg):
+ */
+TRACE_EVENT(sched_load_cfs_rq,
+
+ TP_PROTO(struct cfs_rq *cfs_rq),
+
+ TP_ARGS(cfs_rq),
+
+ TP_STRUCT__entry(
+ __field( int, cpu )
+ __dynamic_array(char, path,
+ __trace_sched_path(cfs_rq, NULL, 0) ) /* NULL path: size query only */
+ __field( int, id )
+ __field( unsigned long, load )
+ __field( unsigned long, util )
+ ),
+
+ TP_fast_assign(
+ __entry->cpu = __trace_sched_cpu(cfs_rq);
+ __trace_sched_path(cfs_rq, __get_dynamic_array(path),
+ __get_dynamic_array_len(path)); /* copy the path into the array */
+ __entry->id = __trace_sched_id(cfs_rq);
+ __entry->load = cfs_rq->runnable_load_avg;
+ __entry->util = cfs_rq->avg.util_avg;
+ ),
+
+ TP_printk("cpu=%d path=%s id=%d load=%lu util=%lu", __entry->cpu,
+ __get_str(path), __entry->id, __entry->load, __entry->util)
+);
+#endif /* CONFIG_SMP */
#endif /* _TRACE_SCHED_H */

/* This part must be outside protection */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 03adf9fb48b1..ac19ab6ced8f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2950,6 +2950,9 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
sa->util_avg = sa->util_sum / LOAD_AVG_MAX;
}

+ if (cfs_rq)
+ trace_sched_load_cfs_rq(cfs_rq);
+
return decayed;
}

@@ -3170,6 +3173,8 @@ static inline int propagate_entity_load_avg(struct sched_entity *se)
update_tg_cfs_util(cfs_rq, se);
update_tg_cfs_load(cfs_rq, se);

+ trace_sched_load_cfs_rq(cfs_rq);
+
return 1;
}

@@ -3359,6 +3364,8 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
set_tg_cfs_propagate(cfs_rq);

cfs_rq_util_change(cfs_rq);
+
+ trace_sched_load_cfs_rq(cfs_rq);
}

/**
@@ -3379,6 +3386,8 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
set_tg_cfs_propagate(cfs_rq);

cfs_rq_util_change(cfs_rq);
+
+ trace_sched_load_cfs_rq(cfs_rq);
}

/* Add the load generated by se into cfs_rq's load average */
--
2.11.0