[PATCH v2] perf: update perf_cgroup time for ancestor cgroup(s)

From: Song Liu
Date: Mon Mar 12 2018 - 12:59:57 EST


When a perf_event is attached to a parent cgroup, it should count events
for all of its child cgroups:

parent_group <---- perf_event
\
- child_group <---- process(es)

However, in our tests, we found this perf_event cannot report reliable
results. Here is an example case:

# create cgroups
mkdir -p /sys/fs/cgroup/p/c
# start perf for parent group
perf stat -e instructions -G "p"

# on another console, run test process in child cgroup:
stressapptest -s 2 -M 1000 & echo $! > /sys/fs/cgroup/p/c/cgroup.procs

# after the test process is done, stopping perf in the first console shows:

<not counted> instructions p

The instructions event should not be reported as "not counted", because
the process runs in the child cgroup.

We found this is because perf_event->cgrp and cpuctx->cgrp are not
identical, so perf_event->cgrp is not updated properly.

This patch fixes the problem by updating the perf_cgroup time for the
ancestor cgroup(s) as well.

Signed-off-by: Song Liu <songliubraving@xxxxxx>
Reported-by: Ephraim Park <ephiepark@xxxxxx>
---
kernel/events/core.c | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5789810..6f015ff 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -724,9 +724,14 @@ static inline void __update_cgrp_time(struct perf_cgroup *cgrp)

static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
{
- struct perf_cgroup *cgrp_out = cpuctx->cgrp;
- if (cgrp_out)
- __update_cgrp_time(cgrp_out);
+ struct perf_cgroup *cgrp = cpuctx->cgrp;
+ struct cgroup_subsys_state *css;
+
+ if (cgrp)
+ for (css = &cgrp->css; css; css = css->parent) {
+ cgrp = container_of(css, struct perf_cgroup, css);
+ __update_cgrp_time(cgrp);
+ }
}

static inline void update_cgrp_time_from_event(struct perf_event *event)
@@ -754,6 +759,7 @@ perf_cgroup_set_timestamp(struct task_struct *task,
{
struct perf_cgroup *cgrp;
struct perf_cgroup_info *info;
+ struct cgroup_subsys_state *css;

/*
* ctx->lock held by caller
@@ -764,8 +770,12 @@ perf_cgroup_set_timestamp(struct task_struct *task,
return;

cgrp = perf_cgroup_from_task(task, ctx);
- info = this_cpu_ptr(cgrp->info);
- info->timestamp = ctx->timestamp;
+
+ for (css = &cgrp->css; css; css = css->parent) {
+ cgrp = container_of(css, struct perf_cgroup, css);
+ info = this_cpu_ptr(cgrp->info);
+ info->timestamp = ctx->timestamp;
+ }
}

static DEFINE_PER_CPU(struct list_head, cgrp_cpuctx_list);
--
2.9.5