Re: [tip:perf/core] perf/core: Check return value of the perf_event_read() IPI

From: Jiri Olsa
Date: Mon Aug 22 2016 - 06:38:32 EST


On Mon, Aug 22, 2016 at 10:29:32AM +0200, Jiri Olsa wrote:
> On Mon, Aug 22, 2016 at 09:17:37AM +0200, Jiri Olsa wrote:
> > On Sun, Aug 21, 2016 at 02:10:07PM +0200, Vegard Nossum wrote:
> >
> > SNIP
> >
> > > [<ffffffff816d1577>] ? __fget+0x47/0x270
> > > [<ffffffff81676d5b>] vfs_readv+0x8b/0xc0
> > > [<ffffffff81676e6e>] do_readv+0xde/0x230
> > > [<ffffffff81676d90>] ? vfs_readv+0xc0/0xc0
> > > [<ffffffff81002b60>] ? exit_to_usermode_loop+0x190/0x190
> > > [<ffffffff82001b07>] ? check_preemption_disabled+0x37/0x1e0
> > > [<ffffffff81677617>] SyS_readv+0x27/0x30
> > > [<ffffffff816775f0>] ? do_pwritev+0x1a0/0x1a0
> > > [<ffffffff81005524>] do_syscall_64+0x1c4/0x4e0
> > > [<ffffffff83c3286a>] entry_SYSCALL64_slow_path+0x25/0x25
> > >
> > > I don't think WARN() is the right interface for signalling errors to
> > > userspace programs?
> >
> > any special way to trigger that?
>
> nope ;-)
>
> perf stat -a -I 10

Reading the event can race with the event being scheduled out,
leaving us with state == ACTIVE but oncpu == -1.

The attached patch fixes the WARN for me, but I might
be missing some other cases.

jirka


---
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3f07e6cfc1b6..375274b6f3b4 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1802,8 +1802,9 @@ event_sched_out(struct perf_event *event,

event->tstamp_stopped = tstamp;
event->pmu->del(event, 0);
- event->oncpu = -1;
- event->state = PERF_EVENT_STATE_INACTIVE;
+ WRITE_ONCE(event->state, PERF_EVENT_STATE_INACTIVE);
+ smp_wmb();
+ WRITE_ONCE(event->oncpu, -1);
if (event->pending_disable) {
event->pending_disable = 0;
event->state = PERF_EVENT_STATE_OFF;
@@ -3424,9 +3425,8 @@ struct perf_read_data {
int ret;
};

-static int find_cpu_to_read(struct perf_event *event, int local_cpu)
+static int find_cpu_to_read(struct perf_event *event, int event_cpu, int local_cpu)
{
- int event_cpu = event->oncpu;
u16 local_pkg, event_pkg;

if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
@@ -3561,13 +3561,17 @@ u64 perf_event_read_local(struct perf_event *event)

static int perf_event_read(struct perf_event *event, bool group)
{
- int ret = 0, cpu_to_read, local_cpu;
+ int ret = 0, cpu_to_read, local_cpu, state;
+
+ state = READ_ONCE(event->state);
+ smp_rmb();
+ cpu_to_read = event->oncpu;

/*
* If event is enabled and currently active on a CPU, update the
* value in the event structure:
*/
- if (event->state == PERF_EVENT_STATE_ACTIVE) {
+ if (state == PERF_EVENT_STATE_ACTIVE && cpu_to_read != -1) {
struct perf_read_data data = {
.event = event,
.group = group,
@@ -3575,7 +3579,7 @@ static int perf_event_read(struct perf_event *event, bool group)
};

local_cpu = get_cpu();
- cpu_to_read = find_cpu_to_read(event, local_cpu);
+ cpu_to_read = find_cpu_to_read(event, cpu_to_read, local_cpu);
put_cpu();

ret = smp_call_function_single(cpu_to_read, __perf_event_read, &data, 1);