Re: [PATCH] perf: Fix sys_perf_event_open() race against self

From: Ravi Bangoria
Date: Mon May 23 2022 - 06:33:48 EST


On 21-May-22 12:08 AM, Peter Zijlstra wrote:
>
> Norbert reported that it's possible to race sys_perf_event_open() such
> that the loser ends up in another context from the group leader,
> triggering many WARNs.
>
> The move_group case checks for races against itself, but the
> !move_group case doesn't, seemingly relying on the previous
> group_leader->ctx == ctx check. However, that check is racy due to not
> holding any locks at that time.
>
> Therefore, re-check the result after acquiring locks and bail
> if they no longer match.
>
> Additionally, clarify the not_move_group case from the
> move_group-vs-move_group race.

Tested-by: Ravi Bangoria <ravi.bangoria@xxxxxxx>

Below is a quick test to reproduce the issue. It triggers WARN_ON()
as a normal user. With the patch applied, no warnings are seen.

$ cat perf-event-open-race.c
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <string.h>
#include <errno.h>

/* Per-thread parameters consumed by thread(). */
struct thread_args {
	int group_leader;	/* fd handed to perf_event_open() as group_fd */
	int type;		/* value stored into perf_event_attr.type */
	unsigned long config;	/* value stored into perf_event_attr.config */
	pid_t target;		/* pid argument for perf_event_open() */
};

/*
 * Invoke the raw perf_event_open system call through syscall().
 *
 * All five arguments are forwarded unchanged. The syscall() result is held
 * in an int and then widened back to the declared long return type.
 */
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	const int fd = syscall(__NR_perf_event_open, hw_event, pid, cpu,
			       group_fd, flags);

	return fd;
}

/*
 * Initialise *pe for a counting event of the given type and config.
 *
 * The whole structure is zeroed first, so every field not assigned below
 * is 0. The size field is set to the size of struct perf_event_attr, and
 * the exclude_kernel and exclude_hv bits are both set.
 */
static void
perf_attr_prepare(struct perf_event_attr *pe, int type, unsigned long config)
{
	memset(pe, 0, sizeof(*pe));

	pe->size = sizeof(*pe);
	pe->type = type;
	pe->config = config;

	pe->exclude_hv = 1;
	pe->exclude_kernel = 1;
}

void *thread(void *arg)
{
struct thread_args *args = arg;
int group_leader = args->group_leader;
unsigned long config = args->config;
struct perf_event_attr pe;
int type = args->type;
pid_t target = args->target;
int fd;

perf_attr_prepare(&pe, type, config);
fd = perf_event_open(&pe, target, -1, group_leader, 0);
if (fd <= 0)
printf("Failed to open %d type event(err: %d)\n", type, -errno);
else
close(fd);

pthread_exit(NULL);
}

int main(int argc, char *argv[])
{
struct thread_args thread_1_args;
struct thread_args thread_2_args;
pthread_t t[2];
struct perf_event_attr pe;
int group_leader;

perf_attr_prepare(&pe, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES);
group_leader = perf_event_open(&pe, 0, -1, -1, 0);
if (group_leader <= 0) {
printf("Failed to open group leader (err: %d)\n", -errno);
exit(EXIT_FAILURE);
}

thread_1_args.group_leader = group_leader;
thread_1_args.type = PERF_TYPE_SOFTWARE;
thread_1_args.config = PERF_COUNT_SW_CONTEXT_SWITCHES;
thread_1_args.target = getpid();
thread_2_args.group_leader = group_leader;
thread_2_args.type = PERF_TYPE_HARDWARE;
thread_2_args.config = PERF_COUNT_HW_CPU_CYCLES;
thread_2_args.target = getpid();

if (pthread_create(&t[0], NULL, thread, (void *)&thread_1_args) != 0) {
perror("pthread_create(t1) error");
exit(1);
}

if (pthread_create(&t[1], NULL, thread, (void *)&thread_2_args) != 0) {
perror("pthread_create(t2) error");
exit(1);
}

if (pthread_join(t[0], NULL) != 0) {
perror("pthread_join(t1) error");
exit(1);
}

if (pthread_join(t[1], NULL) != 0) {
perror("pthread_join(t2) error");
exit(1);
}

close(group_leader);
}

$ while true; do ./perf-event-open-race; done

$ dmesg
WARNING: CPU: 121 PID: 7304 at kernel/events/core.c:1948 perf_group_attach+0xfb/0x110
[...]
RIP: 0010:perf_group_attach+0xfb/0x110
[...]
RSP: 0018:ffff9fec0fa87dc8 EFLAGS: 00010006
RAX: ffff8f988b3d0000 RBX: ffff8f98872bc0f8 RCX: 0000000000000002
RDX: 0000000000000001 RSI: ffffffffb571c83b RDI: ffffffffb5793126
RBP: ffff8f9887514830 R08: ffff8f988b3d0120 R09: 0000000000000000
R10: 0000000000000001 R11: 0000000000000001 R12: ffff8f988b3d0000
R13: ffff8f988b3d0008 R14: ffff8f988b3d0000 R15: ffff8f988b3d0000
FS: 00007f30c5bff700(0000) GS:ffff8fb7f6800000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f719bf4f130 CR3: 000000208c010003 CR4: 0000000000770ee0
PKRU: 55555554
Call Trace:
<TASK>
perf_install_in_context+0x1af/0x210
__do_sys_perf_event_open+0xcb6/0x12b0
do_syscall_64+0x3a/0x80
entry_SYSCALL_64_after_hwframe+0x44/0xae
RIP: 0033:0x7f30c5c398cd
[...]

Thanks,
Ravi