Re: [perf] unchecked MSR access error: WRMSR to 0x3f1

From: Vince Weaver
Date: Thu Jun 19 2025 - 16:11:03 EST


On Thu, 19 Jun 2025, Liang, Kan wrote:

>
>
> On 2025-06-19 11:17 a.m., Vince Weaver wrote:
> > On Wed, 18 Jun 2025, Vince Weaver wrote:
> >
> >> On Wed, 18 Jun 2025, Liang, Kan wrote:
> >>
> >>> No, the error message doesn't say it. I just want to check whether you
> >>> have extra information, because the Topdown perf metrics are only
> >>> supported on P-cores. I want to understand whether the code gets mixed
> >>> up with the E-cores.
> >>
> >> I can't easily tell from the fuzzer as it intentionally switches cores
> >> often. I guess I could patch the kernel to report CPU when the WRMSR
> >> error triggers.
> >
> > I've patched the kernel to get rid of the warn_once() and added a printk
> > for smp_processor_id() (is that what I want to print?). In any case, that
> > reports the warning is happening on CPU1, which is actually a P-core, not
> > an Atom core.
>
> Thanks for the confirmation.
> I've tried the fuzzer on some newer machines (later than Raptor Lake), but I
> haven't reproduced it yet. I will try to find a Raptor Lake machine for more tests.

I've managed to use the perf_fuzzer tools to create a small reproducible
test case that can trigger the bug. It's included below.

Vince

---


/* WRMSR top-down reproducer */
/* by Vince Weaver <vincent.weaver _at_ maine.edu> */

#define _GNU_SOURCE 1
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <poll.h>
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
#include <sched.h>

/* Descriptor table indexed by the slot numbers used in main(); main() sets every entry to -1 before opening events. */
static int fd[1024];
/* Event attribute structures, indexed by the same slot numbers as fd[], passed to perf_event_open(). */
static struct perf_event_attr pe[1024];

/* Not referenced anywhere in the visible code. */
FILE *fff;
/* Receives the return value of sched_setaffinity() in main(). */
static int result;

/*
 * Thin wrapper that issues the raw perf_event_open system call via
 * syscall(), forwarding all five arguments unchanged.
 *
 * Returns whatever syscall() returns, narrowed to int.
 */
int perf_event_open(struct perf_event_attr *hw_event_uptr,
		pid_t pid, int cpu, int group_fd, unsigned long flags) {

	long ret;

	ret = syscall(__NR_perf_event_open, hw_event_uptr, pid, cpu,
			group_fd, flags);

	return (int)ret;
}

/*
 * Replays the recorded system-call sequence of the reproducer:
 *   1. open a raw perf event for the current thread on CPU 1,
 *   2. disable the task's perf events via prctl(),
 *   3. pin the current thread to CPU 1,
 *   4. re-enable the task's perf events via prctl(),
 *   5. open a (disabled) hardware branch-misses event on CPU 22.
 *
 * The perf_event_attr values are kept exactly as recorded; the return
 * values of the perf_event_open() and prctl() calls are intentionally not
 * checked so the sequence always runs to the end.
 *
 * Returns 0 after the replay, or 1 if the CPU mask cannot be allocated.
 */
int main(int argc, char **argv) {

	int i;

	(void)argc;
	(void)argv;

	/* Mark every slot as "not open". */
	for(i=0;i<1024;i++) fd[i]=-1;

	/* 1 */
	/* fd = 72 */

	memset(&pe[72],0,sizeof(struct perf_event_attr));
	pe[72].type=PERF_TYPE_RAW;
	pe[72].config=0xffff880000008000ULL;
	pe[72].sample_freq=0x49ULL;
	pe[72].sample_type=PERF_SAMPLE_TID|PERF_SAMPLE_ADDR|PERF_SAMPLE_READ|PERF_SAMPLE_CPU; /* 9a */
	pe[72].read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP|0x10ULL; /* 1c */
	pe[72].exclude_user=1;
	pe[72].exclude_kernel=1;
	pe[72].mmap=1;
	pe[72].comm=1;
	pe[72].freq=1;
	pe[72].enable_on_exec=1;
	pe[72].watermark=1;
	pe[72].precise_ip=1; /* constant skid */
	pe[72].sample_id_all=1;
	pe[72].exclude_callchain_user=1;
	pe[72].comm_exec=1;
	pe[72].wakeup_watermark=-1970634752;
	pe[72].bp_type=HW_BREAKPOINT_R|HW_BREAKPOINT_W; /*3*/
	pe[72].bp_addr=0x0ULL;
	pe[72].bp_len=0x2ULL;
	pe[72].branch_sample_type=PERF_SAMPLE_BRANCH_HV|PERF_SAMPLE_BRANCH_ANY|PERF_SAMPLE_BRANCH_ANY_CALL|PERF_SAMPLE_BRANCH_ANY_RETURN|PERF_SAMPLE_BRANCH_IND_JUMP|PERF_SAMPLE_BRANCH_ABORT_TX|PERF_SAMPLE_BRANCH_COND|0xbcbcbca800ULL;
	pe[72].sample_regs_user=4294967253ULL;
	pe[72].sample_stack_user=0x23008000;

	fd[72]=perf_event_open(&pe[72],
			0, /* current thread */
			1, /* Only cpu 1 */
			fd[114], /* slot 114 is never opened here, so this is still -1 */
			PERF_FLAG_FD_NO_GROUP /*1*/ );


	/* 2 */
	prctl(PR_TASK_PERF_EVENTS_DISABLE);
	/* 3 */
	// a 0 1 1
	// which=0,num=1,cpi=1

#define MAX_CPUS 1024

	pid_t pid=0; /* current thread */
	cpu_set_t *cpu_mask;
	int max_cpus=MAX_CPUS;
	size_t set_size;

	cpu_mask=CPU_ALLOC(max_cpus);
	if (cpu_mask==NULL) {
		perror("CPU_ALLOC");
		return 1;
	}
	set_size=CPU_ALLOC_SIZE(max_cpus);

	CPU_ZERO_S(set_size,cpu_mask);
	CPU_SET_S(1,set_size,cpu_mask);

	/*
	 * The second argument is the size of the mask in bytes, not the
	 * number of CPUs; passing the CPU count would describe a buffer
	 * larger than the one CPU_ALLOC() returned.
	 */
	result=sched_setaffinity(pid,set_size,cpu_mask);
	if (result<0) {
		/* Non-fatal: keep replaying the remaining calls. */
		perror("sched_setaffinity");
	}

	CPU_FREE(cpu_mask);

	/* 4 */
	prctl(PR_TASK_PERF_EVENTS_ENABLE);
	/* 5 */
	/* fd = 38 */

	memset(&pe[38],0,sizeof(struct perf_event_attr));
	pe[38].type=PERF_TYPE_HARDWARE;
	pe[38].size=112;
	pe[38].config=PERF_COUNT_HW_BRANCH_MISSES;
	pe[38].sample_type=0; /* 0 */
	pe[38].read_format=PERF_FORMAT_ID|PERF_FORMAT_GROUP|0x10ULL; /* 1c */
	pe[38].disabled=1;
	pe[38].precise_ip=0; /* arbitrary skid */
	pe[38].wakeup_events=0;
	pe[38].bp_type=HW_BREAKPOINT_EMPTY;

	fd[38]=perf_event_open(&pe[38],
			getpid(), /* current thread */
			22, /* Only cpu 22 */
			-1, /* New Group Leader */
			PERF_FLAG_FD_NO_GROUP /*1*/ );


	/* Replayed 4 syscalls */
	return 0;
}