srcu: BUG in __synchronize_srcu

From: Andrey Konovalov
Date: Fri Mar 10 2017 - 14:28:41 EST


Hi,

I've got the following error report while fuzzing the kernel with
syzkaller on an arm64 board.

On linux-next commit 56b8bad5e066c23e8fa273ef5fba50bd3da2ace8 (Mar 8).

A reproducer and .config are attached.

The bug happens while executing the following syzkaller program in a loop.
While it looks kvm-related, it might be that kvm just stresses the
srcu subsystem.

mmap(&(0x7f0000000000/0xfff000)=nil, (0xfff000), 0x3, 0x32,
0xffffffffffffffff, 0x0)
r0 = openat$kvm(0xffffffffffffff9c,
&(0x7f0000a05000)="2f6465762f6b766d00", 0x0, 0x0)
ioctl$KVM_CREATE_VM(r0, 0xae01, 0x0)

------------[ cut here ]------------
kernel BUG at kernel/rcu/srcu.c:436!
Internal error: Oops - BUG: 0 [#1] PREEMPT SMP
Modules linked in: meson_drm drm_kms_helper drm dwmac_generic realtek
dwmac_meson8b stmmac_platform stmmac meson_rng rng_core meson_gxbb_wdt
ipv6
CPU: 3 PID: 4250 Comm: a.out Not tainted 4.11.0-rc1-next-20170308-xc2-dirty #3
Hardware name: Hardkernel ODROID-C2 (DT)
task: ffff800063699700 task.stack: ffff800063cfc000
PC is at[< none >] __synchronize_srcu+0x3d0/0x470
kernel/rcu/srcu.c:412
LR is at[< none >] __synchronize_srcu+0x130/0x470
kernel/rcu/srcu.c:434
pc : [<ffff20000821a3b8>] lr : [<ffff20000821a118>] pstate: 80000145
sp : ffff800063cffb00
x29: ffff800063cffb00 x28: ffff80005b1d6e00
x27: 1fffe4000156b242 x26: ffff800063cffb70
x25: 1fffe4000156b23b x24: ffff20000ab591d8
x23: ffff200009dbf000 x22: ffff20000ab591a0
x21: ffff20000ab59210 x20: ffff800063cffb70
x19: ffff20000ab59190 x18: 0000000000000a03
x17: 0000ffff944f3950 x16: ffff20000811f818
x15: 0000000000000000 x14: 0000000000000007
x13: 0000000000000002 x12: 0000000000000000
x11: 0000000000000040 x10: 1fffe400014b568c
x9 : ffff20000ab29000 x8 : 0000000000000007
x7 : 0000000000000001 x6 : 0000000000000000
x5 : 0000000000000040 x4 : 0000000000000003
x3 : ffff20000ab59208 x2 : 1fffe4000156b243
x1 : 0000000000000000 x0 : ffff80005e71fb70

Process a.out (pid: 4250, stack limit = 0xffff800063cfc000)
Stack: (0xffff800063cffb00 to 0xffff800063d00000)
fb00: ffff800063cffbd0 ffff20000821a480 ffff20000ab59190 1ffff0000b63adc0
fb20: dfff200000000000 ffff20000ab59190 ffff80004b9a8a00 1ffff000097351bc
fb40: ffff80004b9a8de0 0000000000000000 ffff800060cad328 ffff80005b1d6e00
fb60: ffff80004b9a8a00 1ffff000097351bc ffff80004f5e7b70 ffff200008217968
fb80: ffff800000000001 dead4ead00010001 dfff2000ffffffff ffffffffffffffff
fba0: ffff20000ab4c4b0 0000000000000000 0000000000000000 ffff200009b0b358
fbc0: ffff800063cffbc0 ffff800063cffbc0 ffff800063cffbf0 ffff2000083ffd20
fbe0: ffff80005b1d6e00 0000000000000140 ffff800063cffc50 ffff2000083aedfc
fc00: ffff80004b9a8a00 ffff80004b9a8a00 ffff80004b9a8d78 0000000000000001
fc20: 00000000000002a6 ffff80004f406780 ffff80004b9a8aa0 ffff800063699ac8
fc40: 1ffff0000c6d3359 ffff800063699700 ffff800063cffd20 ffff20000810caec
fc60: ffff80004b9a8a00 ffff80004b9a8b20 ffff80004b9a8d78 0000000000000001
fc80: ffff80005ebae2d8 ffff800063699ac8 ffff800063cffca0 ffff20000840fc08
fca0: ffff800063cffce0 ffff20000843327c ffff80005ebae2d8 ffff80004b9a8a00
fcc0: ffff80005ebae0f0 ffff200009de8000 ffff800063cffce0 ffff2000084332c4
fce0: ffff800063cffd20 ffff20000810cc64 ffff80004b9a8a00 ffff80004b9a8a48
fd00: ffff80004b9a8d78 0000000000000001 ffff800063cffd20 ffff20000810cae0
fd20: ffff800063cffd60 ffff20000811db88 ffff800063699700 ffff800063699700
fd40: ffff80004b9a8a00 0000000000000001 00000000000002a6 ffff80004f406780
fd60: ffff800063cffe40 ffff20000811f694 ffff80004f406780 0000000000000000
fd80: ffff80004f40681c 0000000000000004 1ffff00009e80d03 1ffff00009e80d02
fda0: ffff80004f406810 ffff800063699bd0 ffff800063699700 ffff800063699700
fdc0: ffff800063cffe80 ffff200008490868 0000000000000000 ffff80005fd45000
fde0: ffff80006369972c ffff800063699d48 1ffff0000c6d32e5 0000000000000004
fe00: 0000000000000123 000000000000001d 1ffff0000c6d33a9 ffff800063699700
fe20: ffff800063cffe30 ffff200008813c5c ffff800063cffe40 ffff20000811f688
fe40: ffff800063cffea0 ffff20000811f838 0000000000000000 000060006d24d000
fe60: ffffffffffffffff 0000ffff944f3974 0000000000000000 0000000000000015
fe80: 0000000000000123 000000000000005e ffff200009852000 ffff20000811f82c
fea0: 0000000000000000 ffff200008083f70 0000000000000000 0000000000000015
fec0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
fee0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
ff00: 000000000000005e ffffff80ffffffd0 0101010101010101 0000000000000020
ff20: 0000000000000018 0000000056bcb768 0000000000000000 0000ffff945be000
ff40: 0000000000413110 0000ffff944f3950 0000000000000a03 00000000004020f8
ff60: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
ff80: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
ffa0: 0000000000000000 0000ffffc0cc99b0 0000000000401248 0000ffffc0cc99b0
ffc0: 0000ffff944f3974 0000000000000000 0000000000000000 000000000000005e
ffe0: 0000000000000000 0000000000000000 0403030303030100 0807060606060605
Call trace:
Exception stack(0xffff800063cff910 to 0xffff800063cffa40)
f900: ffff20000ab59190 0001000000000000
f920: ffff800063cffb00 ffff20000821a3b8 0000000080000145 000000000000003d
f940: 1fffe4000156b23b ffff800063cffb70 ffff800063cff980 0001000000000000
f960: ffff800063cff9d0 ffff2000081da8e8 ffff800063699ec0 ffff200009df9000
f980: ffff800063cff990 ffff20000891e1e0 ffff800063cff9d0 ffff20000891e23c
f9a0: ffff200009dbfe18 0000000000000040 0000000000000004 0000000000000001
f9c0: 00000000000008ac 00000000000008ac ffff80005e71fb70 0000000000000000
f9e0: 1fffe4000156b243 ffff20000ab59208 0000000000000003 0000000000000040
fa00: 0000000000000000 0000000000000001 0000000000000007 ffff20000ab29000
fa20: 1fffe400014b568c 0000000000000040 0000000000000000 0000000000000002
[<ffff20000821a3b8>] __synchronize_srcu+0x3d0/0x470 kernel/rcu/srcu.c:412
[<ffff20000821a480>] synchronize_srcu+0x28/0x60 kernel/rcu/srcu.c:516
[<ffff2000083ffd20>] __mmu_notifier_release+0x268/0x3e0 mm/mmu_notifier.c:102
[< inline >] mmu_notifier_release ./include/linux/mmu_notifier.h:235
[<ffff2000083aedfc>] exit_mmap+0x21c/0x288 mm/mmap.c:2941
[< inline >] __mmput kernel/fork.c:881
[<ffff20000810caec>] mmput+0xdc/0x2e0 kernel/fork.c:903
[< inline >] exit_mm kernel/exit.c:557
[<ffff20000811db88>] do_exit+0x648/0x2020 kernel/exit.c:865
[<ffff20000811f694>] do_group_exit+0xdc/0x260 kernel/exit.c:982
[< inline >] SYSC_exit_group kernel/exit.c:993
[<ffff20000811f838>] __wake_up_parent+0x0/0x60 kernel/exit.c:991
[<ffff200008083f70>] el0_svc_naked+0x24/0x28 arch/arm64/kernel/entry.S:813
Code: 97feee10 35fff680 17ffff1c d503201f (d4210000)
---[ end trace b727e9858bfac1ff ]---
Kernel panic - not syncing: Fatal exception

Attachment: .config
Description: Binary data

// autogenerated by syzkaller (http://github.com/google/syzkaller)

/*
 * Fallback syscall numbers: each macro is defined here only if it is not
 * already defined when this point is reached.
 * NOTE(review): 9/257/16 look like the x86-64 numbers, while the report this
 * reproducer accompanies was captured on arm64 -- confirm which values end up
 * in effect on the target architecture.
 */
#ifndef __NR_mmap
#define __NR_mmap 9
#endif
#ifndef __NR_openat
#define __NR_openat 257
#endif
#ifndef __NR_ioctl
#define __NR_ioctl 16
#endif

#define _GNU_SOURCE

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>

#include <linux/capability.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <linux/kvm.h>
#include <linux/sched.h>
#include <net/if_arp.h>

#include <assert.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <grp.h>
#include <pthread.h>
#include <setjmp.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Process exit statuses handed to doexit() by the error helpers. */
/* Used by fail() when errno is neither ENOMEM nor EAGAIN. */
const int kFailStatus = 67;
/* Not referenced anywhere in the visible part of this file. */
const int kErrorStatus = 68;
/* Used by exitf(), and by fail() when errno is ENOMEM or EAGAIN. */
const int kRetryStatus = 69;

/*
 * Leave the process through the exit_group syscall with the given status.
 * Declared noreturn: should the syscall ever come back, spin forever.
 */
__attribute__((noreturn)) void doexit(int status)
{
  syscall(__NR_exit_group, status);
  /* volatile counter gives the endless loop an observable side effect */
  for (volatile unsigned spin = 0;; spin++) {
  }
}

/*
 * Report a fatal error and terminate.
 *
 * msg is a printf-style format consumed together with the variadic
 * arguments. The message goes to stderr followed by " (errno N)", where N is
 * errno as it was on entry. Exits with kRetryStatus when that errno is
 * ENOMEM or EAGAIN, otherwise with kFailStatus. Never returns.
 */
__attribute__((noreturn)) void fail(const char* msg, ...)
{
  /* capture errno first: the stdio calls below may overwrite it */
  const int saved_errno = errno;
  va_list ap;

  fflush(stdout);
  va_start(ap, msg);
  vfprintf(stderr, msg, ap);
  va_end(ap);
  fprintf(stderr, " (errno %d)\n", saved_errno);

  if (saved_errno == ENOMEM || saved_errno == EAGAIN)
    doexit(kRetryStatus);
  doexit(kFailStatus);
}

/*
 * Like fail(), but always exits with kRetryStatus.
 *
 * msg is a printf-style format consumed together with the variadic
 * arguments; the formatted text is written to stderr followed by
 * " (errno N)" with the errno value seen on entry. Never returns.
 */
__attribute__((noreturn)) void exitf(const char* msg, ...)
{
  /* capture errno first: the stdio calls below may overwrite it */
  const int saved_errno = errno;
  va_list ap;

  fflush(stdout);
  va_start(ap, msg);
  vfprintf(stderr, msg, ap);
  va_end(ap);
  fprintf(stderr, " (errno %d)\n", saved_errno);

  doexit(kRetryStatus);
}

/* Non-zero enables debug() output; never assigned in the visible code. */
static int flag_debug;

/*
 * printf-style trace helper. When flag_debug is non-zero the formatted
 * message is written to stdout and flushed at once; otherwise the call does
 * nothing.
 */
void debug(const char* msg, ...)
{
  if (flag_debug) {
    va_list ap;

    va_start(ap, msg);
    vfprintf(stdout, msg, ap);
    va_end(ap);
    fflush(stdout);
  }
}

/*
 * Per-thread state shared with the NONFAILING() macro: skip_segv is non-zero
 * while a guarded statement is running, and segv_env is the jump buffer the
 * handler returns through.
 */
__thread int skip_segv;
__thread jmp_buf segv_env;

/*
 * Handler installed for SIGSEGV and SIGBUS.
 *
 * If the current thread is inside NONFAILING() (skip_segv != 0) and the
 * faulting address lies outside [1 MiB, 100 MiB], the fault is skipped by
 * jumping back to segv_env. In every other case the process exits with the
 * signal number as its status.
 *
 * sig  - signal number delivered.
 * info - signal details; only si_addr (the faulting address) is read.
 * uctx - unused.
 */
static void segv_handler(int sig, siginfo_t* info, void* uctx)
{
  (void)uctx;

  uintptr_t addr = (uintptr_t)info->si_addr;
  const uintptr_t prog_start = 1 << 20;
  const uintptr_t prog_end = 100 << 20;

  if (__atomic_load_n(&skip_segv, __ATOMIC_RELAXED) &&
      (addr < prog_start || addr > prog_end)) {
    /* %p requires a void* argument, so convert the integer back */
    debug("SIGSEGV on %p, skipping\n", (void*)addr);
    _longjmp(segv_env, 1);
  }
  debug("SIGSEGV on %p, exiting\n", (void*)addr);
  /* doexit() is declared noreturn, so nothing may follow it */
  doexit(sig);
}

/*
 * Register segv_handler() for SIGSEGV and then SIGBUS, using SA_SIGINFO (the
 * handler receives a siginfo_t) and SA_NODEFER. The results of sigaction()
 * are not checked.
 */
static void install_segv_handler()
{
  static const int fault_signals[] = {SIGSEGV, SIGBUS};
  struct sigaction action;
  size_t k;

  memset(&action, 0, sizeof(action));
  action.sa_flags = SA_NODEFER | SA_SIGINFO;
  action.sa_sigaction = segv_handler;

  for (k = 0; k < sizeof(fault_signals) / sizeof(fault_signals[0]); k++)
    sigaction(fault_signals[k], &action, NULL);
}

/*
 * Run the given statement(s) with fault skipping enabled for this thread.
 *
 * skip_segv is incremented before the statement and decremented after it.
 * The statement runs only on the direct return of _setjmp(segv_env); if
 * segv_handler() jumps back via _longjmp(), _setjmp returns non-zero, the
 * statement is not run again, and execution continues at the decrement.
 *
 * The expansion is a bare { } block rather than do { } while (0).
 */
#define NONFAILING(...) \
{ \
__atomic_fetch_add(&skip_segv, 1, __ATOMIC_SEQ_CST); \
if (_setjmp(segv_env) == 0) { \
__VA_ARGS__; \
} \
__atomic_fetch_sub(&skip_segv, 1, __ATOMIC_SEQ_CST); \
}

/* Mask with the low bf_len bits set, converted to `type` (bf_len < 64). */
#define BITMASK_LEN(type, bf_len) ((type)((1ull << (bf_len)) - 1))

/* Mask of bf_len bits starting at bit bf_off, converted to `type`. */
#define BITMASK_LEN_OFF(type, bf_off, bf_len) \
  ((type)(BITMASK_LEN(type, (bf_len)) << (bf_off)))

/*
 * Store val into the object of type `type` at addr.
 *
 * With bf_off == 0 and bf_len == 0 the whole object is overwritten.
 * Otherwise only the bf_len-bit field starting at bit bf_off is replaced and
 * all other bits of the object are preserved.
 *
 * Wrapped in do { } while (0) so the macro acts as one statement and is safe
 * inside an unbraced if/else; the call site supplies the trailing semicolon.
 */
#define STORE_BY_BITMASK(type, addr, val, bf_off, bf_len)                  \
  do {                                                                     \
    if ((bf_off) == 0 && (bf_len) == 0) {                                  \
      *(type*)(addr) = (type)(val);                                        \
    } else {                                                               \
      type new_val = *(type*)(addr);                                       \
      new_val &= ~BITMASK_LEN_OFF(type, (bf_off), (bf_len));               \
      new_val |= ((type)(val)&BITMASK_LEN(type, (bf_len))) << (bf_off);    \
      *(type*)(addr) = new_val;                                            \
    }                                                                      \
  } while (0)

/*
 * Issue raw system call number nr with arguments a0..a5 and return the
 * result of syscall() converted to uintptr_t.
 *
 * a6, a7 and a8 are accepted but never forwarded.
 */
static uintptr_t execute_syscall(int nr, uintptr_t a0, uintptr_t a1,
                                 uintptr_t a2, uintptr_t a3,
                                 uintptr_t a4, uintptr_t a5,
                                 uintptr_t a6, uintptr_t a7,
                                 uintptr_t a8)
{
  (void)a6;
  (void)a7;
  (void)a8;

  /* every syscall number takes the same path */
  return syscall(nr, a0, a1, a2, a3, a4, a5);
}

/*
 * One-time setup for a top-level worker process:
 *  - set signals 0x20 and 0x21 to SIG_IGN through the raw rt_sigaction
 *    syscall (results not checked);
 *  - install the SIGSEGV/SIGBUS handler;
 *  - create a fresh "./syzkaller.XXXXXX" directory, chmod it to 0777 and
 *    make it the current directory, calling fail() if any step fails.
 */
static void setup_main_process()
{
  struct sigaction ignore;
  int signo;

  memset(&ignore, 0, sizeof(ignore));
  ignore.sa_handler = SIG_IGN;
  for (signo = 0x20; signo <= 0x21; signo++)
    syscall(SYS_rt_sigaction, signo, &ignore, NULL, 8);
  install_segv_handler();

  char name_template[] = "./syzkaller.XXXXXX";
  char* workdir = mkdtemp(name_template);
  if (workdir == NULL)
    fail("failed to mkdtemp");
  if (chmod(workdir, 0777) != 0)
    fail("failed to chmod");
  if (chdir(workdir) != 0)
    fail("failed to chdir");
}

static void loop();

/*
 * Process setup shared by the sandbox entry points.
 *
 * Requests SIGKILL on parent death, starts a new process group and session,
 * caps address space (128 MiB), file size (1 MiB), stack (1 MiB) and core
 * size (0), then unshares the mount namespace, the IPC namespace and the I/O
 * context. No return value is checked.
 */
static void sandbox_common()
{
  const struct rlimit as_cap = {.rlim_cur = 128 << 20, .rlim_max = 128 << 20};
  const struct rlimit fsize_cap = {.rlim_cur = 1 << 20, .rlim_max = 1 << 20};
  const struct rlimit stack_cap = {.rlim_cur = 1 << 20, .rlim_max = 1 << 20};
  const struct rlimit core_cap = {.rlim_cur = 0, .rlim_max = 0};

  prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
  setpgrp();
  setsid();

  setrlimit(RLIMIT_AS, &as_cap);
  setrlimit(RLIMIT_FSIZE, &fsize_cap);
  setrlimit(RLIMIT_STACK, &stack_cap);
  setrlimit(RLIMIT_CORE, &core_cap);

  unshare(CLONE_NEWNS);
  unshare(CLONE_NEWIPC);
  unshare(CLONE_IO);
}

/*
 * Fork a child that applies sandbox_common() and then runs loop().
 *
 * In the calling process this returns whatever fork() returned (the child's
 * pid, or a negative value if fork() failed). The child never returns: it
 * exits with status 1 if loop() ever comes back.
 *
 * executor_pid and enable_tun are accepted but unused.
 */
static int do_sandbox_none(int executor_pid, bool enable_tun)
{
  (void)executor_pid;
  (void)enable_tun;

  const int child = fork();
  if (child == 0) {
    sandbox_common();
    loop();
    doexit(1);
  }
  return child;
}

/*
 * Recursively delete directory dir and everything beneath it.
 *
 * Each entry other than "." and ".." is lstat()ed. Sub-directories are
 * removed by recursion. Anything else is unlink()ed, retrying while the
 * error is EBUSY: the path is lazily unmounted with umount2(MNT_DETACH)
 * before the next attempt, and the helper gives up after about 100 retries.
 * EROFS is ignored.
 *
 * The directory itself is then rmdir()ed. EROFS is ignored, EBUSY triggers a
 * lazy unmount and a retry, and ENOTEMPTY restarts the whole scan (at most
 * 100 times). Every unrecoverable error terminates the process via exitf().
 */
static void remove_dir(const char* dir)
{
  DIR* dp;
  struct dirent* ep;
  int iter = 0; /* number of full rescans caused by ENOTEMPTY */
retry:
  dp = opendir(dir);
  if (dp == NULL) {
    if (errno == EMFILE) {
      exitf("opendir(%s) failed due to NOFILE, exiting", dir);
    }
    exitf("opendir(%s) failed", dir);
  }
  while ((ep = readdir(dp))) {
    if (strcmp(ep->d_name, ".") == 0 || strcmp(ep->d_name, "..") == 0)
      continue;
    char filename[FILENAME_MAX];
    snprintf(filename, sizeof(filename), "%s/%s", dir, ep->d_name);
    struct stat st;
    /* lstat, not stat: a symlink is unlinked rather than followed */
    if (lstat(filename, &st))
      exitf("lstat(%s) failed", filename);
    if (S_ISDIR(st.st_mode)) {
      remove_dir(filename);
      continue;
    }
    int i;
    for (i = 0;; i++) {
      debug("unlink(%s)\n", filename);
      if (unlink(filename) == 0)
        break;
      if (errno == EROFS) {
        debug("ignoring EROFS\n");
        break;
      }
      if (errno != EBUSY || i > 100)
        exitf("unlink(%s) failed", filename);
      /* EBUSY: detach whatever is mounted on the path, then try again */
      debug("umount(%s)\n", filename);
      if (umount2(filename, MNT_DETACH))
        exitf("umount(%s) failed", filename);
    }
  }
  closedir(dp);
  int i;
  for (i = 0;; i++) {
    debug("rmdir(%s)\n", dir);
    if (rmdir(dir) == 0)
      break;
    if (i < 100) {
      if (errno == EROFS) {
        debug("ignoring EROFS\n");
        break;
      }
      if (errno == EBUSY) {
        debug("umount(%s)\n", dir);
        if (umount2(dir, MNT_DETACH))
          exitf("umount(%s) failed", dir);
        continue;
      }
      if (errno == ENOTEMPTY) {
        /* new entries appeared since the scan: start over */
        if (iter < 100) {
          iter++;
          goto retry;
        }
      }
    }
    exitf("rmdir(%s) failed", dir);
  }
}

/*
 * Return the CLOCK_MONOTONIC time in milliseconds (whole seconds * 1000 plus
 * nanoseconds / 1000000). Calls fail() if clock_gettime() reports an error.
 */
static uint64_t current_time_ms()
{
  struct timespec now;

  if (clock_gettime(CLOCK_MONOTONIC, &now) != 0)
    fail("clock_gettime failed");

  const uint64_t ms_from_sec = (uint64_t)now.tv_sec * 1000;
  const uint64_t ms_from_nsec = (uint64_t)now.tv_nsec / 1000000;
  return ms_from_sec + ms_from_nsec;
}

static void test();

/*
 * Run test() forever, one forked child per iteration.
 *
 * Iteration N creates directory "./N". The child asks for SIGKILL on parent
 * death, starts its own process group, enters that directory, runs test()
 * and exits with status 0. The parent polls for the child about once per
 * millisecond; after more than 5 seconds it SIGKILLs the child's process
 * group and the child, then waits until the child is reaped. The directory
 * is removed before the next iteration.
 */
void loop()
{
  for (int iter = 0;; iter++) {
    char workdir[256];
    sprintf(workdir, "./%d", iter);
    if (mkdir(workdir, 0777) != 0)
      fail("failed to mkdir");

    const int child = fork();
    if (child < 0)
      fail("clone failed");
    if (child == 0) {
      prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
      setpgrp();
      if (chdir(workdir) != 0)
        fail("failed to chdir");
      test();
      doexit(0);
    }

    int status = 0;
    const uint64_t started = current_time_ms();
    /* non-blocking poll until this iteration's child has been reaped */
    while (waitpid(-1, &status, __WALL | WNOHANG) != child) {
      usleep(1000);
      if (current_time_ms() - started <= 5 * 1000)
        continue;
      /* timed out: kill the whole group, then block until the child is gone */
      kill(-child, SIGKILL);
      kill(child, SIGKILL);
      while (waitpid(-1, &status, __WALL) != child) {
      }
      break;
    }
    remove_dir(workdir);
  }
}

/*
 * Results of the calls made by test(). r[1] is never assigned there and keeps
 * the all-ones pattern written by the memset.
 */
long r[4];
/*
 * One run of the reproducer, mirroring the syzkaller program quoted in the
 * report: mmap a scratch region, open "/dev/kvm", then issue ioctl 0xae01
 * (KVM_CREATE_VM in that program) on the resulting descriptor.
 * No return value is checked; each call is made regardless of the previous
 * one's outcome.
 */
void test()
{
/* Fill every slot with 0xff bytes, i.e. -1. */
memset(r, -1, sizeof(r));
/* Map 0xfff000 bytes at 0x20000000 with prot 0x3, flags 0x32, fd -1. */
r[0] = execute_syscall(__NR_mmap, 0x20000000ul, 0xfff000ul, 0x3ul,
0x32ul, 0xfffffffffffffffful, 0x0ul, 0, 0, 0);
/*
 * Copy the 9 bytes "/dev/kvm\0" into the mapping. NONFAILING guards the
 * copy so that a fault here does not end the run.
 */
NONFAILING(memcpy((void*)0x20a05000,
"\x2f\x64\x65\x76\x2f\x6b\x76\x6d\x00", 9));
/* openat(0xffffffffffffff9c, <path at 0x20a05000>, flags 0, mode 0). */
r[2] = execute_syscall(__NR_openat, 0xffffffffffffff9cul,
0x20a05000ul, 0x0ul, 0x0ul, 0, 0, 0, 0, 0);
/* ioctl(r[2], 0xae01, 0) on whatever openat returned. */
r[3] = execute_syscall(__NR_ioctl, r[2], 0xae01ul, 0x0ul, 0, 0, 0, 0,
0, 0);
}
/*
 * Start 8 worker processes and then sleep.
 *
 * Each worker runs setup_main_process(), launches the sandboxed loop through
 * do_sandbox_none(), waits until that child is reaped and returns 0. The
 * original process only sleeps (1000000 seconds) and then returns 0. A failed
 * fork() is not reported; that worker is simply skipped.
 */
int main()
{
  for (int worker = 0; worker < 8; worker++) {
    if (fork() != 0)
      continue; /* parent, or fork failure: move on to the next worker */

    setup_main_process();
    const int sandbox_pid = do_sandbox_none(worker, false);
    int status = 0;
    while (waitpid(sandbox_pid, &status, __WALL) != sandbox_pid) {
    }
    return 0;
  }
  sleep(1000000);
  return 0;
}