fs: uninterruptible hang in handle_userfault

From: Dmitry Vyukov
Date: Tue Mar 01 2016 - 06:30:22 EST


Hello,

The following program creates an unkillable process stuck in the D (uninterruptible sleep) state:

// autogenerated by syzkaller (http://github.com/google/syzkaller)
#include <pthread.h>
#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef SYS_userfaultfd
#define SYS_userfaultfd 323
#endif

// Result slots for the calls below, indexed by the generator's own numbering.
// Only some indices are ever written.
long r[38];

// Reproducer body: issues a fixed sequence of raw syscalls against hard-coded
// addresses inside one anonymous mapping. Return values are stored but, apart
// from the socketpair/pipe2 guards, never checked. Statement order and the
// literal addresses are what trigger the reported hang, so they must not be
// rearranged.
int main()
{
// Fill every byte with 0xff so each slot initially reads as -1.
memset(r, -1, sizeof(r));
// Map 0xc40000 bytes at the fixed address 0x20000000. The strace in this
// report decodes the arguments as PROT_READ|PROT_WRITE and
// MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS. All raw pointers used below fall
// inside [0x20000000, 0x20c40000).
r[0] = syscall(SYS_mmap, 0x20000000ul, 0xc40000ul, 0x3ul, 0x32ul,
0xfffffffffffffffful, 0x0ul);
// Create the userfaultfd (strace shows it returning fd 3).
// NOTE(review): 0x800 is presumably O_NONBLOCK — confirm.
r[1] = syscall(SYS_userfaultfd, 0x800ul, 0, 0, 0, 0, 0);
// Three 64-bit words passed to the ioctl below.
// NOTE(review): 0xc018aa3f is presumably UFFDIO_API with api=0xaa and the
// remaining fields zeroed — confirm against linux/userfaultfd.h.
*(uint64_t*)0x20c32000 = (uint64_t)0xaa;
*(uint64_t*)0x20c32008 = (uint64_t)0x0;
*(uint64_t*)0x20c32010 = (uint64_t)0x0;
r[5] = syscall(SYS_ioctl, r[1], 0xc018aa3ful, 0x20c32000ul, 0, 0, 0);
// Punch a hole in the mapping: [0x20879000, 0x2087c000) is unmapped from
// here on.
r[6] = syscall(SYS_munmap, 0x20879000ul, 0x3000ul, 0, 0, 0, 0);
r[7] =
syscall(SYS_sigaltstack, 0x208e1000ul, 0x2087affeul, 0, 0, 0, 0);
// Per the strace in this report this call fails with EPROTONOSUPPORT, so
// the two guarded reads below are skipped and r[9]/r[10] stay at -1.
r[8] = syscall(SYS_socketpair, 0x1ul, 0xaul, 0x7ffffffful,
0x201d7ffcul, 0, 0);
if (r[8] != -1)
r[9] = *(uint32_t*)0x201d7ffc;
if (r[8] != -1)
r[10] = *(uint32_t*)0x201d8000;
// Called with fd -1 in the reported run (strace shows EBADF).
r[11] = syscall(SYS_getsockopt, r[10], 0x1ul, 0xdul, 0x2087a000ul,
0x2087a000ul, 0);
// Four 64-bit words passed to the ioctl below: 0x200cb000, 0x800000, 1, 0.
// NOTE(review): 0xc020aa00 is presumably UFFDIO_REGISTER, which would make
// these start/len/mode/ioctls and register [0x200cb000, 0x208cb000) with
// the userfaultfd — confirm against linux/userfaultfd.h.
*(uint64_t*)0x20c2c9d2 = (uint64_t)0x200cb000;
*(uint64_t*)0x20c2c9da = (uint64_t)0x800000;
*(uint64_t*)0x20c2c9e2 = (uint64_t)0x1;
*(uint64_t*)0x20c2c9ea = (uint64_t)0x0;
r[16] = syscall(SYS_ioctl, r[1], 0xc020aa00ul, 0x20c2c9d2ul, 0, 0, 0);
// Build a 0x18-byte robust-list head at 0x20c35000 and install it. Its
// first and third words (0x2028b000, 0x201de000) point into the address
// range passed to the ioctl above.
// NOTE(review): the reported hang stack faults from mm_release during
// do_exit; possibly the exit-time robust-list walk touching these
// addresses — not established by this report, confirm.
*(uint64_t*)0x20c35000 = (uint64_t)0x2028b000;
*(uint64_t*)0x20c35008 = (uint64_t)0x81;
*(uint64_t*)0x20c35010 = (uint64_t)0x201de000;
r[20] =
syscall(SYS_set_robust_list, 0x20c35000ul, 0x18ul, 0, 0, 0, 0);
// 0x2087bffa lies inside the hole unmapped above. The strace in this report
// shows SIGSEGV immediately after set_robust_list, so this write is
// presumably the faulting access and the statements after it presumably
// never run in the reported reproduction.
memcpy((void*)0x2087bffa, "\x2e\x2f\x62\x75\x73\x00", 6);
memcpy((void*)0x2087b000, "\x2e\x2f\x62\x75\x73\x00", 6);
// Both path strings written above are "./bus".
r[23] = syscall(SYS_linkat, r[10], 0x2087bffaul, r[9], 0x2087b000ul,
0x1400ul, 0);
// On success, read the two pipe fds back from the output array.
r[24] = syscall(SYS_pipe2, 0x201d77ceul, 0x800ul, 0, 0, 0, 0);
if (r[24] != -1)
r[25] = *(uint32_t*)0x201d77ce;
if (r[24] != -1)
r[26] = *(uint32_t*)0x201d77d2;
// Two 64-bit words (0x20bff000, 0x1000) passed to the ioctl below; both
// stores target the unmapped hole.
// NOTE(review): 0x8010aa02 is presumably UFFDIO_WAKE on a start/len range —
// confirm against linux/userfaultfd.h.
*(uint64_t*)0x2087bff9 = (uint64_t)0x20bff000;
*(uint64_t*)0x2087c001 = (uint64_t)0x1000;
r[29] = syscall(SYS_ioctl, r[9], 0x8010aa02ul, 0x2087bff9ul, 0, 0, 0);
r[30] = syscall(SYS_fstat, r[26], 0x2037463bul, 0, 0, 0, 0);
// 19-byte attribute name: "selinux,em0\tppp0/)" plus the terminating NUL.
memcpy((void*)0x20693fed, "\x73\x65\x6c\x69\x6e\x75\x78\x2c\x65\x6d"
"\x30\x09\x70\x70\x70\x30\x2f\x29\x00",
19);
r[32] = syscall(SYS_fgetxattr, r[25], 0x20693fedul, 0x20c366c0ul,
0x7dul, 0, 0);
// Duplicate r[26] onto the userfaultfd's descriptor number.
// NOTE(review): 0x80000 is presumably O_CLOEXEC — confirm.
r[33] = syscall(SYS_dup3, r[26], r[1], 0x80000ul, 0, 0, 0);
// Same three-word argument and ioctl number as the first ioctl above, this
// time issued on r[26] and with the argument placed across the unmapped
// hole boundary.
*(uint64_t*)0x2087bff5 = (uint64_t)0xaa;
*(uint64_t*)0x2087bffd = (uint64_t)0x0;
*(uint64_t*)0x2087c005 = (uint64_t)0x0;
r[37] =
syscall(SYS_ioctl, r[26], 0xc018aa3ful, 0x2087bff5ul, 0, 0, 0);
return 0;
}


The hang stack is:

[<ffffffff818bc380>] handle_userfault+0x680/0xe50 fs/userfaultfd.c:357
[<ffffffff8179249d>] do_huge_pmd_anonymous_page+0x7bd/0xd90 mm/huge_memory.c:904
[< inline >] create_huge_pmd mm/memory.c:3252
[< inline >] __handle_mm_fault mm/memory.c:3371
[<ffffffff816fb47d>] handle_mm_fault+0x30fd/0x4a10 mm/memory.c:3457
[<ffffffff812834c6>] __do_page_fault+0x376/0x960 arch/x86/mm/fault.c:1245
[<ffffffff81283c14>] trace_do_page_fault+0xf4/0x4f0 arch/x86/mm/fault.c:1338
[<ffffffff81274da4>] do_async_page_fault+0x14/0xd0 arch/x86/kernel/kvm.c:264
[<ffffffff866a5578>] async_page_fault+0x28/0x30 arch/x86/entry/entry_64.S:986
[<ffffffff81352874>] mm_release+0x2e4/0x410 kernel/fork.c:864
[< inline >] exit_mm kernel/exit.c:391
[<ffffffff813635df>] do_exit+0x42f/0x2d20 kernel/exit.c:735
[<ffffffff81366048>] do_group_exit+0x108/0x330 kernel/exit.c:878
[<ffffffff81389598>] get_signal+0x628/0x1560 kernel/signal.c:2307
[<ffffffff811a4db3>] do_signal+0x83/0x1c90 arch/x86/kernel/signal.c:712
[<ffffffff81006685>] exit_to_usermode_loop+0x1a5/0x210
arch/x86/entry/common.c:247
[<ffffffff810082cc>] prepare_exit_to_usermode+0x10c/0x130
arch/x86/entry/common.c:282
[<ffffffff866a3d74>] retint_user+0x8/0x23 arch/x86/entry/entry_64.S:559
[<ffffffffffffffff>] 0xffffffffffffffff

strace output:

mmap(0x20000000, 12845056, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x20000000
syscall_323(0x800, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) = 0x3
ioctl(3, 0xc018aa3f, 0x20c32000) = 0
munmap(0x20879000, 12288) = 0
sigaltstack({ss_sp=0, ss_flags=0, ss_size=0}, ) = ?
socketpair(PF_FILE, SOCK_PACKET, 2147483647, 0x201d7ffc) = -1
EPROTONOSUPPORT (Protocol not supported)
getsockopt(-1, SOL_SOCKET, SO_LINGER, 0x2087a000, 0x2087a000) = -1
EBADF (Bad file descriptor)
ioctl(3, 0xc020aa00, 0x20c2c9d2) = 0
set_robust_list(0x20c35000, 0x18) = 0
--- SIGSEGV (Segmentation fault) @ 0 (0) ---


This was reproduced on commit fc77dbd34c5c99bce46d40a2491937c3bcbd10af (4.5-rc6).