Re: deadlock during fuseblk shutdown

From: Dmitry Vyukov
Date: Mon Mar 07 2016 - 09:34:56 EST


On Mon, Mar 7, 2016 at 3:32 PM, Miklos Szeredi <miklos@xxxxxxxxxx> wrote:
> On Sun, Dec 6, 2015 at 10:04 AM, Dmitry Vyukov <dvyukov@xxxxxxxxxx> wrote:
>> On Fri, Nov 20, 2015 at 4:01 PM, Dmitry Vyukov <dvyukov@xxxxxxxxxx> wrote:
>>> Hello,
>>>
>>> I've hit the following deadlock on
>>> 8005c49d9aea74d382f474ce11afbbc7d7130bec (Nov 15).
>>> I know that fuse docs warn about deadlocks and this can happen only
>>> under root because of the mount call, but maybe there is still
>>> something to fix. The first suspicious thing is that do_exit in the
>>> daemon sends a fuse request to the daemon itself, which it obviously
>>> cannot answer. The second thing is that the hung processes are
>>> unkillable and /sys/fs/fuse/connections/ is empty, so I don't see any
>>> way to repair it.
>>>
>>> The program is:
>>>
>>> // autogenerated by syzkaller (http://github.com/google/syzkaller)
>>> #include <syscall.h>
>>> #include <string.h>
>>> #include <stdint.h>
>>> #include <stdlib.h>
>>> #include <stdio.h>
>>> #include <errno.h>
>>> #include <sys/types.h>
>>> #include <sys/stat.h>
>>> #include <signal.h>
>>> #include <fcntl.h>
>>> #include <unistd.h>
>>> #include <sys/mount.h>
>>> #include <linux/fuse.h>
>>> #include <sched.h>
>>>
>>> #define CLONE_NEWNS 0x00020000
>>>
>>> int unshare(int flags);
>>>
>>> struct msg {
>>> struct fuse_out_header hdr;
>>> struct fuse_poll_out data;
>>> };
>>>
>>> void work(const char *bklname)
>>> {
>>> unshare(CLONE_NEWNS);
>>> int fd = open("/dev/fuse", O_RDWR);
>>> if (fd == -1)
>>> exit(printf("open /dev/fuse failed: %d\n", errno));
>>> if (mknod(bklname, S_IFBLK, makedev(7, 199)))
>>> exit(printf("mknod failed: %d\n", errno));
>>> char buf[4<<10];
>>> sprintf(buf, "fd=%d,user_id=%d,group_id=%d,rootmode=0%o", fd,
>>> getuid(), getgid(), 0xc000);
>>> if (mount(bklname, bklname, "fuseblk", 0x1000080, buf))
>>> exit(printf("mount failed: %d\n", errno));
>>> read(fd, buf, sizeof(buf));
>>> struct msg m;
>>> memset(&m, 0, sizeof(m));
>>> m.hdr.len = sizeof(m);
>>> m.hdr.error = 0;
>>> m.hdr.unique = 1;
>>> m.data.revents = 7;
>>> write(fd, &m, sizeof(m));
>>> exit(1);
>>> }
>>>
>>> int main()
>>> {
>>> int pid1 = fork();
>>> if (pid1 == 0)
>>> work("./fuseblk1");
>>> sleep(1);
>>> kill(pid1, SIGKILL);
>>> int pid2 = fork();
>>> if (pid2 == 0)
>>> work("./fuseblk2");
>>> sleep(1);
>>> kill(pid2, SIGKILL);
>>> return 0;
>>> }
>>>
>>> It results in two hung processes:
>>>
>>> root# cat /proc/2769/stack
>>> [<ffffffff815399a8>] request_wait_answer+0x308/0x4c0 fs/fuse/dev.c:436
>>> [<ffffffff8153a36a>] __fuse_request_send+0xaa/0x100 fs/fuse/dev.c:496
>>> [<ffffffff8153a40b>] fuse_request_send+0x4b/0x50 fs/fuse/dev.c:509
>>> [< inline >] fuse_send_destroy fs/fuse/inode.c:367
>>> [<ffffffff815525b9>] fuse_put_super+0xa9/0x180 fs/fuse/inode.c:382
>>> [<ffffffff812daf8b>] generic_shutdown_super+0xcb/0x1d0 fs/super.c:427
>>> [<ffffffff812db532>] kill_block_super+0x52/0xb0 fs/super.c:1047
>>> [<ffffffff8155229b>] fuse_kill_sb_blk+0x6b/0x80 fs/fuse/inode.c:1214
>>> [<ffffffff812db7e0>] deactivate_locked_super+0x60/0xa0 fs/super.c:301
>>> [<ffffffff812dbe64>] deactivate_super+0x94/0xb0 fs/super.c:332
>>> [<ffffffff8131490b>] cleanup_mnt+0x6b/0xd0 fs/namespace.c:1067
>>> [<ffffffff813149d6>] __cleanup_mnt+0x16/0x20 fs/namespace.c:1074
>>> [<ffffffff810d19b1>] task_work_run+0xe1/0x110 kernel/task_work.c:115
>>> [< inline >] exit_task_work include/linux/task_work.h:21
>>> [<ffffffff8109c9ef>] do_exit+0x55f/0x1690 kernel/exit.c:748
>>> [<ffffffff810a0057>] do_group_exit+0xa7/0x190 kernel/exit.c:878
>>> [< inline >] SYSC_exit_group kernel/exit.c:889
>>> [<ffffffff810a015d>] SyS_exit_group+0x1d/0x20 kernel/exit.c:887
>>> [<ffffffff821d6311>] entry_SYSCALL_64_fastpath+0x31/0x9a
>>> arch/x86/entry/entry_64.S:187
>>>
>>> root# cat /proc/2772/stack
>>> [<ffffffff81676783>] call_rwsem_down_write_failed+0x13/0x20
>>> arch/x86/lib/rwsem.S:99
>>> [<ffffffff812dbb60>] grab_super+0x40/0xf0 fs/super.c:355
>>> [<ffffffff812dc782>] sget+0x492/0x630 fs/super.c:468
>>> [<ffffffff812dcc3a>] mount_bdev+0x15a/0x340 fs/super.c:991
>>> [<ffffffff815522e4>] fuse_mount_blk+0x34/0x40 fs/fuse/inode.c:1201
>>> [<ffffffff812ddc39>] mount_fs+0x69/0x200 fs/super.c:1123
>>> [<ffffffff8131517a>] vfs_kern_mount+0x7a/0x200 fs/namespace.c:948
>>> [< inline >] do_new_mount fs/namespace.c:2409
>>> [<ffffffff81319d5b>] do_mount+0x40b/0x1a80 fs/namespace.c:2725
>>> [< inline >] SYSC_mount fs/namespace.c:2915
>>> [<ffffffff8131ba4a>] SyS_mount+0x10a/0x1a0 fs/namespace.c:2893
>>> [<ffffffff821d6311>] entry_SYSCALL_64_fastpath+0x31/0x9a
>>> arch/x86/entry/entry_64.S:187
>>>
>>>
>>> The first process holds a superblock mutex, so the whole system
>>> becomes unstable. For example, sync invocations also hang in D state.
>>>
>>> Is this intentional? Or is there something to fix?
>
> It isn't intentional and depends on the order in which cleanups are
> done at exit time. If files are closed first and then the namespace
> is cleaned up, then the deadlock shouldn't happen. I don't see why
> this isn't the case.
>
> Do you know in which kernel this started to happen?


No, I don't know. I only know that it happens on
8005c49d9aea74d382f474ce11afbbc7d7130bec.