Re: [PATCH v2] Bluetooth: vhci, fix open_timeout vs. hdev race

From: Takashi Iwai
Date: Tue Mar 22 2016 - 10:00:51 EST


On Sat, 19 Mar 2016 11:05:18 +0100,
Jiri Slaby wrote:
>
> Both vhci_get_user and vhci_release race with open_timeout work. They
> both contain cancel_delayed_work_sync, but do not test whether the
> work actually created hdev or not. Since the work can be in progress
> and _sync will wait for finishing it, we can have data->hdev allocated
> when cancel_delayed_work_sync returns. But the call sites do 'if
> (data->hdev)' *before* cancel_delayed_work_sync.
>
> As a result:
> * vhci_get_user allocates a second hdev and puts it into
> data->hdev. The former is leaked.
> * vhci_release does not release data->hdev properly as it thinks there
> is none.
>
> Fix both cases by moving the actual test *after* the call to
> cancel_delayed_work_sync.
>
> This can be hit by this program:
> #include <err.h>
> #include <fcntl.h>
> #include <stdio.h>
> #include <stdlib.h>
> #include <time.h>
> #include <unistd.h>
>
> #include <sys/stat.h>
> #include <sys/types.h>
>
> int main(int argc, char **argv)
> {
> int fd;
>
> srand(time(NULL));
>
> while (1) {
> const int delta = (rand() % 200 - 100) * 100;
>
> fd = open("/dev/vhci", O_RDWR);
> if (fd < 0)
> err(1, "open");
>
> usleep(1000000 + delta);
>
> close(fd);
> }
>
> return 0;
> }
>
> And the result is:
> BUG: KASAN: use-after-free in skb_queue_tail+0x13e/0x150 at addr ffff88006b0c1228
> Read of size 8 by task kworker/u13:1/32068
> =============================================================================
> BUG kmalloc-192 (Tainted: G E ): kasan: bad access detected
> -----------------------------------------------------------------------------
>
> Disabling lock debugging due to kernel taint
> INFO: Allocated in vhci_open+0x50/0x330 [hci_vhci] age=260 cpu=3 pid=32040
> ...
> kmem_cache_alloc_trace+0x150/0x190
> vhci_open+0x50/0x330 [hci_vhci]
> misc_open+0x35b/0x4e0
> chrdev_open+0x23b/0x510
> ...
> INFO: Freed in vhci_release+0xa4/0xd0 [hci_vhci] age=9 cpu=2 pid=32040
> ...
> __slab_free+0x204/0x310
> vhci_release+0xa4/0xd0 [hci_vhci]
> ...
> INFO: Slab 0xffffea0001ac3000 objects=16 used=13 fp=0xffff88006b0c1e00 flags=0x5fffff80004080
> INFO: Object 0xffff88006b0c1200 @offset=4608 fp=0xffff88006b0c0600
> Bytes b4 ffff88006b0c11f0: 09 df 00 00 01 00 00 00 00 00 00 00 00 00 00 00 ................
> Object ffff88006b0c1200: 00 06 0c 6b 00 88 ff ff 00 00 00 00 00 00 00 00 ...k............
> Object ffff88006b0c1210: 10 12 0c 6b 00 88 ff ff 10 12 0c 6b 00 88 ff ff ...k.......k....
> Object ffff88006b0c1220: c0 46 c2 6b 00 88 ff ff c0 46 c2 6b 00 88 ff ff .F.k.....F.k....
> Object ffff88006b0c1230: 01 00 00 00 01 00 00 00 e0 ff ff ff 0f 00 00 00 ................
> Object ffff88006b0c1240: 40 12 0c 6b 00 88 ff ff 40 12 0c 6b 00 88 ff ff @..k....@..k....
> Object ffff88006b0c1250: 50 0d 6e a0 ff ff ff ff 00 02 00 00 00 00 ad de P.n.............
> Object ffff88006b0c1260: 00 00 00 00 00 00 00 00 ab 62 02 00 01 00 00 00 .........b......
> Object ffff88006b0c1270: 90 b9 19 81 ff ff ff ff 38 12 0c 6b 00 88 ff ff ........8..k....
> Object ffff88006b0c1280: 03 00 20 00 ff ff ff ff ff ff ff ff 00 00 00 00 .. .............
> Object ffff88006b0c1290: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
> Object ffff88006b0c12a0: 00 00 00 00 00 00 00 00 00 80 cd 3d 00 88 ff ff ...........=....
> Object ffff88006b0c12b0: 00 20 00 00 00 00 00 00 00 00 00 00 00 00 00 00 . ..............
> Redzone ffff88006b0c12c0: bb bb bb bb bb bb bb bb ........
> Padding ffff88006b0c13f8: 00 00 00 00 00 00 00 00 ........
> CPU: 3 PID: 32068 Comm: kworker/u13:1 Tainted: G B E 4.4.6-0-default #1
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20151112_172657-sheep25 04/01/2014
> Workqueue: hci0 hci_cmd_work [bluetooth]
> 00000000ffffffff ffffffff81926cfa ffff88006be37c68 ffff88006bc27180
> ffff88006b0c1200 ffff88006b0c1234 ffffffff81577993 ffffffff82489320
> ffff88006bc24240 0000000000000046 ffff88006a100000 000000026e51eb80
> Call Trace:
> ...
> [<ffffffff81ec8ebe>] ? skb_queue_tail+0x13e/0x150
> [<ffffffffa06e027c>] ? vhci_send_frame+0xac/0x100 [hci_vhci]
> [<ffffffffa0c61268>] ? hci_send_frame+0x188/0x320 [bluetooth]
> [<ffffffffa0c61515>] ? hci_cmd_work+0x115/0x310 [bluetooth]
> [<ffffffff811a1375>] ? process_one_work+0x815/0x1340
> [<ffffffff811a1f85>] ? worker_thread+0xe5/0x11f0
> [<ffffffff811a1ea0>] ? process_one_work+0x1340/0x1340
> [<ffffffff811b3c68>] ? kthread+0x1c8/0x230
> ...
> Memory state around the buggy address:
> ffff88006b0c1100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
> ffff88006b0c1180: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
> >ffff88006b0c1200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> ^
> ffff88006b0c1280: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
> ffff88006b0c1300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc

I think this isn't enough. vhci_create_device() is also called via a
write with HCI_VENDOR_PKT, and that path is open to a race as well.

For example, adding a delay like:

================================================================
--- a/drivers/bluetooth/hci_vhci.c
+++ b/drivers/bluetooth/hci_vhci.c
@@ -113,6 +113,7 @@ static int vhci_create_device(struct vhci_data *data, __u8 opcode)
return -ENOMEM;
}

+ ssleep(1); /* XXX */
data->hdev = hdev;

hdev->bus = HCI_VIRTUAL;

================================================================

... and running code like the following can easily blow things up.

================================================================
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <pthread.h>
#include <stdio.h>
#include <err.h>

/*
 * Thread entry point: perform a single two-byte write on the file
 * descriptor passed (cast to a pointer) in @arg, and print the value
 * that write() returned.  Always returns NULL.
 */
static void *test(void *arg)
{
	/*
	 * Two-byte payload.  Presumably 0xff is the HCI_VENDOR_PKT packet
	 * type and 0x00 the opcode, matching the write path discussed in
	 * this mail -- confirm against the driver headers.
	 */
	static unsigned char pkt[] = { 0xff, 0x00 };
	const int vhci_fd = (long)arg;
	int ret = write(vhci_fd, pkt, sizeof(pkt));

	printf("write %d\n", ret);
	return NULL;
}

/*
 * Open /dev/vhci once and let NTHREADS threads write to the same
 * descriptor concurrently (each thread runs test()), then wait for
 * all of them to finish.
 *
 * Returns 0 when every thread could be started, 1 otherwise.  Exits
 * via err() if the device cannot be opened.
 */
int main(void)
{
	enum { NTHREADS = 32 };
	pthread_t th[NTHREADS];
	int started = 0;
	long fd;	/* long so it round-trips through void * unchanged */
	int i;

	fd = open("/dev/vhci", O_RDWR);
	if (fd < 0)
		err(1, "open");

	for (i = 0; i < NTHREADS; i++) {
		int rc = pthread_create(&th[i], NULL, test, (void *)fd);

		if (rc != 0) {
			/*
			 * th[i] is not a valid thread here; stop and only
			 * join the threads that were actually created.
			 */
			warnx("pthread_create failed: error %d", rc);
			break;
		}
		started++;
	}

	for (i = 0; i < started; i++)
		pthread_join(th[i], NULL);

	close(fd);
	return started == NTHREADS ? 0 : 1;
}
================================================================

To fix these, we need proper mutex protection. A patch like the one
below seems to help, at least for the test case above.

Note that it includes some parts of Jiri's fixes. The readq leak fix
isn't included.


thanks,

Takashi

-- 8< --
diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c
--- a/drivers/bluetooth/hci_vhci.c
+++ b/drivers/bluetooth/hci_vhci.c
@@ -50,6 +50,7 @@ struct vhci_data {
wait_queue_head_t read_wait;
struct sk_buff_head readq;

+ struct mutex open_mutex;
struct delayed_work open_timeout;
};

@@ -87,7 +88,7 @@ static int vhci_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
return 0;
}

-static int vhci_create_device(struct vhci_data *data, __u8 opcode)
+static int __vhci_create_device(struct vhci_data *data, __u8 opcode)
{
struct hci_dev *hdev;
struct sk_buff *skb;
@@ -151,6 +152,19 @@ static int vhci_create_device(struct vhci_data *data, __u8 opcode)
return 0;
}

+static int vhci_create_device(struct vhci_data *data, __u8 opcode)
+{
+ int err;
+
+ mutex_lock(&data->open_mutex);
+ if (data->hdev)
+ err = -EBADFD;
+ else
+ err = __vhci_create_device(data, opcode);
+ mutex_unlock(&data->open_mutex);
+ return err;
+}
+
static inline ssize_t vhci_get_user(struct vhci_data *data,
struct iov_iter *from)
{
@@ -189,11 +203,6 @@ static inline ssize_t vhci_get_user(struct vhci_data *data,
break;

case HCI_VENDOR_PKT:
- if (data->hdev) {
- kfree_skb(skb);
- return -EBADFD;
- }
-
cancel_delayed_work_sync(&data->open_timeout);

opcode = *((__u8 *) skb->data);
@@ -320,6 +329,7 @@ static int vhci_open(struct inode *inode, struct file *file)
skb_queue_head_init(&data->readq);
init_waitqueue_head(&data->read_wait);

+ mutex_init(&data->open_mutex);
INIT_DELAYED_WORK(&data->open_timeout, vhci_open_timeout);

file->private_data = data;
@@ -333,10 +343,11 @@ static int vhci_open(struct inode *inode, struct file *file)
static int vhci_release(struct inode *inode, struct file *file)
{
struct vhci_data *data = file->private_data;
- struct hci_dev *hdev = data->hdev;
+ struct hci_dev *hdev;

cancel_delayed_work_sync(&data->open_timeout);

+ hdev = data->hdev;
if (hdev) {
hci_unregister_dev(hdev);
hci_free_dev(hdev);