Re: PATCH: Fix nbd on 2.1.118 SMP kernels

Stephen C. Tweedie (sct@redhat.com)
Fri, 28 Aug 1998 11:21:25 +0100


Hi again,

On Thu, 27 Aug 1998 11:40:43 -0700 (PDT), Linus Torvalds
<torvalds@transmeta.com> said:

> It's otherwise ok, but this part is certainly buggy:

> If you drop the io-request lock, and then not expect CURRENT to change,
> you're just fooling yourself. That's like saying "oh, btw, I now let
> people change the CURRENT queue, but by God if anybody actually does so
> I'll panic". That's just too stupid for me to accept as a patch.

Whoops --- in 2.1.119, I see you've "fixed" this. Umm, by doing so,
you've completely broken the driver! Not only could we safely expect
CURRENT not to change in this particular instance, we relied on it as a
mutex to prevent concurrent requests from different processes. Removing
CURRENT before doing the send means that 2.1.119 suffers badly from
request reordering and interleaving. It survives a load test for only a
few seconds.

The patch below fixes one typo introduced in the 119 patch (it doesn't
even compile: ack), fixes an older typo affecting readonly processing,
and adds a semaphore to the send queue. The send queue should now be
atomic both with respect to other senders and to the receive process; we
remove CURRENT before blocking, but don't touch the network or put the
request on the receive list until we have the semaphore.

This one does compile and has survived a basic load test.

--Stephen

----------------------------------------------------------------
--- drivers/block/nbd.c~ Fri Aug 28 10:17:48 1998
+++ drivers/block/nbd.c Fri Aug 28 11:00:59 1998
@@ -61,13 +61,20 @@
static int nbd_open(struct inode *inode, struct file *file)
{
int dev;
+ struct nbd_device *nbdev;

if (!inode)
return -EINVAL;
dev = MINOR(inode->i_rdev);
if (dev >= MAX_NBD)
return -ENODEV;
+
+ nbdev = &nbd_dev[dev];
nbd_dev[dev].refcnt++;
+ if (!(nbdev->flags & NBD_INITIALISED)) {
+ nbdev->queue_lock = MUTEX;
+ nbdev->flags |= NBD_INITIALISED;
+ }
MOD_INC_USE_COUNT;
return 0;
}
@@ -82,6 +89,7 @@
struct msghdr msg;
struct iovec iov;
unsigned long flags;
+ sigset_t oldset;

oldfs = get_fs();
set_fs(get_ds());
@@ -94,8 +102,6 @@


do {
- sigset_t oldset;
-
iov.iov_base = buf;
iov.iov_len = size;
msg.msg_name = NULL;
@@ -209,16 +215,18 @@
req = nbd_read_stat(lo);
if (!req)
return;
+ down (&lo->queue_lock);
#ifdef PARANOIA
if (req != lo->tail) {
printk(KERN_ALERT "NBD: I have problem...\n");
}
if (lo != &nbd_dev[MINOR(req->rq_dev)]) {
printk(KERN_ALERT "NBD: request corrupted!\n");
- continue;
+ goto next;
}
if (lo->magic != LO_MAGIC) {
printk(KERN_ALERT "NBD: nbd_dev[] corrupted: Not enough magic\n");
+ up (&lo->queue_lock);
return;
}
#endif
@@ -231,6 +239,8 @@
lo->head = NULL;
}
lo->tail = lo->tail->next;
+ next:
+ up (&lo->queue_lock);
}
}

@@ -291,7 +301,7 @@
lo = &nbd_dev[dev];
if (!lo->file)
FAIL("Request when not-ready.");
- if ((req->cmd == WRITE) && (lo->flags && NBD_READ_ONLY))
+ if ((req->cmd == WRITE) && (lo->flags & NBD_READ_ONLY))
FAIL("Write on read-only");
#ifdef PARANOIA
if (lo->magic != LO_MAGIC)
@@ -301,6 +311,9 @@
req->errors = 0;
CURRENT = CURRENT->next;
req->next = NULL;
+
+ spin_unlock_irq(&io_request_lock);
+ down (&lo->queue_lock);
if (lo->head == NULL) {
lo->head = req;
lo->tail = req;
@@ -309,8 +322,8 @@
lo->head = req;
}

- spin_unlock_irq(&io_request_lock);
nbd_send_req(lo->sock, req); /* Why does this block? */
+ up (&lo->queue_lock);
spin_lock_irq(&io_request_lock);
continue;

--- include/linux/nbd.h~ Fri Aug 28 10:20:40 1998
+++ include/linux/nbd.h Fri Aug 28 10:44:51 1998
@@ -12,6 +12,7 @@
#ifdef MAJOR_NR

#include <linux/locks.h>
+#include <asm/semaphore.h>

#define LOCAL_END_REQUEST

@@ -42,11 +43,13 @@
int harderror; /* Code of hard error */
#define NBD_READ_ONLY 0x0001
#define NBD_WRITE_NOCHK 0x0002
+#define NBD_INITIALISED 0x0004
struct socket * sock;
struct file * file; /* If == NULL, device is not ready, yet */
int magic; /* FIXME: not if debugging is off */
struct request *head; /* Requests are added here... */
struct request *tail;
+ struct semaphore queue_lock;
};

/* This now IS in some kind of include file... */

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.altern.org/andrebalsa/doc/lkml-faq.html