Re: [patch] Real-Time Preemption, -RT-2.6.9-rc4-mm1-U7

From: Thomas Gleixner
Date: Wed Oct 20 2004 - 00:32:47 EST


On Wed, 2004-10-20 at 01:39, Thomas Gleixner wrote:
> That's in scsi_error_handler() where a mutex is initialized locked and
> then acquired again. This triggers the deadlock/correctness check.

Some more fixes

- scsi_error_handler() fix

- device subsystem device_remove locking fix

tglx


diff --exclude='*~' -urN 2.6.9-rc4-mm1-RT-U7/drivers/base/bus.c 2.6.9-rc4-mm1-RT-U7-work/drivers/base/bus.c
--- 2.6.9-rc4-mm1-RT-U7/drivers/base/bus.c 2004-10-12 09:41:05.000000000 +0200
+++ 2.6.9-rc4-mm1-RT-U7-work/drivers/base/bus.c 2004-10-20 03:04:29.000000000 +0200
@@ -65,7 +65,7 @@
static void driver_release(struct kobject * kobj)
{
struct device_driver * drv = to_driver(kobj);
- up(&drv->unload_sem);
+ complete(&drv->unload_done);
}

static struct kobj_type ktype_driver = {
diff --exclude='*~' -urN 2.6.9-rc4-mm1-RT-U7/include/linux/device.h 2.6.9-rc4-mm1-RT-U7-work/include/linux/device.h
--- 2.6.9-rc4-mm1-RT-U7/include/linux/device.h 2004-10-12 09:41:58.000000000 +0200
+++ 2.6.9-rc4-mm1-RT-U7-work/include/linux/device.h 2004-10-20 03:05:02.000000000 +0200
@@ -102,7 +102,7 @@
char * name;
struct bus_type * bus;

- struct semaphore unload_sem;
+ struct completion unload_done;
struct kobject kobj;
struct list_head devices;

diff --exclude='*~' -urN 2.6.9-rc4-mm1-RT-U7/drivers/base/driver.c 2.6.9-rc4-mm1-RT-U7-work/drivers/base/driver.c
--- 2.6.9-rc4-mm1-RT-U7/drivers/base/driver.c 2004-10-12 09:41:05.000000000 +0200
+++ 2.6.9-rc4-mm1-RT-U7-work/drivers/base/driver.c 2004-10-20 03:11:20.000000000 +0200
@@ -79,14 +79,13 @@
* since most of the things we have to do deal with the bus
* structures.
*
- * The one interesting aspect is that we initialize @drv->unload_sem
- * to a locked state here. It will be unlocked when the driver
- * reference count reaches 0.
+ * We init the completion struct here. When the reference
+ * count reaches zero, complete() is called from driver_release().
*/
int driver_register(struct device_driver * drv)
{
INIT_LIST_HEAD(&drv->devices);
- init_MUTEX_LOCKED(&drv->unload_sem);
+ init_completion(&drv->unload_done);
return bus_add_driver(drv);
}

@@ -97,18 +96,16 @@
*
* Again, we pass off most of the work to the bus-level call.
*
- * Though, once that is done, we attempt to take @drv->unload_sem.
- * This will block until the driver refcount reaches 0, and it is
- * released. Only modular drivers will call this function, and we
+ * Though, once that is done, we wait until the driver refcount
+ * reaches 0, and complete() is called in driver_release().
+ * Only modular drivers will call this function, and we
* have to guarantee that it won't complete, letting the driver
* unload until all references are gone.
*/
-
void driver_unregister(struct device_driver * drv)
{
bus_remove_driver(drv);
- down(&drv->unload_sem);
- up(&drv->unload_sem);
+ wait_for_completion(&drv->unload_done);
}

/**
diff --exclude='*~' -urN 2.6.9-rc4-mm1-RT-U7/drivers/scsi/hosts.c 2.6.9-rc4-mm1-RT-U7-work/drivers/scsi/hosts.c
--- 2.6.9-rc4-mm1-RT-U7/drivers/scsi/hosts.c 2004-10-12 09:41:34.000000000 +0200
+++ 2.6.9-rc4-mm1-RT-U7-work/drivers/scsi/hosts.c 2004-10-20 03:32:10.000000000 +0200
@@ -149,7 +149,7 @@
DECLARE_COMPLETION(sem);
shost->eh_notify = &sem;
shost->eh_kill = 1;
- up(shost->eh_wait);
+ wake_up(shost->eh_wait);
wait_for_completion(&sem);
shost->eh_notify = NULL;
}
diff --exclude='*~' -urN 2.6.9-rc4-mm1-RT-U7/drivers/scsi/scsi_error.c 2.6.9-rc4-mm1-RT-U7-work/drivers/scsi/scsi_error.c
--- 2.6.9-rc4-mm1-RT-U7/drivers/scsi/scsi_error.c 2004-10-19 20:29:46.000000000 +0200
+++ 2.6.9-rc4-mm1-RT-U7-work/drivers/scsi/scsi_error.c 2004-10-20 05:31:20.000000000 +0200
@@ -49,7 +49,7 @@
void scsi_eh_wakeup(struct Scsi_Host *shost)
{
if (shost->host_busy == shost->host_failed) {
- up(shost->eh_wait);
+ wake_up(shost->eh_wait);
SCSI_LOG_ERROR_RECOVERY(5,
printk("Waking error handler thread\n"));
}
@@ -1599,9 +1599,8 @@
{
struct Scsi_Host *shost = (struct Scsi_Host *) data;
int rtn;
- DECLARE_MUTEX(sem);
+ DECLARE_WAIT_QUEUE_HEAD(eh_wait);

- init_MUTEX_LOCKED(&sem);
/*
* Flush resources
*/
@@ -1610,7 +1609,7 @@

current->flags |= PF_NOFREEZE;

- shost->eh_wait = &sem;
+ shost->eh_wait = &eh_wait;
shost->ehandler = current;

/*
@@ -1632,15 +1631,12 @@
" sleeping\n",shost->host_no));

/*
- * Note - we always use down_interruptible with the semaphore
- * even if the module was loaded as part of the kernel. The
- * reason is that down() will cause this thread to be counted
- * in the load average as a running process, and down
- * interruptible doesn't. Given that we need to allow this
- * thread to die if the driver was loaded as a module, using
- * semaphores isn't unreasonable.
+ * Wait until somebody wakes us, either because an error
+ * needs handling or because we should be killed.
*/
- down_interruptible(&sem);
+ wait_event_interruptible(eh_wait, shost->eh_kill ||
+ (shost->host_busy == shost->host_failed));
+
if (shost->eh_kill)
break;

diff --exclude='*~' -urN 2.6.9-rc4-mm1-RT-U7/include/scsi/scsi_host.h 2.6.9-rc4-mm1-RT-U7-work/include/scsi/scsi_host.h
--- 2.6.9-rc4-mm1-RT-U7/include/scsi/scsi_host.h 2004-10-12 09:42:00.000000000 +0200
+++ 2.6.9-rc4-mm1-RT-U7-work/include/scsi/scsi_host.h 2004-10-20 03:42:05.000000000 +0200
@@ -396,7 +396,7 @@

struct list_head eh_cmd_q;
struct task_struct * ehandler; /* Error recovery thread. */
- struct semaphore * eh_wait; /* The error recovery thread waits
+ wait_queue_head_t * eh_wait; /* The error recovery thread waits
on this. */
struct completion * eh_notify; /* wait for eh to begin or end */
struct semaphore * eh_action; /* Wait for specific actions on the