Re: WARNING in 2.6.25-07422-gb66e1f1

From: Dan Williams
Date: Mon May 12 2008 - 13:47:00 EST



On Thu, 2008-05-08 at 22:38 -0700, Neil Brown wrote:
> On Friday May 9, neilb@xxxxxxx wrote:
> > On Thursday May 8, dan.j.williams@xxxxxxxxx wrote:
> > > @@ -133,8 +137,10 @@ static linear_conf_t *linear_conf(mddev_t
> *mddev, int raid_disks)
> > >
> > > disk->rdev = rdev;
> > >
> > > + spin_lock(&conf->device_lock);
> > > blk_queue_stack_limits(mddev->queue,
> > > rdev->bdev->bd_disk->queue);
> > > + spin_unlock(&conf->device_lock);
> > > /* as we don't honour merge_bvec_fn, we must never
> risk
> > > * violating it, so limit ->max_sector to one PAGE, as
> > > * a one page request is never in violation.
> >
> > This shouldn't be necessary.
> > There is no actual race here -- mddev->queue->queue_flags is not
> going to be
> > accessed by anyone else until do_md_run does
> > mddev->queue->make_request_fn = mddev->pers->make_request;
> > which is much later.
> > So we only need to be sure that "queue_is_locked" doesn't complain.
> > And as q->queue_lock is still NULL at this point, it won't complain.
>
> Sorry, I got that backwards. It will complain, won't it. :-)
>
> I gotta say that I think it shouldn't. Introducing a spinlock in
> linear.c, raid0.c, multipath.c just to silence a "WARN_ON" seems like
> the wrong thing to do. Of course we could just use q->__queue_lock so
> we don't have to add a new lock, but we still have to take the lock
> unnecessarily.
>
> Unfortunately I cannot find a nice solution that both avoids clutter
> in md code and also protects against carelessly changing flags without
> a proper lock.....
>
> Maybe....
> We could get blk_queue_stack_limits to lock the queue, and always
> spin_lock_init __queue_lock. Then the only change needed in linear.c
> et al would be to set ->queue_lock to &->__queue_lock.
>
> Jens: What do you think of this??
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index b754a4a..2d31dc2 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -479,6 +479,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t
> gfp_mask, int node_id)
> kobject_init(&q->kobj, &blk_queue_ktype);
>
> mutex_init(&q->sysfs_lock);
> + spin_lock_init(&q->__queue_lock);
>
> return q;
> }
> @@ -541,10 +542,8 @@ blk_init_queue_node(request_fn_proc *rfn,
> spinlock_t *lock, int node_id)
> * if caller didn't supply a lock, they get per-queue locking
> with
> * our embedded lock
> */
> - if (!lock) {
> - spin_lock_init(&q->__queue_lock);
> + if (!lock)
> lock = &q->__queue_lock;
> - }
>
> q->request_fn = rfn;
> q->prep_rq_fn = NULL;
> diff --git a/block/blk-settings.c b/block/blk-settings.c
> index bb93d4c..488199a 100644
> --- a/block/blk-settings.c
> +++ b/block/blk-settings.c
> @@ -286,8 +286,14 @@ void blk_queue_stack_limits(struct request_queue
> *t, struct request_queue *b)
> t->max_hw_segments = min(t->max_hw_segments,
> b->max_hw_segments);
> t->max_segment_size = min(t->max_segment_size,
> b->max_segment_size);
> t->hardsect_size = max(t->hardsect_size, b->hardsect_size);
> - if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
> + if (!t->queue_lock)
> + WARN_ON_ONCE(1);
> + else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
> + unsigned long flags;
> + spin_lock_irqsave(&t->queue_lock, flags);
> queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
> + spin_unlock_irqrestore(&t->queue_lock, flags);
> + }
> }
> EXPORT_SYMBOL(blk_queue_stack_limits);
>
> diff --git a/drivers/md/linear.c b/drivers/md/linear.c
> index 0b85117..552f81b 100644
> --- a/drivers/md/linear.c
> +++ b/drivers/md/linear.c
> @@ -250,6 +250,7 @@ static int linear_run (mddev_t *mddev)
> {
> linear_conf_t *conf;
>
> + mddev->queue_lock = &mddev->__queue_lock;
> conf = linear_conf(mddev, mddev->raid_disks);
>
> if (!conf)
> diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
> index 42ee1a2..90f85e4 100644
> --- a/drivers/md/multipath.c
> +++ b/drivers/md/multipath.c
> @@ -417,6 +417,7 @@ static int multipath_run (mddev_t *mddev)
> * bookkeeping area. [whatever we allocate in multipath_run(),
> * should be freed in multipath_stop()]
> */
> + mddev->queue_lock = &mddev->__queue_lock;
>
> conf = kzalloc(sizeof(multipath_conf_t), GFP_KERNEL);
> mddev->private = conf;
> diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
> index 818b482..a179c8f 100644
> --- a/drivers/md/raid0.c
> +++ b/drivers/md/raid0.c
> @@ -280,6 +280,7 @@ static int raid0_run (mddev_t *mddev)
> (mddev->chunk_size>>1)-1);
> blk_queue_max_sectors(mddev->queue, mddev->chunk_size >> 9);
> blk_queue_segment_boundary(mddev->queue,
> (mddev->chunk_size>>1) - 1);
> + mddev->queue_lock = &mddev->__queue_lock;
>
> conf = kmalloc(sizeof (raid0_conf_t), GFP_KERNEL);
> if (!conf)
> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
> index 6778b7c..ac409b7 100644
> --- a/drivers/md/raid1.c
> +++ b/drivers/md/raid1.c
> @@ -1935,6 +1935,9 @@ static int run(mddev_t *mddev)
> if (!conf->r1bio_pool)
> goto out_no_mem;
>
> + spin_lock_init(&conf->device_lock);
> + mddev->queue->queue_lock = &conf->device_lock;
> +
> rdev_for_each(rdev, tmp, mddev) {
> disk_idx = rdev->raid_disk;
> if (disk_idx >= mddev->raid_disks
> @@ -1958,7 +1961,6 @@ static int run(mddev_t *mddev)
> }
> conf->raid_disks = mddev->raid_disks;
> conf->mddev = mddev;
> - spin_lock_init(&conf->device_lock);
> INIT_LIST_HEAD(&conf->retry_list);
>
> spin_lock_init(&conf->resync_lock);
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 5938fa9..740f670 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -2082,6 +2082,9 @@ static int run(mddev_t *mddev)
> goto out_free_conf;
> }
>
> + spin_lock_init(&conf->device_lock);
> + mddev->queue->queue_lock = &mddev->queue->__queue_lock;
> +
> rdev_for_each(rdev, tmp, mddev) {
> disk_idx = rdev->raid_disk;
> if (disk_idx >= mddev->raid_disks
> @@ -2103,7 +2106,6 @@ static int run(mddev_t *mddev)
>
> disk->head_position = 0;
> }
> - spin_lock_init(&conf->device_lock);
> INIT_LIST_HEAD(&conf->retry_list);
>
> spin_lock_init(&conf->resync_lock);
> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> index 087eee0..4fafc79 100644
> --- a/drivers/md/raid5.c
> +++ b/drivers/md/raid5.c
> @@ -4256,6 +4256,7 @@ static int run(mddev_t *mddev)
> goto abort;
> }
> spin_lock_init(&conf->device_lock);
> + mddev->queue->queue_lock = &conf->device_lock;
> init_waitqueue_head(&conf->wait_for_stripe);
> init_waitqueue_head(&conf->wait_for_overlap);
> INIT_LIST_HEAD(&conf->handle_list);
>

Yes, this is simpler than what I had... spotted some fixups.

--
Dan

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 488199a..8dd8641 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -290,9 +290,9 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
WARN_ON_ONCE(1);
else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
unsigned long flags;
- spin_lock_irqsave(&t->queue_lock, flags);
+ spin_lock_irqsave(t->queue_lock, flags);
queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
- spin_unlock_irqrestore(&t->queue_lock, flags);
+ spin_unlock_irqrestore(t->queue_lock, flags);
}
}
EXPORT_SYMBOL(blk_queue_stack_limits);
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 552f81b..1074824 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -250,7 +250,7 @@ static int linear_run (mddev_t *mddev)
{
linear_conf_t *conf;

- mddev->queue_lock = &mddev->__queue_lock;
+ mddev->queue->queue_lock = &mddev->queue->__queue_lock;
conf = linear_conf(mddev, mddev->raid_disks);

if (!conf)
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 90f85e4..4f4d1f3 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -417,7 +417,7 @@ static int multipath_run (mddev_t *mddev)
* bookkeeping area. [whatever we allocate in multipath_run(),
* should be freed in multipath_stop()]
*/
- mddev->queue_lock = &mddev->__queue_lock;
+ mddev->queue->queue_lock = &mddev->queue->__queue_lock;

conf = kzalloc(sizeof(multipath_conf_t), GFP_KERNEL);
mddev->private = conf;
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index a179c8f..914c04d 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -280,7 +280,7 @@ static int raid0_run (mddev_t *mddev)
(mddev->chunk_size>>1)-1);
blk_queue_max_sectors(mddev->queue, mddev->chunk_size >> 9);
blk_queue_segment_boundary(mddev->queue, (mddev->chunk_size>>1) - 1);
- mddev->queue_lock = &mddev->__queue_lock;
+ mddev->queue->queue_lock = &mddev->queue->__queue_lock;

conf = kmalloc(sizeof (raid0_conf_t), GFP_KERNEL);
if (!conf)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index f46d448..8536ede 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2083,7 +2083,7 @@ static int run(mddev_t *mddev)
}

spin_lock_init(&conf->device_lock);
- mddev->queue->queue_lock = &mddev->queue->__queue_lock;
+ mddev->queue->queue_lock = &conf->device_lock;

rdev_for_each(rdev, tmp, mddev) {
disk_idx = rdev->raid_disk;


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/