Re: Re: i686 hang on boot in userspace

From: gmu 2k6
Date: Tue Jul 25 2006 - 10:26:05 EST


On 7/25/06, Jens Axboe <axboe@xxxxxxx> wrote:
On Tue, Jul 25 2006, Jens Axboe wrote:
> On Tue, Jul 25 2006, gmu 2k6 wrote:
> > On 7/25/06, Jens Axboe <axboe@xxxxxxx> wrote:
> > >On Tue, Jul 25 2006, gmu 2k6 wrote:
> > >> ok, let's nail it to 2.6.17-git5 instead as it survived git status
> > >> compared to -git6
> > >> which seems to have correctly booted by accident the last time. timing
> > >> issues, I guess.
> > >
> > >I will try and reproduce it here now. It seems to be in between commit
> > >271f18f102c789f59644bb6c53a69da1df72b2f4 and commit
> > >dd67d051529387f6e44d22d1d5540ef281965fdd where the first one could also
> > >be bad.
> > >
> > >I'm assuming that acf421755593f7d7bd9352d57eda796c6eb4fa43 should be
> > >good, so you can try and verify that
> > >dd67d051529387f6e44d22d1d5540ef281965fdd is bad and bisect between the
> > >two. It's only about 6 commits, so should be quick enough to do.
> >
> > 1) no luck with remote serial console
> > 2) netconsole does not work although connecting to the listener with netcat
> > and
> > sending strings works
> > I'm gonna try via physical rs232 9pins and see how that works.
> > afterwards I will try to bisect the revisions you mentioned.
> >
> > btw, the issue seems to come and go, as I managed to boot and log into a
> > .17-git6 kernel, or it is timing-dependent.
>
> I can reproduce it, you don't have to spend more time on bisecting or
> testing. This should fix it:
>
> diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
> index 1c4df22..1eac041 100644
> --- a/drivers/block/cciss.c
> +++ b/drivers/block/cciss.c
> @@ -1238,6 +1238,7 @@ static void cciss_softirq_done(struct re
> CommandList_struct *cmd = rq->completion_data;
> ctlr_info_t *h = hba[cmd->ctlr];
> unsigned long flags;
> + request_queue_t *q;
> u64bit temp64;
> int i, ddir;
>
> @@ -1260,10 +1261,13 @@ #ifdef CCISS_DEBUG
> printk("Done with %p\n", rq);
> #endif /* CCISS_DEBUG */
>
> + q = rq->q;
> +
> add_disk_randomness(rq->rq_disk);
> spin_lock_irqsave(&h->lock, flags);
> end_that_request_last(rq, rq->errors);
> cmd_free(h, cmd, 1);
> + blk_start_queue(q);
> spin_unlock_irqrestore(&h->lock, flags);
> }
>
>
> A better fix would rework the start_queue logic entirely in the driver,
> but the above should get you running for now. I'll take a further look.

Something like this matches the current logic better. It's not very good
from a cpu efficiency point of view, but it's better than what is there
now since at least it's not in hard irq context.

Not tested yet, will do so right now.

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 1c4df22..a9e0510 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1233,6 +1233,50 @@ static inline void complete_buffers(stru
}
}

+static void cciss_check_queues(ctlr_info_t *h)
+{
+ int start_queue = h->next_to_run;
+ int i;
+
+ /* check to see if we have maxed out the number of commands that can
+ * be placed on the queue. If so then exit. We do this check here
+ * in case the interrupt we serviced was from an ioctl and did not
+ * free any new commands.
+ */
+ if ((find_first_zero_bit(h->cmd_pool_bits, NR_CMDS)) == NR_CMDS)
+ return;
+
+ /* We have room on the queue for more commands. Now we need to queue
+ * them up. We will also keep track of the next queue to run so
+ * that every queue gets a chance to be started first.
+ */
+ for (i = 0; i < h->highest_lun + 1; i++) {
+ int curr_queue = (start_queue + i) % (h->highest_lun + 1);
+ /* make sure the disk has been added and the drive is real
+ * because this can be called from the middle of init_one.
+ */
+ if (!(h->drv[curr_queue].queue) || !(h->drv[curr_queue].heads))
+ continue;
+ blk_start_queue(h->gendisk[curr_queue]->queue);
+
+ /* check to see if we have maxed out the number of commands
+ * that can be placed on the queue.
+ */
+ if ((find_first_zero_bit(h->cmd_pool_bits, NR_CMDS)) == NR_CMDS) {
+ if (curr_queue == start_queue) {
+ h->next_to_run =
+ (start_queue + 1) % (h->highest_lun + 1);
+ break;
+ } else {
+ h->next_to_run = curr_queue;
+ break;
+ }
+ } else {
+ curr_queue = (curr_queue + 1) % (h->highest_lun + 1);
+ }
+ }
+}
+
static void cciss_softirq_done(struct request *rq)
{
CommandList_struct *cmd = rq->completion_data;
@@ -1264,6 +1308,7 @@ #endif /* CCISS_DEBUG */
spin_lock_irqsave(&h->lock, flags);
end_that_request_last(rq, rq->errors);
cmd_free(h, cmd, 1);
+ cciss_check_queues(h);
spin_unlock_irqrestore(&h->lock, flags);
}

@@ -2528,8 +2573,6 @@ static irqreturn_t do_cciss_intr(int irq
CommandList_struct *c;
unsigned long flags;
__u32 a, a1, a2;
- int j;
- int start_queue = h->next_to_run;

if (interrupt_not_for_us(h))
return IRQ_NONE;
@@ -2588,45 +2631,6 @@ # endif
}
}

- /* check to see if we have maxed out the number of commands that can
- * be placed on the queue. If so then exit. We do this check here
- * in case the interrupt we serviced was from an ioctl and did not
- * free any new commands.
- */
- if ((find_first_zero_bit(h->cmd_pool_bits, NR_CMDS)) == NR_CMDS)
- goto cleanup;
-
- /* We have room on the queue for more commands. Now we need to queue
- * them up. We will also keep track of the next queue to run so
- * that every queue gets a chance to be started first.
- */
- for (j = 0; j < h->highest_lun + 1; j++) {
- int curr_queue = (start_queue + j) % (h->highest_lun + 1);
- /* make sure the disk has been added and the drive is real
- * because this can be called from the middle of init_one.
- */
- if (!(h->drv[curr_queue].queue) || !(h->drv[curr_queue].heads))
- continue;
- blk_start_queue(h->gendisk[curr_queue]->queue);
-
- /* check to see if we have maxed out the number of commands
- * that can be placed on the queue.
- */
- if ((find_first_zero_bit(h->cmd_pool_bits, NR_CMDS)) == NR_CMDS) {
- if (curr_queue == start_queue) {
- h->next_to_run =
- (start_queue + 1) % (h->highest_lun + 1);
- goto cleanup;
- } else {
- h->next_to_run = curr_queue;
- goto cleanup;
- }
- } else {
- curr_queue = (curr_queue + 1) % (h->highest_lun + 1);
- }
- }
-
- cleanup:
spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
return IRQ_HANDLED;
}

This patch makes the cciss init hang.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/