[v2.6.34-stable 130/165] md/raid5: abort any pending parity operations when array fails.

From: Paul Gortmaker
Date: Wed Aug 15 2012 - 15:52:17 EST


From: NeilBrown <neilb@xxxxxxx>

-------------------
This is a commit scheduled for the next v2.6.34 longterm release.
http://git.kernel.org/?p=linux/kernel/git/paulg/longterm-queue-2.6.34.git
If you see a problem with using this for longterm, please comment.
-------------------

commit 9a3f530f39f4490eaa18b02719fb74ce5f4d2d86 upstream.

When the number of failed devices exceeds the allowed number
we must abort any active parity operations (checks or updates) as they
are no longer meaningful, and can lead to a BUG_ON in
handle_parity_checks6.

This bug was introduce by commit 6c0069c0ae9659e3a91b68eaed06a5c6c37f45c8
in 2.6.29.

Reported-by: Manish Katiyar <mkatiyar@xxxxxxxxx>
Tested-by: Manish Katiyar <mkatiyar@xxxxxxxxx>
Acked-by: Dan Williams <dan.j.williams@xxxxxxxxx>
Signed-off-by: NeilBrown <neilb@xxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxx>
[PG: use 2.6.32.49 backport since raid5.c @ 9a3f530f/v3.2 differs more]
Signed-off-by: Paul Gortmaker <paul.gortmaker@xxxxxxxxxxxxx>
---
drivers/md/raid5.c | 32 ++++++++++++++++++++------------
1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index dc3e4fc..2936c3b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3040,12 +3040,16 @@ static void handle_stripe5(struct stripe_head *sh)
/* check if the array has lost two devices and, if so, some requests might
* need to be failed
*/
- if (s.failed > 1 && s.to_read+s.to_write+s.written)
- handle_failed_stripe(conf, sh, &s, disks, &return_bi);
- if (s.failed > 1 && s.syncing) {
- md_done_sync(conf->mddev, STRIPE_SECTORS,0);
- clear_bit(STRIPE_SYNCING, &sh->state);
- s.syncing = 0;
+ if (s.failed > 1) {
+ sh->check_state = 0;
+ sh->reconstruct_state = 0;
+ if (s.to_read+s.to_write+s.written)
+ handle_failed_stripe(conf, sh, &s, disks, &return_bi);
+ if (s.syncing) {
+ md_done_sync(conf->mddev, STRIPE_SECTORS,0);
+ clear_bit(STRIPE_SYNCING, &sh->state);
+ s.syncing = 0;
+ }
}

/* might be able to return some write requests if the parity block
@@ -3323,12 +3327,16 @@ static void handle_stripe6(struct stripe_head *sh)
/* check if the array has lost >2 devices and, if so, some requests
* might need to be failed
*/
- if (s.failed > 2 && s.to_read+s.to_write+s.written)
- handle_failed_stripe(conf, sh, &s, disks, &return_bi);
- if (s.failed > 2 && s.syncing) {
- md_done_sync(conf->mddev, STRIPE_SECTORS,0);
- clear_bit(STRIPE_SYNCING, &sh->state);
- s.syncing = 0;
+ if (s.failed > 2) {
+ sh->check_state = 0;
+ sh->reconstruct_state = 0;
+ if (s.to_read+s.to_write+s.written)
+ handle_failed_stripe(conf, sh, &s, disks, &return_bi);
+ if (s.syncing) {
+ md_done_sync(conf->mddev, STRIPE_SECTORS,0);
+ clear_bit(STRIPE_SYNCING, &sh->state);
+ s.syncing = 0;
+ }
}

/*
--
1.7.12.rc2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/